Reading: Space/Time Trade-offs in Hash Coding with Allowable Errors

Space/time trade-offs in hash coding with allowable errors

概要

References

サンプルコード

#include <cstdio>
#include <cstdint>
#include <limits>
#include <xxhash.h>
#include <bitset>
#include <string>
#include <memory>

uint64_t fnv1_hash(const char *key, std::size_t n_bytes)
{
  std::uint64_t h = 14695981039346656037ull;
  for (std::size_t i = 0; i < n_bytes; i++) {
    h = (h * 1099511628211) ^ key[i];
  }
  return h;
}

class Slice
{
public:
  explicit Slice(const std::string& src)
      : data_(src.data()), len_(src.length()) {}
  ~Slice() {}
  const char* data() const { return data_; }
  std::size_t len() const { return len_; }

private:
  const char* data_;
  std::size_t len_;
};

template <std::size_t N = std::numeric_limits<std::uint32_t>::max()>
class BloomFilter
{
public:
  BloomFilter() : bits_(new std::bitset<N>{}) {}

  ~BloomFilter() = default;

  void set(const Slice& key)
  {
    auto buffer = key.data();
    auto len = key.len();
    bits_->set(XXH32(buffer, len , len) % N);
    bits_->set(fnv1_hash(buffer, len) % N);
  }

  bool test(const Slice& key) const
  {
    auto buffer = key.data();
    auto len = key.len();

    if (!bits_->test(XXH32(buffer, len, len) % N)) {
      return false;
    }

    if (!bits_->test(fnv1_hash(buffer, len) % N)) {
      return false;
    }

    return true;
  }

private:
  std::unique_ptr<std::bitset<N>> bits_;
};

int main()
{
  using namespace std;

  BloomFilter<> bf;

  for (auto i = 0; i < 1000000; i++) {
    bf.set(Slice(string(std::to_string(i))));
  }

  if (bf.test(Slice(string("1")))) {
    printf("ok\n");
  } else {
    printf("bad\n");
  }

  return 0;
}

Comments

comments powered by Disqus