|
@@ -2,9 +2,13 @@
|
|
|
#include<cmath>
|
|
|
#include<cstdlib>
|
|
|
#include<cstring>
|
|
|
+#include<exception>
|
|
|
#include"../basic/nanxing_operator_check.h"
|
|
|
+
|
|
|
namespace nanxing_extend
|
|
|
{
|
|
|
+
|
|
|
+
|
|
|
//这是个意外,本来是想工厂类和不同的过滤器划分到不同文件的,但是由于贯彻head_only,还没想出来怎么组织文件,只能丢在一起了
|
|
|
//同时也是一个控制类,即这个类本身带有控制功能,只允许一次产生持有唯一的filter
|
|
|
//如果不对产生的类进行管控,最后的结果很可能是完全无法处理,因为C++中内存分配方式过于多样,malloc,new,new[],数组。。。。乱七八糟,最后类内生成的空间完全无法析构,因为根本不知道类内怎么实现的
|
|
@@ -25,15 +29,18 @@ namespace nanxing_extend
|
|
|
FilterPolicy(){};
|
|
|
FilterPolicy(FilterPolicy&&){}
|
|
|
virtual void policy_print(){producter->policy_print();} //输出对应的计算公式
|
|
|
- virtual void parameter_print(){producter->parameter_print();} //输出过滤器的参数
|
|
|
+ virtual void parameter_print(){producter->parameter_print();} //输出过滤器的参数
|
|
|
+ virtual void insert(K key)noexcept{};
|
|
|
//两个工厂函数用于生成不同的过滤器
|
|
|
FilterPolicy* creat_Bloomfilter(int N,int P);
|
|
|
FilterPolicy* creat_Cuckoofilter();
|
|
|
- virtual void init_filter(){producter->init_filter();}
|
|
|
+ virtual void init_filter(){producter->init_filter();} //初始化过滤器
|
|
|
virtual ~FilterPolicy(){};
|
|
|
};
|
|
|
+
|
|
|
+ //bloomfilter
|
|
|
template<typename K>
|
|
|
- class bloomfilter:public FilterPolicy
|
|
|
+ class bloomfilter:public FilterPolicy<K>
|
|
|
{
|
|
|
static_assert(NANXING_BASIC_OPERATOR_(K,compare),"the type of key cannot compare");
|
|
|
private:
|
|
@@ -42,13 +49,13 @@ namespace nanxing_extend
|
|
|
constexpr static const int hash_prime_number2[10]={94399,94421,94427,94433,94439,94441,94447,94463,94477,94483}; //哈希函数的置偏值
|
|
|
struct param{
|
|
|
float P; //概率参数
|
|
|
- int k; //哈希函数个数
|
|
|
+ int k=0; //哈希函数个数
|
|
|
int n; //n数据长度
|
|
|
- int m; //bloomfilter长度
|
|
|
+ int m=0; //bloomfilter长度
|
|
|
param(float _p,int _n):P(_p),n(_n){}
|
|
|
};
|
|
|
private:
|
|
|
- u_int64_t* bitarray; //bloomfilter的核心数据结构,一个比特数组
|
|
|
+ char* bitarray; //bloomfilter的核心数据结构,一个比特数组
|
|
|
param parameter;
|
|
|
|
|
|
protected:
|
|
@@ -57,25 +64,71 @@ namespace nanxing_extend
|
|
|
bloomfilter& operator=(bloomfilter)=delete; //不允许复制构造
|
|
|
void caculater()noexcept //计算bloomfilter的参数
|
|
|
{
|
|
|
- parameter.m=static_cast<int>(-(parameter.n*std::log(parameter.P)/0.4804530139));
|
|
|
- parameter.k=static_cast<int>(0.6931471806*(parameter.m/parameter.n));
|
|
|
+ this->parameter.m=static_cast<int>(-(parameter.n*std::log(parameter.P)/0.4804530139));
|
|
|
+ this->parameter.k=static_cast<int>(0.6931471806*(parameter.m/parameter.n));
|
|
|
}
|
|
|
int hash_function(int i,K data) //i代表第i个哈希函数,返回在bloomfilter中的位置
|
|
|
{
|
|
|
return ((static_cast<int>(data))*(this->hash_prime_number1[i])+(this->hash_prime_number2[i]))%(this->parameter.m);
|
|
|
}
|
|
|
public:
|
|
|
- bloomfilter(float _p,int _n):bitarray(0),parameter(_p,_n){}
|
|
|
+ bloomfilter(float _p,int _n):bitarray(nullptr),parameter(_p,_n){}
|
|
|
+
|
|
|
void init_filter()override{
|
|
|
+ LOOP:
|
|
|
caculater();
|
|
|
- bitarray=new u_int64_t[static_cast<int>(parameter.m/64)+1]; //构建数组
|
|
|
- std::memset(bitarray,0,sizeof(u_int64_t)*static_cast<int>(parameter.m/64)+1); //初始化
|
|
|
+ LOOP1:
|
|
|
+ try{
|
|
|
+ bitarray=new char[static_cast<int>(parameter.m/8)+1]; //构建数组
|
|
|
+ }
|
|
|
+ catch(std::bad_alloc)
|
|
|
+ {
|
|
|
+ char tmp_input;
|
|
|
+ std::cerr<<"May not have enough memory to use"<<std::endl;
|
|
|
+ std::cerr<<"if you want to try again,please input r"<<std::endl;
|
|
|
+ std::cerr<<"if you want to exit please input e"<<std::endl;
|
|
|
+ std::cerr<<"if you want to use less bloomfilter with higher error rates,please input p"<<std::endl;
|
|
|
+ std::cin>>tmp_input;
|
|
|
+ switch(tmp_input)
|
|
|
+ {
|
|
|
+ case 'r':
|
|
|
+ goto LOOP1;
|
|
|
+ break;
|
|
|
+ case 'e':
|
|
|
+ std::terminate();
|
|
|
+ break;
|
|
|
+ case 'p':
|
|
|
+ std::cerr<<"please input the new P by float"<<std::endl;
|
|
|
+ std::cin>>this->parameter.P;
|
|
|
+ goto LOOP;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ std::memset(bitarray,0,sizeof(char)*static_cast<int>(parameter.m/8)+1); //初始化
|
|
|
}
|
|
|
+
|
|
|
void policy_print()override //打印出使用的公式
|
|
|
{
|
|
|
std::cout<<"m=-((nlnP)/((ln2)^2))"<<"//m为bloomfilter的长度"<<std::endl;
|
|
|
std::cout<<"k=ln2*(m/n)"<<"//k为所需的哈希函数的个数"<<std::endl;
|
|
|
}
|
|
|
+
|
|
|
+ void insert(K key)noexcept override
|
|
|
+ {
|
|
|
+ if(this->parameter.k==0)
|
|
|
+ {
|
|
|
+ std::cerr<<"the filter never init,and the filter is useless."<<std::endl;
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ for(int i=0;i<this->parameter.k;i++)
|
|
|
+ {
|
|
|
+ int tmp=this->hash_function(i,key);
|
|
|
+ int filter_u=static_cast<int>(tmp/8);
|
|
|
+ int move=tmp%8;
|
|
|
+ this->bitarray[filter_u]||('1'<<(7-move));
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
virtual ~bloomfilter()
|
|
|
{
|
|
|
if(bitarray!=nullptr)
|
|
@@ -85,8 +138,9 @@ namespace nanxing_extend
|
|
|
}
|
|
|
};
|
|
|
|
|
|
+ //布谷鸟过滤器
|
|
|
template<typename K>
|
|
|
- class Cuckoofilter:public FilterPolicy
|
|
|
+ class Cuckoofilter:public FilterPolicy<K>
|
|
|
{
|
|
|
static_assert(NANXING_BASIC_OPERATOR_(K,compare),"the type of key cannot compare");
|
|
|
};
|
|
@@ -98,7 +152,7 @@ namespace nanxing_extend
|
|
|
{
|
|
|
delete producter;
|
|
|
}
|
|
|
- producter=new bloomfilter(N,P);
|
|
|
+ producter=new bloomfilter<K>(N,P);
|
|
|
return producter;
|
|
|
}
|
|
|
|
|
@@ -109,7 +163,7 @@ namespace nanxing_extend
|
|
|
{
|
|
|
delete producter;
|
|
|
}
|
|
|
- producter=new Cuckoofilter;
|
|
|
+ producter=new Cuckoofilter<K>;
|
|
|
return producter;
|
|
|
}
|
|
|
}
|