filter.h 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. #include<iostream>
  2. #include<cmath>
  3. #include<cstdlib>
  4. #include<cstring>
  5. #include<exception>
  6. #include"../basic/nanxing_operator_check.h"
  7. namespace nanxing_extend
  8. {
  9. //这是个意外,本来是想工厂类和不同的过滤器划分到不同文件的,但是由于贯彻head_only,还没想出来怎么组织文件,只能丢在一起了
  10. //同时也是一个控制类,即这个类本身带有控制功能,只允许一次产生持有唯一的filter
  11. //如果不对产生的类进行管控,最后的结果很可能是完全无法处理,因为C++中内存分配方式过于多样,malloc,new,new[],数组。。。。乱七八糟,最后类内生成的空间完全无法析构,因为根本不知道类内怎么实现的
  12. //当然也可以通过返回智能指针的方式进行管控,但是没有选择那么做的原因在于过滤器本身不会大量存在
  13. //一个程序中最多一到两个过滤器
  14. //但是但是相信我不会有God class
  15. template<typename K>
  16. class FilterPolicy
  17. {
  18. static_assert(NANXING_BASIC_OPERATOR_(K,compare),"the type of key cannot compare");
  19. private:
  20. FilterPolicy& operator=(FilterPolicy const&)=delete;
  21. FilterPolicy(FilterPolicy const&)=delete;
  22. FilterPolicy* producter=nullptr; //用于管控生成的类
  23. public:
  24. //限制当主动生成一个工厂类后,不管怎么赋值,最后只有一个工厂
  25. //move-only
  26. FilterPolicy(){};
  27. FilterPolicy(FilterPolicy&&){}
  28. virtual void policy_print(){} //输出对应的计算公式
  29. virtual void parameter_print(){} //输出过滤器的参数
  30. virtual void insert(K key)noexcept{}; //当向数据库插入数据后对过滤器进行调整
  31. [[nodiscard]]virtual bool check(K key)noexcept{};
  32. //两个工厂函数用于生成不同的过滤器
  33. FilterPolicy* creat_Bloomfilter(int N,int P);
  34. FilterPolicy* creat_Cuckoofilter();
  35. virtual void init_filter(){} //初始化过滤器
  36. virtual ~FilterPolicy(){
  37. if(producter!=nullptr)
  38. {
  39. delete producter;
  40. }
  41. };
  42. };
  43. //bloomfilter
  44. template<typename K>
  45. class bloomfilter:public FilterPolicy<K>
  46. {
  47. static_assert(NANXING_BASIC_OPERATOR_(K,compare),"the type of key cannot compare");
  48. private:
  49. //这里使用线性混合同余来构造哈希函数,因为线性混合同余的随机性和运算效率的平衡比较好
  50. constexpr static const int hash_prime_number1[10]={52223,52237,79939,19937,8243,24133,12647,13147,62459,94547}; //给出10个大素数作为哈希函数的素数种子
  51. constexpr static const int hash_prime_number2[10]={94399,94421,94427,94433,94439,94441,94447,94463,94477,94483}; //哈希函数的置偏值
  52. struct param{
  53. float P; //概率参数
  54. int k=0; //哈希函数个数
  55. int n; //n数据长度
  56. int m=0; //bloomfilter长度
  57. param(float _p,int _n):P(_p),n(_n){}
  58. };
  59. private:
  60. char* bitarray; //bloomfilter的核心数据结构,一个比特数组
  61. param parameter;
  62. protected:
  63. bloomfilter()=delete;
  64. bloomfilter(bloomfilter&)=delete;
  65. bloomfilter& operator=(bloomfilter)=delete; //不允许复制构造
  66. void caculater()noexcept //计算bloomfilter的参数
  67. {
  68. this->parameter.m=static_cast<int>(-(parameter.n*std::log(parameter.P)/0.4804530139));
  69. this->parameter.k=static_cast<int>(0.6931471806*(parameter.m/parameter.n));
  70. }
  71. int hash_function(int i,K data) //i代表第i个哈希函数,返回在bloomfilter中的位置
  72. {
  73. return ((static_cast<int>(data))*(this->hash_prime_number1[i])+(this->hash_prime_number2[i]))%(this->parameter.m);
  74. }
  75. public:
  76. bloomfilter(float _p,int _n):bitarray(nullptr),parameter(_p,_n){}
  77. void init_filter()override{
  78. LOOP:
  79. caculater();
  80. LOOP1:
  81. try{
  82. bitarray=new char[static_cast<int>(parameter.m/8)+1]; //构建数组
  83. }
  84. catch(std::bad_alloc)
  85. {
  86. char tmp_input;
  87. std::cerr<<"May not have enough memory to use"<<std::endl;
  88. std::cerr<<"if you want to try again,please input r"<<std::endl;
  89. std::cerr<<"if you want to exit please input e"<<std::endl;
  90. std::cerr<<"if you want to use less bloomfilter with higher error rates,please input p"<<std::endl;
  91. std::cin>>tmp_input;
  92. switch(tmp_input)
  93. {
  94. case 'r':
  95. goto LOOP1;
  96. break;
  97. case 'e':
  98. std::terminate();
  99. break;
  100. case 'p':
  101. std::cerr<<"please input the new P by float"<<std::endl;
  102. std::cin>>this->parameter.P;
  103. goto LOOP;
  104. break;
  105. }
  106. }
  107. std::memset(bitarray,0,sizeof(char)*static_cast<int>(parameter.m/8)+1); //初始化
  108. }
  109. void policy_print()override //打印出使用的公式
  110. {
  111. std::cout<<"m=-((nlnP)/((ln2)^2))"<<"//m为bloomfilter的长度"<<std::endl;
  112. std::cout<<"k=ln2*(m/n)"<<"//k为所需的哈希函数的个数"<<std::endl;
  113. }
  114. void insert(K key)noexcept override
  115. {
  116. if(this->parameter.k==0)
  117. {
  118. std::cerr<<"the filter never init,and the filter is useless."<<std::endl;
  119. return;
  120. }
  121. for(int i=0;i<this->parameter.k;i++)
  122. {
  123. int tmp=this->hash_function(i,key);
  124. int filter_u=static_cast<int>(tmp/8);
  125. int move=tmp%8;
  126. this->bitarray[filter_u]=this->bitarray[filter_u]|('1'<<(7-move));
  127. }
  128. }
  129. [[nodiscard]]bool check(K key)noexcept override
  130. {
  131. if(this->parameter.k==0)
  132. {
  133. std::cerr<<"the filter never init,and the filter is useless."<<std::endl;
  134. return false;
  135. }
  136. for(int i=0;i<this->parameter.k;i++)
  137. {
  138. int tmp=this->hash_function(i,key);
  139. int filter_u=static_cast<int>(tmp/8);
  140. int move=tmp%8;
  141. if((this->bitarray[filter_u]&('1'<<(7-move)))==0) //若对应位为0
  142. {
  143. return false;
  144. }
  145. }
  146. return true;
  147. }
  148. virtual ~bloomfilter()
  149. {
  150. if(bitarray!=nullptr)
  151. {
  152. delete[] bitarray;
  153. }
  154. }
  155. };
  156. //布谷鸟过滤器
  157. template<typename K>
  158. class Cuckoofilter:public FilterPolicy<K>
  159. {
  160. static_assert(NANXING_BASIC_OPERATOR_(K,compare),"the type of key cannot compare");
  161. };
  162. template<typename K>
  163. FilterPolicy<K>* FilterPolicy<K>::creat_Bloomfilter(int N,int P)
  164. {
  165. if(producter!=nullptr)
  166. {
  167. delete producter;
  168. }
  169. producter=new bloomfilter<K>(N,P);
  170. return producter;
  171. }
  172. template<typename K>
  173. FilterPolicy<K>* FilterPolicy<K>::creat_Cuckoofilter()
  174. {
  175. if(producter!=nullptr)
  176. {
  177. delete producter;
  178. }
  179. producter=new Cuckoofilter<K>;
  180. return producter;
  181. }
  182. }