filter.h 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. #include<iostream>
  2. #include<cmath>
  3. #include<cstdlib>
  4. #include<cstring>
  5. #include<exception>
  6. #include<cstdint>
  7. #include"../basic/nanxing_operator_check.h"
  8. namespace nanxing_extend
  9. {
  10. //这是个意外,本来是想工厂类和不同的过滤器划分到不同文件的,但是由于贯彻head_only,还没想出来怎么组织文件,只能丢在一起了
  11. //同时也是一个控制类,即这个类本身带有控制功能,只允许一次产生持有唯一的filter
  12. //如果不对产生的类进行管控,最后的结果很可能是完全无法处理,因为C++中内存分配方式过于多样,malloc,new,new[],数组。。。。乱七八糟,最后类内生成的空间完全无法析构,因为根本不知道类内怎么实现的
  13. //当然也可以通过返回智能指针的方式进行管控,但是没有选择那么做的原因在于过滤器本身不会大量存在
  14. //一个程序中最多一到两个过滤器
  15. //但是但是相信我不会有God class
  16. template<typename K>
  17. class FilterPolicy
  18. {
  19. static_assert(NANXING_BASIC_OPERATOR_(K,compare),"the type of key cannot compare");
  20. private:
  21. FilterPolicy& operator=(FilterPolicy const&)=delete;
  22. FilterPolicy(FilterPolicy const&)=delete;
  23. FilterPolicy* producter=nullptr; //用于管控生成的类
  24. public:
  25. //限制当主动生成一个工厂类后,不管怎么赋值,最后只有一个工厂
  26. //move-only
  27. FilterPolicy(){}
  28. FilterPolicy(FilterPolicy&&){}
  29. virtual void policy_print(){} //输出对应的计算公式
  30. #ifdef NANXING_DEBUG_
  31. virtual void parameter_print(){} //输出过滤器的参数
  32. #endif
  33. virtual void insert(K key)noexcept{} //当向数据库插入数据后对过滤器进行调整
  34. [[nodiscard]]
  35. //cppcheck还不能识别C++17引入的属性
  36. virtual bool check(K key)noexcept{}
  37. //两个工厂函数用于生成不同的过滤器
  38. FilterPolicy* creat_Bloomfilter(int N,float P);
  39. FilterPolicy* creat_Cuckoofilter();
  40. virtual void init_filter(){} //初始化过滤器
  41. #ifdef NANXING_DEBUG_
  42. virtual auto get_k()->uint64_t{}
  43. virtual auto get_m()->uint64_t{}
  44. #endif
  45. virtual ~FilterPolicy(){
  46. if(producter!=nullptr)
  47. {
  48. delete producter;
  49. }
  50. };
  51. };
  52. //bloomfilter
  53. template<typename K>
  54. class bloomfilter:public FilterPolicy<K>
  55. {
  56. static_assert(NANXING_BASIC_OPERATOR_(K,compare),"the type of key cannot compare");
  57. private:
  58. //这里使用线性混合同余来构造哈希函数,因为线性混合同余的随机性和运算效率的平衡比较好
  59. constexpr static const int hash_prime_number1[10]={52223,52237,79939,19937,8243,24133,12647,13147,62459,94547}; //给出10个大素数作为哈希函数的素数种子
  60. constexpr static const int hash_prime_number2[10]={94399,94421,94427,94433,94439,94441,94447,94463,94477,94483}; //哈希函数的置偏值
  61. struct param{
  62. float P; //概率参数
  63. int k=0; //哈希函数个数
  64. uint64_t n; //n数据长度
  65. uint64_t m=0; //bloomfilter长度
  66. param(float _p,uint64_t _n):P(_p),n(_n){}
  67. };
  68. private:
  69. char* bitarray; //bloomfilter的核心数据结构,一个比特数组
  70. param parameter;
  71. protected:
  72. bloomfilter()=delete;
  73. bloomfilter(bloomfilter&)=delete;
  74. bloomfilter& operator=(bloomfilter)=delete; //不允许复制构造
  75. void caculater()noexcept //计算bloomfilter的参数
  76. {
  77. this->parameter.m=static_cast<uint64_t>(-(parameter.n*std::log(parameter.P)/0.48045301)); //这两个数字分别是ln2和(ln2)^2,位公式中的常数
  78. this->parameter.k=static_cast<uint64_t>(0.693147*(parameter.m/parameter.n)+1);
  79. }
  80. int hash_function(int i,K data) //i代表第i个哈希函数,返回在bloomfilter中的位置
  81. {
  82. return ((static_cast<int>(data))*(this->hash_prime_number1[i])+(this->hash_prime_number2[i]))%(this->parameter.m);
  83. }
  84. public:
  85. bloomfilter(float _p,uint64_t _n):bitarray(nullptr),parameter(_p,_n){}
  86. void init_filter()override{
  87. LOOP:
  88. caculater();
  89. LOOP1:
  90. try{
  91. bitarray=new char[static_cast<uint64_t>(parameter.m/8)+1]; //构建数组
  92. }
  93. catch(std::bad_alloc)
  94. {
  95. char tmp_input;
  96. std::cerr<<"May not have enough memory to use"<<std::endl;
  97. std::cerr<<"if you want to try again,please input r"<<std::endl;
  98. std::cerr<<"if you want to exit please input e"<<std::endl;
  99. std::cerr<<"if you want to use less bloomfilter with higher error rates,please input p"<<std::endl;
  100. std::cin>>tmp_input;
  101. switch(tmp_input)
  102. {
  103. case 'r':
  104. goto LOOP1;
  105. break;
  106. case 'e':
  107. std::terminate();
  108. break;
  109. case 'p':
  110. std::cerr<<"please input the new P by float"<<std::endl;
  111. std::cin>>this->parameter.P;
  112. goto LOOP;
  113. break;
  114. }
  115. }
  116. std::memset(bitarray,0,sizeof(char)*static_cast<u_int64_t>(parameter.m/8)+1); //初始化
  117. }
  118. void policy_print()override //打印出使用的公式
  119. {
  120. std::cout<<"m=-((nlnP)/((ln2)^2))"<<"//m为bloomfilter的长度"<<std::endl;
  121. std::cout<<"k=ln2*(m/n)"<<"//k为所需的哈希函数的个数"<<std::endl;
  122. }
  123. void insert(K key)noexcept override
  124. {
  125. if(this->parameter.k==0)
  126. {
  127. std::cerr<<"the filter never init,and the filter is useless."<<std::endl;
  128. return;
  129. }
  130. for(int i=0;i<this->parameter.k;i++)
  131. {
  132. int tmp=this->hash_function(i,key);
  133. uint64_t filter_u=static_cast<uint64_t>(tmp/8);
  134. int move=tmp%8;
  135. this->bitarray[filter_u]=this->bitarray[filter_u]|('1'<<(7-move));
  136. }
  137. }
  138. [[nodiscard]]bool check(K key)noexcept override
  139. {
  140. if(this->parameter.k==0)
  141. {
  142. std::cerr<<"the filter never init,and the filter is useless."<<std::endl;
  143. return false;
  144. }
  145. for(int i=0;i<this->parameter.k;i++)
  146. {
  147. int tmp=this->hash_function(i,key);
  148. uint64_t filter_u=static_cast<uint64_t>(tmp/8);
  149. int move=tmp%8;
  150. if((this->bitarray[filter_u]&('1'<<(7-move)))==0) //若对应位为0
  151. {
  152. return false;
  153. }
  154. }
  155. return true;
  156. }
  157. #ifdef NANXING_DEBUG_
  158. void parameter_print()override
  159. {
  160. std::cout<<"(m,n,k,p) = "<<"("<<this->parameter.m<<","<<parameter.n<<","<<parameter.k<<","<<parameter.P<<")";
  161. }
  162. auto get_k()->uint64_t override
  163. {
  164. return this->parameter.k;
  165. }
  166. auto get_m()->uint64_t override
  167. {
  168. return this->parameter.m;
  169. }
  170. #endif
  171. virtual ~bloomfilter()
  172. {
  173. if(bitarray!=nullptr)
  174. {
  175. delete[] bitarray;
  176. }
  177. }
  178. };
  179. //布谷鸟过滤器
  180. template<typename K>
  181. class Cuckoofilter:public FilterPolicy<K>
  182. {
  183. static_assert(NANXING_BASIC_OPERATOR_(K,compare),"the type of key cannot compare");
  184. };
  185. template<typename K>
  186. FilterPolicy<K>* FilterPolicy<K>::creat_Bloomfilter(int N,float P)
  187. {
  188. if(producter!=nullptr)
  189. {
  190. delete producter;
  191. }
  192. producter=new bloomfilter<K>(P,N);
  193. return producter;
  194. }
  195. template<typename K>
  196. FilterPolicy<K>* FilterPolicy<K>::creat_Cuckoofilter()
  197. {
  198. if(producter!=nullptr)
  199. {
  200. delete producter;
  201. }
  202. producter=new Cuckoofilter<K>;
  203. return producter;
  204. }
  205. }