Browse Source

完成bloomfilter的哈希函数,并且扩充operatorchecking中的int强制类型转换检查

Gogs 4 months ago
parent
commit
9e13eacd41
6 changed files with 9709 additions and 26 deletions
  1. 9592 0
      append/primes-to-100k.txt
  2. 21 4
      lib/basic/nanxing_operator_check.h
  3. 86 12
      lib/extend/filter.h
  4. 8 8
      lib/extend/skiplist.h
  5. 1 1
      lib/nanxing_memory.h
  6. 1 1
      lib/others/skiplist.h

File diff suppressed because it is too large
+ 9592 - 0
append/primes-to-100k.txt


+ 21 - 4
lib/basic/nanxing_operator_check.h

@@ -10,6 +10,7 @@
 #define NANXING_TYPETRAIT_TEMPLATE_T_V_ template<typename T ,typename V =void >   //header of a trait's primary template: T plus a defaulted V slot
 #define NANXING_TYPETRAIT_TEMPLATE_T_ template<typename T >    //header of a trait's partial specialization
 #define NANXING_BASIC_OPERATOR_(Type, Op) nanxing::Op##_admit<Type>::value   //note: must be written exactly like this (Op pasted with ##) to work
+#define NANXING_OPERATOR_FORBIDEN_(Type,Op) nanxing::Op##_forbiden<Type>::value   //pastes the Op argument: (V,point) expands to nanxing::point_forbiden<V>::value
 namespace nanxing{
     template<typename...>         //variadic on purpose: accepts any list of types so the partial specializations below can use it for template-argument matching
     using void_t=void;
@@ -79,7 +80,7 @@ namespace nanxing{
     struct fun_admit:std::false_type{};
 
     NANXING_TYPETRAIT_TEMPLATE_T_
-    struct fun_admit<T,void_t<decltype(std::declval<T>()())>>:std::true_type{};
+    struct fun_admit<T,void_t<decltype((std::declval<T>())())>>:std::true_type{};     //chosen when a value of type T can be called with no arguments
     
 
     //T类型允许从一个T类型的实例构造(即拷贝构造)
@@ -87,9 +88,25 @@ namespace nanxing{
     struct copy_construct_admit:std::false_type{};
 
     NANXING_TYPETRAIT_TEMPLATE_T_
-    struct copy_construct_admit<T,void_t<decltype(T(std::declval<T>(T)))>>:std::true_type{};      
-    //Next, a different technique is used to decide whether a function exists
-    //namely template matching on functions
+    struct copy_construct_admit<T,void_t<decltype(T(std::declval<T>()))>>:std::true_type{};      //chosen when T(std::declval<T>()) is well-formed, i.e. T can be constructed from an rvalue T
+
+    //T may be explicitly converted to int
+    NANXING_TYPETRAIT_TEMPLATE_T_V_
+    struct convert_into_int_admit:std::false_type{};     //fallback: no usable conversion
+
+    NANXING_TYPETRAIT_TEMPLATE_T_
+    struct convert_into_int_admit<T,void_t<decltype(static_cast<int>(std::declval<T>()))>>:std::true_type{};     //chosen when static_cast<int>(T value) is well-formed
+
+    //Traits asserting that an operation is NOT available
+
+    //T must not support dereferencing
+    NANXING_TYPETRAIT_TEMPLATE_T_V_
+    struct point_forbiden:std::true_type{};     //default: no unary * found, so the requirement holds
+
+    NANXING_TYPETRAIT_TEMPLATE_T_
+    struct point_forbiden<T,void_t<decltype(*(std::declval<T>()))>>:std::false_type{};     //chosen when *value is well-formed, so the requirement fails
+
+
 
 
 }

+ 86 - 12
lib/extend/filter.h

@@ -1,41 +1,115 @@
-
+#include<iostream>
+#include<cmath>
+#include<cstdlib>
+#include<cstring>
+#include"../basic/nanxing_operator_check.h"
 namespace nanxing_extend
 {
     //Not the layout originally planned: the factory and the concrete filters were meant to live in separate files, but with the header-only approach the file organisation is still undecided, so they share this header for now
+    //FilterPolicy is also the controlling class: it holds at most one filter at a time, the one it produced last
+    //Without that control the filter's storage could not be released reliably, because C++ offers too many allocation styles (malloc, new, new[], plain arrays, ...) for outside code to know how a filter allocated its internals
+    //Returning a smart pointer would also work, but was not chosen because filters are never numerous
+    //A program holds one or two filters at most
     //Still, this is not meant to become a God class
+    template<typename K>
     class FilterPolicy
     {
+        static_assert(NANXING_BASIC_OPERATOR_(K,compare),"the type of key cannot compare");
     private:
         FilterPolicy& operator=(FilterPolicy const&)=delete;
         FilterPolicy(FilterPolicy const&)=delete;       
+        FilterPolicy* producter=nullptr;                        //filter produced by this factory; owned here and released by the destructor
     public:
         //Once a factory exists it can only be moved, never copied, so a single factory remains
         //move-only    
         FilterPolicy(){};
-        FilterPolicy(FilterPolicy&&){};           
-
+        FilterPolicy(FilterPolicy&& other)noexcept:producter(other.producter){other.producter=nullptr;}     //moving hands the owned filter over instead of dropping it
+        virtual void policy_print(){if(producter!=nullptr){producter->policy_print();}}     //print the sizing formulas; no-op while no filter has been created
+        virtual void parameter_print(){if(producter!=nullptr){producter->parameter_print();}}     //print the filter parameters; no-op while no filter has been created
         //Two factory functions, one per kind of filter    
-        FilterPolicy* creat_Bloomfilter();
-        FilterPolicy* creat_Cuckoofilter();      
+        //Both return a pointer that stays owned by this factory (do not delete it); creating a new filter destroys the previous one
+        FilterPolicy* creat_Bloomfilter(int N,float P);     //N: data length, P: probability parameter; call init_filter() afterwards
+        FilterPolicy* creat_Cuckoofilter();  
+        virtual void init_filter(){if(producter!=nullptr){producter->init_filter();}}     //forward to the owned filter; no-op while no filter has been created
+        virtual ~FilterPolicy(){delete producter;}     //release the filter this factory produced (delete on nullptr is a no-op)
     };
-
+    template<typename K>
-    class bloomfilter:public FilterPolicy
+    class bloomfilter:public FilterPolicy<K>
     {
-
+        static_assert(NANXING_BASIC_OPERATOR_(K,compare),"the type of key cannot compare");
+        static_assert(NANXING_BASIC_OPERATOR_(K,convert_into_int),"the type of key cannot convert into int");     //hash_function needs static_cast<int>(key)
+    private:
+        //The hash functions are linear congruential mixes, chosen for their balance between randomness and speed
+        constexpr static const int hash_function_count=10;                //number of (multiplier, offset) pairs available below
+        constexpr static const int hash_prime_number1[hash_function_count]={52223,52237,79939,19937,8243,24133,12647,13147,62459,94547};                //10 large primes used as the multipliers of the hash functions
+        constexpr static const int hash_prime_number2[hash_function_count]={94399,94421,94427,94433,94439,94441,94447,94463,94477,94483};             //offsets of the hash functions
+        struct param{
+            float P;          //probability parameter
+            int k;             //number of hash functions
+            int n;             //data length
+            int m;              //length of the bloom filter in bits
+            param(float _p,int _n):P(_p),k(0),n(_n),m(0){}     //k and m stay 0 until caculater() runs
+        };
+    private:
+        u_int64_t* bitarray;               //core storage of the bloom filter: a bit array packed into 64-bit words
+        param parameter;
+        
+    protected:
+        bloomfilter()=delete;
+        bloomfilter(bloomfilter&)=delete;
+        bloomfilter& operator=(bloomfilter)=delete;           //copying is not allowed
+        //Compute m and k from n and P using the formulas shown by policy_print()
+        //m is forced to at least 1 when the formula gives a non-finite or too small value; k is kept within [1,hash_function_count]
+        void caculater()noexcept
+        {
+            const double bits=-(static_cast<double>(parameter.n)*std::log(static_cast<double>(parameter.P)))/0.4804530139;     //0.4804530139 is (ln2)^2
+            parameter.m=(std::isfinite(bits)&&bits>=1.0)?static_cast<int>(bits):1;
+            //floating-point ratio: the integer division m/n used before truncated it (and divided by zero for n==0)
+            const double hashes=(parameter.n>0)?0.6931471806*(static_cast<double>(parameter.m)/parameter.n):1.0;
+            int count=static_cast<int>(hashes);
+            if(count<1){count=1;}
+            if(count>hash_function_count){count=hash_function_count;}     //only hash_function_count seed pairs exist
+            parameter.k=count;
+        }
+        //i selects the i-th hash function (0<=i<hash_function_count); returns the bit position of data in [0,m)
+        //Precondition: init_filter() has run, so that m is positive
+        int hash_function(int i,K data)
+        {
+            //unsigned 64-bit arithmetic: no signed overflow and never a negative position
+            const u_int64_t key=static_cast<u_int64_t>(static_cast<int>(data));
+            const u_int64_t mixed=key*static_cast<u_int64_t>(hash_prime_number1[i])+static_cast<u_int64_t>(hash_prime_number2[i]);
+            return static_cast<int>(mixed%static_cast<u_int64_t>(parameter.m));
+        }
+    public:
+        bloomfilter(float _p,int _n):bitarray(nullptr),parameter(_p,_n){}
+        //Compute the parameters and allocate a zeroed bit array of m/64+1 words; calling it again replaces the previous array
+        void init_filter()override{
+            caculater();
+            delete[] bitarray;
+            bitarray=nullptr;               //keeps the destructor safe if the allocation below throws
+            bitarray=new u_int64_t[static_cast<std::size_t>(parameter.m/64)+1]();           //trailing () value-initializes every word to 0
+        }
+        void policy_print()override                  //print the formulas in use
+        {
+            std::cout<<"m=-((nlnP)/((ln2)^2))"<<"//m为bloomfilter的长度"<<std::endl;
+            std::cout<<"k=ln2*(m/n)"<<"//k为所需的哈希函数的个数"<<std::endl;
+        }
+        ~bloomfilter()override
+        {
+            delete[] bitarray;               //delete[] on nullptr is a no-op
+        }  
     };
 
+    template<typename K>
-    class Cuckoofilter:public FilterPolicy
+    class Cuckoofilter:public FilterPolicy<K>
     {
-
+        static_assert(NANXING_BASIC_OPERATOR_(K,compare),"the type of key cannot compare");
     };
 
-    FilterPolicy* FilterPolicy::creat_Bloomfilter()
+    //Create a bloom filter for N items with probability parameter P and make it the filter owned by this factory
+    //The new filter is built before the previous one is destroyed, so a failed allocation leaves the factory unchanged
+    template<typename K>
+    FilterPolicy<K>* FilterPolicy<K>::creat_Bloomfilter(int N,float P)
     {
-        return new bloomfilter;
+        FilterPolicy<K>* created=new bloomfilter<K>(P,N);     //constructor order is (probability, data length)
+        delete producter;
+        producter=created;
+        return producter;
     }
 
-    FilterPolicy* FilterPolicy::creat_Cuckoofilter()
+    //Create a cuckoo filter and make it the filter owned by this factory, destroying the previous one
+    template<typename K>
+    FilterPolicy<K>* FilterPolicy<K>::creat_Cuckoofilter()
     {
-        return new Cuckoofilter;
+        FilterPolicy<K>* created=new Cuckoofilter<K>;
+        delete producter;
+        producter=created;
+        return producter;
     }
 }

+ 8 - 8
lib/extend/skiplist.h

@@ -1,4 +1,4 @@
-#include"../basical/type_checking.h"
+#include"../basic/nanxing_operator_check.h"
 #include<cstdlib>
 #include<cstring> 
 #include<exception>
@@ -53,13 +53,13 @@ namespace nanxing_extend
 
     };
 
-    //注意这里的V只能是要么是能直接深拷贝的类型,要么是指向堆上数据的指针类型
+    //注意这里的V只能是非指针类型,即侵入式数据结构因为这样的内存是可控的
     template<typename K,typename V>
     struct skip_node
     {
-        static_assert(nanxing_extend::compare_admit<K>::value,"the type of K is error");
-        static_assert(nanxing_extend::compare_admit<V>::value,"the type of V is error");
-        static_assert(nanxing_extend:point_forbid<V>::value,"the type of V cannot be point");              //限定为侵入式数据结构
+        static_assert(NANXING_BASIC_OPERATOR_(K,compare),"the type of K is error");
+        static_assert(NANXING_BASIC_OPERATOR_(V,compare),"the type of V is error");
+        static_assert(NANXING_OPERATOR_FORBIDEN_(V,point),"the type of V cannot be point");              //限定为侵入式数据结构
         skip_node<K,V>** next_node; 
         V value;
         K key;
@@ -90,9 +90,9 @@ namespace nanxing_extend
     template<typename K,typename V>
     class skipList
     {
-        static_assert(nanxing_extend::compare_admit<K>::value,"the type of K is error");
-        static_assert(nanxing_extend::compare_admit<V>::value,"the type of V is error");
-        static_assert(nanxing_extend:point_forbid<V>::value,"the type of V cannot be point");              //限定为侵入式数据结构
+        static_assert(NANXING_BASIC_OPERATOR_(K,compare),"the type of K is error");
+        static_assert(NANXING_BASIC_OPERATOR_(V,compare),"the type of V is error");
+        static_assert(NANXING_OPERATOR_FORBIDEN_(V,point),"the type of V cannot be point");              //限定为侵入式数据结构
     private:
         using Node=skip_node<K,V>;
         using ptr=Node*;

+ 1 - 1
lib/nanxing_memory.h

@@ -1,7 +1,7 @@
 #ifndef NANXING_MEMORY_
 #define NANXING_MEMORY_
 
-#include<new.h>                    //访问系统提供的new函数
+#include<new>                    //访问系统提供的new函数
 #include"backward/nanxing_auto_ptr.h"        //auto_ptr指针
 #include"basic/nanxing_defalloc.h"              //空间分配函数
 #include"others/nanxing_memory_pool.h"         //线程池

+ 1 - 1
lib/others/skiplist.h

@@ -11,7 +11,7 @@
 #include<random>
 #include<chrono>
 #include<optional>
-#include"../nanxing_memory.h"
+#include"nanxing_memory.h"
 
 namespace nanxing
 {