码迷,mamicode.com
首页 > Windows程序 > 详细

[原创]大数据:布隆过滤器C#版简单实现。

时间:2018-11-29 15:07:04      阅读:215      评论:0      收藏:0      [点我收藏+]

标签:bitarray   abi   ==   bubuko   readline   string   start   hash   summary   

    public class BloomFilter
    {
        public BitArray _BloomArray;
        public Int64 BloomArryLength { get; }
        public Int64 DataArrayLeng { get; }
        public Int64 BitIndexCount { get; }

        /// <summary>
        /// 初始化
        /// </summary>
        /// <param name="BloomArryLength">布隆数组的大小</param>
        /// <param name="DataArrayLeng">数据的长度</param>
        /// <param name="bitIndexCount">hash数</param>
        public BloomFilter(int BloomArryLength,int DataArrayLeng,int bitIndexCount)
        {
            _BloomArray = new BitArray(BloomArryLength);
            this.BloomArryLength = BloomArryLength;
            this.DataArrayLeng = DataArrayLeng;
            this.BitIndexCount = bitIndexCount;
        }

        
        public void Add(string str)
        {
            var hashCode = GetHashCode(str);
            Random random = new Random(hashCode);
            for (int i = 0; i < BitIndexCount; i++)
            {
                var c = random.Next((int)(this.BloomArryLength - 1));
                _BloomArray[c] = true;
            }
        }

        public bool isExist(string str)
        {
            var hashCode = GetHashCode(str);
            Random random = new Random(hashCode);
            for (int i = 0; i < BitIndexCount; i++)
            {
                if(!_BloomArray[random.Next((int)(this.BloomArryLength - 1))])
                {
                    return false;
                }
            }
            return true;
        }

        public int GetHashCode(object value)
        {
            return value.GetHashCode();
        }

        public double getFalsePositiveProbability()
        {
            // (1 - e^(-k * n / m)) ^ k
            return Math.Pow((1 - Math.Exp(-BitIndexCount * (double)DataArrayLeng / BloomArryLength)),
                    BitIndexCount);
        }
    }

 

        static void Main(string[] args)
        {
            Bloom_Filter.BloomFilter bloom = new Bloom_Filter.BloomFilter(200000000, 50000000, 3);//五千万条数据

            for (int i = 0; i < bloom.DataArrayLeng; i++)//五千万条数据
            {
                bloom.Add(i.ToString());
            }
            do
            {
                var c = Console.ReadLine();
                if (c == "e")
                    break;
                Stopwatch sw = new Stopwatch();
                sw.Start();
                var temp=bloom.isExist(c);
                sw.Stop();
                Console.WriteLine($"查找:{c}\n结果:{temp}\n总耗时:{sw.ElapsedTicks}\n错误概率:{bloom.getFalsePositiveProbability()}");
            } while (true);
        }

结果:使用内存27MB,查找结果一般在100毫秒以内。

技术分享图片

 

[原创]大数据:布隆过滤器C#版简单实现。

标签:bitarray   abi   ==   bubuko   readline   string   start   hash   summary   

原文地址:https://www.cnblogs.com/yueyue184/p/10037587.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!