{******************************************************************************}
{                       CnPack For Delphi/C++Builder                           }
{                     йԼĿԴ                         }
{                   (C)Copyright 2001-2024 CnPack                        }
{                   ------------------------------------                       }
{                                                                              }
{            ǿԴ CnPack ķЭ        }
{        ĺ·һ                                                }
{                                                                              }
{            һĿϣãûκεû        }
{        ʺضĿĶĵϸ CnPack Э顣        }
{                                                                              }
{            ӦѾͿһյһ CnPack Эĸ        }
{        ûУɷǵվ                                            }
{                                                                              }
{            վַhttps://www.cnpack.org                                  }
{            ʼmaster@cnpack.org                                       }
{                                                                              }
{******************************************************************************}

unit CnBloomFilter;
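{* |<PRE>
================================================================================
* Software name: development package base library
* Unit name: Bloom filter unit for fast lookup based on hash mapping
* Unit author: CnPack Team
* Remarks: The number of hash functions is optimal when it equals
*               ln2 * (bit count / string count).
*               The presets below were chosen by hand for each capacity range:
*               1) 10^3 strings: 4 hash functions, 32 bits per string
*                  (small data sets: lower error, more memory per string)
*               2) 10^4 strings: 6 hash functions, 30 bits per string
*               3) 10^5 strings: 8 hash functions, 25 bits per string
*               4) 10^6 strings: 10 hash functions, 20 bits per string
*               5) 10^7 strings: 12 hash functions, 18 bits per string
*               6) 10^8 strings: 15 hash functions, 15 bits per string
*                  (large data sets: higher error, less memory per string)
*               The hash functions are CRC32 with different initial values.
* Platform: Win 7 + Delphi 5.0
* Compatibility test: not yet performed
* Localization: this unit requires no localization
* Modification history: 2023.01.31 V1.1
*               Added cross-platform support
*           2015.05.22 V1.0
*               Unit created
================================================================================
|</PRE>}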

interface

{$I CnPack.inc}

uses
  SysUtils, Classes, CnCRC32;

const
  { Approximation of ln 2. Not referenced by the code in this unit. }
  CN_LN_2 = 0.69314718;

  { Initial CRC32 values, one per hash function: hash function number I is
    StrCRC32 started from entry I. The table has 16 entries; the largest
    preset in CalcSize uses 15 of them. }
  CN_BLOOM_HASH_CRC32S: array[0..15] of Cardinal =
    ($00000000, $11111111, $22222222, $33333333, $44444444,
     $55555555, $66666666, $77777777, $88888888, $99999999,
     $AAAAAAAA, $BBBBBBBB, $CCCCCCCC, $DDDDDDDD, $EEEEEEEE,
     $FFFFFFFF);

type
  ECnBloomFilterSizeException = class(Exception);
  {* Raised when the requested capacity matches none of the known presets}

  TCnBloomFilterCapacity = (bfc10Power3, bfc10Power4, bfc10Power5, bfc10Power6,
    bfc10Power7, bfc10Power8);
  {* Capacity presets: expected number of strings, from 10^3 up to 10^8}

  TCnHashStringFunc = function (const Str: string): Cardinal;
  {* Signature of a string hash function. Not referenced by the code in this unit}

  TCnStringBloomFilter = class
  {* Bloom filter for strings, giving fast hash-based membership tests}
  private
    FBits: TBits;                     // The bit array backing the filter
    FHashFuncCount: Integer;          // Number of CRC32 hash functions in use
    FHashResults: array of Cardinal;  // Scratch buffer: bit index per hash function
    FBitSize: Cardinal;               // Total number of bits in FBits
    FCapacity: Integer;               // Expected number of strings for the preset
    FCount: Integer;                  // Number of successful AddString calls
    procedure CalcSize(ACapacity: TCnBloomFilterCapacity);
  public
    constructor Create(ACapacity: TCnBloomFilterCapacity = bfc10Power4);
    destructor Destroy; override;

    function StrExists(const Str: string): Boolean;
    {* Tests whether a string may have been added. False means it was
       definitely never added; True means it is probably present, because
       a Bloom filter can report false positives}
    function AddString(const Str: string): Boolean;
    {* Adds a string to the filter. Returns False for an empty string,
       which is not stored, and True otherwise}
    property Count: Integer read FCount;
    {* Number of successful AddString calls. Repeated strings are counted again}
  end;

implementation

resourcestring
  // Message of ECnBloomFilterSizeException, raised by CalcSize for an unknown preset
  SCnErrorNoProperSize = 'NO Proper Size Specified.';

{ TCnStringBloomFilter }

// Adds Str to the filter by setting one bit per hash function.
// Returns False, changing nothing, when Str is empty; returns True otherwise.
// Count is increased on every successful call, including repeated strings.
function TCnStringBloomFilter.AddString(const Str: string): Boolean;
var
  HashIdx: Integer;
  BitPos: Cardinal;
begin
  Result := Str <> '';
  if not Result then
    Exit;

  HashIdx := 0;
  while HashIdx < FHashFuncCount do
  begin
    // Each hash function is CRC32 started from its own initial value,
    // reduced to a valid bit index.
    BitPos := StrCRC32(CN_BLOOM_HASH_CRC32S[HashIdx], Str) mod FBitSize;
    FHashResults[HashIdx] := BitPos;
    FBits[BitPos] := True;
    Inc(HashIdx);
  end;

  Inc(FCount);
end;

// Translates a capacity preset into the filter parameters: the number of hash
// functions, the expected string count and the total bit count. It then sizes
// the scratch buffer that holds one bit index per hash function.
// Raises ECnBloomFilterSizeException when ACapacity is not a known preset.
procedure TCnStringBloomFilter.CalcSize(ACapacity: TCnBloomFilterCapacity);

  // Stores one preset; the bit count is the expected string count times BitsPerItem.
  procedure ApplyPreset(HashCount, ItemCount, BitsPerItem: Integer);
  begin
    FHashFuncCount := HashCount;
    FCapacity := ItemCount;
    FBitSize := ItemCount * BitsPerItem;
  end;

begin
  case ACapacity of
    bfc10Power3: ApplyPreset(4, 1000, 32);
    bfc10Power4: ApplyPreset(6, 10000, 30);
    bfc10Power5: ApplyPreset(8, 100000, 25);
    bfc10Power6: ApplyPreset(10, 1000000, 20);
    bfc10Power7: ApplyPreset(12, 10000000, 18);
    bfc10Power8: ApplyPreset(15, 100000000, 15);
  else
    raise ECnBloomFilterSizeException.Create(SCnErrorNoProperSize);
  end;

  SetLength(FHashResults, FHashFuncCount);
end;

// Creates a filter sized for the given capacity preset.
// Raises ECnBloomFilterSizeException (from CalcSize) for an unknown preset.
constructor TCnStringBloomFilter.Create(ACapacity: TCnBloomFilterCapacity);
begin
  inherited Create;
  // Must run first: it sets FBitSize, which sizes the bit array below.
  CalcSize(ACapacity);
  FBits := TBits.Create;
  FBits.Size := FBitSize;
end;

// Releases the bit array owned by the filter.
destructor TCnStringBloomFilter.Destroy;
begin
  // Free is nil-safe, so this also works if the constructor raised in
  // CalcSize before FBits was created.
  FBits.Free;
  inherited;
end;

// Tests whether Str may have been added to the filter.
// Returns False when Str was definitely never added. Returns True when every
// bit selected by the hash functions is set, which means Str is probably
// present; a Bloom filter can report false positives.
function TCnStringBloomFilter.StrExists(const Str: string): Boolean;
var
  I: Integer;
begin
  Result := False;
  // AddString refuses empty strings, so an empty string can never be present.
  // Without this guard its hash bits could already be set by other strings,
  // giving an avoidable false positive.
  if Str = '' then
    Exit;

  for I := 0 to FHashFuncCount - 1 do
  begin
    // Same hashing as AddString: CRC32 started from the I-th initial value,
    // reduced to a bit index.
    FHashResults[I] := StrCRC32(CN_BLOOM_HASH_CRC32S[I], Str) mod FBitSize;
    // A single clear bit proves the string was never added.
    if not FBits[FHashResults[I]] then
      Exit;
  end;
  Result := True;
end;

end.
