tokenize method Null safety
- SourceText text,
- {NGramRange? nGramRange,
- Zone? zone,
- TokenizingStrategy strategy = TokenizingStrategy.terms}
override
Extracts one or more tokens from source for use in full-text search
queries and indexes.
nGramRange is the range of N-gram lengths to generate, and zone is the name of the zone in a document in which the term is located.
Returns a Future that completes with a List<Token>.
Implementation
/// Extracts tokens from [text] for use in full-text search queries and
/// indexes.
///
/// Unless [strategy] is [TokenizingStrategy.keyWords], term and n-gram
/// tokens are collected first, using the range resolved from [nGramRange]
/// and [strategy]. When [strategy] is [TokenizingStrategy.keyWords] or
/// [TokenizingStrategy.all], keyword tokens are collected as well and merged
/// with the tokens gathered so far. [zone] is forwarded unchanged to the
/// token-producing helpers.
///
/// The merged tokens are de-duplicated and, if `tokenFilter` is not null,
/// passed through it before being returned.
@override
Future<List<Token>> tokenize(SourceText text,
    {NGramRange? nGramRange,
    Zone? zone,
    TokenizingStrategy strategy = TokenizingStrategy.terms}) async {
  final keywordsOnly = strategy == TokenizingStrategy.keyWords;
  final includeKeywords = keywordsOnly || strategy == TokenizingStrategy.all;

  final collected = <Token>[];

  // Term and n-gram tokens are produced for every strategy except keyWords.
  if (!keywordsOnly) {
    final range = _effectiveNGramRange(nGramRange, strategy);
    final termTokens = await _nGramAndTermTokens(text, range, zone);
    collected.addAll(termTokens);
  }

  // Keyword tokens are merged in through _newKeywordTokens, which receives
  // the terms collected so far (presumably to skip keywords already present
  // as terms; confirm against the helper).
  if (includeKeywords) {
    final candidates = _keyWordTokens(text, zone);
    final knownTerms = collected.map((token) => token.term);
    collected.addAll(_newKeywordTokens(knownTerms, candidates));
  }

  // Drop duplicate tokens.
  final List<Token> unique = _toOrderedSet(collected);

  // Apply the optional token filter; without one the tokens are returned
  // as they are.
  final filter = tokenFilter;
  if (filter == null) {
    return unique;
  }
  return await filter(unique);
}