termFilter property Null safety
override
A filter function that returns a collection of terms derived from the input `term`:
- return an empty collection if the term is to be excluded from analysis;
- return multiple terms if the term is split; and/or
- return modified term(s), such as applying a stemmer algorithm.
Implementation
/// A [TermFilter] that expands one term into the set of terms to analyse.
///
/// The returned function works on the whitespace-trimmed input and:
/// - returns an empty set when the term is empty or is in [stopWords];
/// - collects the term and a copy without periods when it is a key of
///   [abbreviations];
/// - collects the trimmed [termExceptions] entry when one exists;
/// - otherwise passes the term through [characterFilter] and, when the
///   result is longer than one character and not in [stopWords], collects
///   it together with a copy stripped of apostrophes and hyphens and the
///   parts obtained by splitting it at non-word characters.
///
/// Each collected term is then replaced by its [termExceptions] entry or by
/// its lemmatized and stemmed form. Empty results are dropped.
@override
TermFilter get termFilter => (Term term) async {
      // Surrounding white-space never belongs to the term.
      final trimmed = term.trim();
      final wholeTermException = termExceptions[trimmed]?.trim();

      // Empty terms and stop words yield no terms at all.
      if (trimmed.isEmpty || stopWords.contains(trimmed)) {
        return <String>{};
      }

      // Insertion-ordered set of terms awaiting lemmatizing and stemming.
      final candidates = <String>{};
      if (abbreviations.keys.contains(trimmed)) {
        // Keep the abbreviation as written plus a version without periods.
        candidates
          ..add(trimmed)
          ..add(trimmed.replaceAll('.', '').trim());
      } else if (wholeTermException != null) {
        candidates.add(wholeTermException);
      } else {
        // NOTE(review): [characterFilter] is defined elsewhere; it is
        // presumed to normalise quote marks and dashes and to strip
        // trailing punctuation - confirm against its implementation.
        final filtered = characterFilter(trimmed);
        if (!stopWords.contains(filtered) && filtered.length > 1) {
          candidates
            ..add(filtered)
            // A variant with every apostrophe and hyphen removed.
            ..add(filtered.replaceAll(RegExp(r"['\-]"), '').trim());

          // Runs of non-word characters act as separators when the
          // character before or after the run is not a digit, which
          // appears intended to keep values such as decimals in one piece.
          final separator = RegExp(
              r'(?<=[^0-9\b])[^a-zA-Z0-9À-öø-ÿ]+|[^a-zA-Z0-9À-öø-ÿ]+(?=[^0-9\b])');
          for (final part in filtered.split(separator)) {
            final partException = termExceptions[part.trim()]?.trim();
            if (partException != null) {
              candidates.add(partException);
              continue;
            }
            // Skip blanks, stop words and single characters (for example
            // the "s" left over from a possessive).
            if (part.isEmpty ||
                stopWords.contains(part) ||
                part.length <= 1) {
              continue;
            }
            candidates.add(part);
          }
        }
      }

      // Resolve every candidate to its final form, dropping empty results.
      final resolvedTerms = <String>{};
      for (final candidate in candidates) {
        final directException = termExceptions[candidate];
        if (directException != null) {
          if (directException.isNotEmpty) {
            resolvedTerms.add(directException);
          }
          continue;
        }
        final stemmed = stemmer(lemmatizer(candidate.trim())).trim();
        final resolved = termExceptions[stemmed] ?? stemmed;
        if (resolved.isNotEmpty) {
          resolvedTerms.add(resolved);
        }
      }
      return resolvedTerms;
    };