# Python package dependencies in pip requirements-file format, one per line,
# kept in alphabetical order.
# Entries are either exact pins (==), lower bounds (>=), upper bounds (<),
# or unconstrained (bare package name, any version accepted).
awscli>=1.22.55
beautifulsoup4
charset_normalizer>=3.1.0
comment_parser
crossfit>=0.0.6
dask-mpi>=2021.11.0
# "[complete]" requests dask's optional "complete" extra in addition to the base package.
dask[complete]>=2021.7.1
distributed>=2021.7.1
fasttext==0.9.2
ftfy==6.1.1
in-place==0.5.0
jieba==0.42.1
justext==3.0.1
lxml_html_clean
mecab-python3
mwparserfromhell==0.6.5
# "[nlp]" requests nemo_toolkit's optional "nlp" extra.
nemo_toolkit[nlp]>=1.23.0
# NOTE(review): the upper bound excludes NumPy 2.x — presumably for compatibility
# with other packages in this list; confirm before lifting it.
numpy<2
openai
peft
# Both presidio packages are pinned to the same version (2.2.351).
# NOTE(review): presumably they must be bumped together — confirm.
presidio-analyzer==2.2.351
presidio-anonymizer==2.2.351
pycld2
resiliparse
# Accepts spaCy 3.6.x and 3.7.x only (3.8.0 and later are excluded).
spacy>=3.6.0, <3.8.0
unidic-lite==1.0.8
usaddress==0.5.10
warcio==1.7.4
zstandard==0.18.0
