Corpus Cleaner
Main Page
Related Pages
Namespaces
Classes
Files
File List
File Members
All
Functions
Typedefs
Macros
c
e
g
m
n
o
p
r
s
t
u
w
Here is a list of all functions with links to the files they belong to:
- c -
CalculateNextEmoji() :
util.cpp
,
util.hpp
ConutLines() :
main.cpp
ConvertInputFilesToJsonl() :
corpus_cleaner.cpp
,
corpus_cleaner.hpp
ConvertTextToDocument() :
corpus_cleaner.cpp
,
corpus_cleaner.hpp
ConvertUTF8ToWstring() :
util.cpp
,
util.hpp
ConvertWstringToUTF8() :
util.cpp
,
util.hpp
CopyFile() :
util.cpp
,
util.hpp
CopyFolder() :
util.cpp
,
util.hpp
- e -
EscapeWord() :
util.cpp
,
util.hpp
- g -
GetFileLineNumberList() :
util.cpp
,
util.hpp
GetFileNameListAddedJsonl() :
util.cpp
GetFileNameListWithoutExtention() :
util.cpp
,
util.hpp
GetFilePathWithoutExtention() :
util.cpp
- m -
main() :
main.cpp
MakeStats() :
corpus_cleaner.cpp
,
corpus_cleaner.hpp
MergeFiles() :
main.cpp
model() :
perplexity_filter.cc
MoveFile() :
util.cpp
MoveFolder() :
util.cpp
,
util.hpp
MultiProcessCorpusClean() :
main.cpp
- n -
NormalizeNeologd() :
normalizer.cpp
,
normalizer.hpp
Normalizer() :
normalizer.hpp
- o -
OutputStats() :
corpus_cleaner.cpp
,
corpus_cleaner.hpp
- p -
ProceedProgressBar() :
util.cpp
,
util.hpp
- r -
ReadDocumentFromJsonlOneLine() :
corpus_cleaner.cpp
,
corpus_cleaner.hpp
RemoveExtraSpaces() :
normalizer.cpp
,
normalizer.hpp
RemoveFolder() :
util.cpp
,
util.hpp
ReplaceSubstring() :
util.cpp
,
util.hpp
- s -
SegmentSentence() :
util.cpp
,
util.hpp
SplitFiles() :
main.cpp
Strip() :
util.cpp
,
util.hpp
strlen_utf8() :
util.cpp
,
util.hpp
- t -
TranslateToFullwidth() :
normalizer.cpp
,
normalizer.hpp
- u -
UnicodeNormalize() :
normalizer.cpp
,
normalizer.hpp
- w -
WriteDocumentToJsonl() :
corpus_cleaner.cpp
,
corpus_cleaner.hpp
Generated by Doxygen 1.10.0