|
Corpus Cleaner
|
#include "util.hpp"
Go to the source code of this file.
Functions | |
| uint32_t | strlen_utf8 (string input) |
| Get exact length of UTF-8 string in C. | |
| void | RemoveFolder (const std::string &path) |
| Delete a folder with its contents. | |
| void | CopyFile (string source_path, string target_path) |
| Copy source_file to target_file. | |
| void | MoveFile (string source_path, string target_folder) |
| Move source_file to target_folder. | |
| void | CopyFolder (string source_folder, string target_folder) |
| Copy source_folder to target_folder. | |
| void | MoveFolder (string source_folder, string target_folder) |
| Move source_folder to target_folder. | |
| string | CalculateNextEmoji (string pre_emoji) |
| Derive the next emoji. | |
| void | GetFileNameListWithoutExtention (const string folder_path, vector< string > *file_list) |
| Get filename list in folder_path. | |
| void | GetFileLineNumberList (const string folder_path, const vector< string > *file_list, const string file_extention, vector< uint64_t > *file_line_number_list) |
| Get file line number list. | |
| wstring | ConvertUTF8ToWstring (const string &sentence) |
| Convert a UTF-8 string to a wstring. | |
| string | ConvertWstringToUTF8 (const wstring &sentence) |
| Convert a wstring to a UTF-8 string. | |
| void | SegmentSentence (string sentence, vector< string > &segments) |
| Segment a sentence. | |
| string | Strip (const string &sentence) |
| Remove leading and trailing white space. | |
| void | ReplaceSubstring (string &sentence, const string &target, const string &replacement) |
| string | GetFilePathWithoutExtention (const string &file_path) |
| Get the file name from a path without the file extension (.txt). | |
| void | GetFileNameListAddedJsonl (const vector< string > &file_path_list, vector< string > &jsonl_file_path_list) |
| Extract the file name list from a file path list and add the .jsonl extension. | |
| string | EscapeWord (const string &input) |
| Escape word. | |
| void | ProceedProgressBar (unsigned long long line_count, unsigned long long file_line_number, uint32_t elapsed_time_ms) |
| Update progress bar. | |
| string CalculateNextEmoji | ( | string | pre_emoji | ) |
Derive the next emoji.
Example:
| string | pre_emoji: emoji string |
| wstring ConvertUTF8ToWstring | ( | const string & | sentence | ) |
Convert a UTF-8 string to a wstring.
Example:
| const | string& sentence: text sentence |
| string ConvertWstringToUTF8 | ( | const wstring & | sentence | ) |
| void CopyFile | ( | string | source_path, |
| string | target_path ) |
Copy source_file to target_file.
Example:
| string | source_path: Copy source file path |
| string | target_path: Copy target file path |
| void CopyFolder | ( | string | source_folder, |
| string | target_folder ) |
Copy source_folder to target_folder.
Example:
| string | source_folder: Copy source folder path |
| string | target_folder: Copy target folder path |
| string EscapeWord | ( | const string & | input | ) |
| void GetFileLineNumberList | ( | const string | folder_path, |
| const vector< string > * | file_list, | ||
| const string | file_extention, | ||
| vector< uint64_t > * | file_line_number_list ) |
Get file line number list.
Example:
| const | string folder_path: folder path |
| const | vector<string> *file_list: (input) filename list |
| const | string file_extention: file extension of the files in file_list (".json", ".txt", and so on) |
| vector<uint64_t> | *file_line_number_list: (return) file line number list |
| void GetFileNameListAddedJsonl | ( | const vector< string > & | file_path_list, |
| vector< string > & | jsonl_file_path_list ) |
Extract the file name list from a file path list and add the .jsonl extension.
Example:
| const | vector<string> &file_path_list: original file path list |
| vector<string> | &jsonl_file_path_list: (output) file path list with the .jsonl extension added |
| void GetFileNameListWithoutExtention | ( | const string | folder_path, |
| vector< string > * | file_list ) |
| string GetFilePathWithoutExtention | ( | const string & | file_path | ) |
| void MoveFile | ( | string | source_path, |
| string | target_folder ) |
Move source_file to target_folder.
Example:
| string | source_path: path of the file to move |
| string | target_folder: path of the destination folder |
| void MoveFolder | ( | string | source_folder, |
| string | target_folder ) |
Move source_folder to target_folder.
Example:
| string | source_folder: path of the folder to move |
| string | target_folder: path of the destination folder |
| void ProceedProgressBar | ( | unsigned long long | line_count, |
| unsigned long long | file_line_number, | ||
| uint32_t | elapsed_time_ms ) |
| void RemoveFolder | ( | const std::string & | path | ) |
Delete a folder with its contents.
Recursively delete files in a folder. Example:
| string | path: folder path to be deleted |
| void ReplaceSubstring | ( | string & | sentence, |
| const string & | target, | ||
| const string & | replacement ) |
| void SegmentSentence | ( | string | sentence, |
| vector< string > & | segments ) |
Segment a sentence.
Sentence segmentation proceeds in the following steps...
| string | sentence: sentence |
| string Strip | ( | const string & | sentence | ) |
Remove leading and trailing white space.
Example:
| const | string& sentence: text sentence |
| uint32_t strlen_utf8 | ( | string | input | ) |
Get exact length of UTF-8 string in C.
Example:
| string | input: text string. |