Corpus Cleaner
util.hpp
Go to the documentation of this file.
1#include <bits/stdc++.h>
2#include <sys/stat.h>
3
4using namespace std;
5namespace fs = filesystem;
6
7uint32_t strlen_utf8(string input);
8void RemoveFolder(const string& path);
9string CalculateNextEmoji(string pre_emoji);
10void GetFileNameListWithoutExtention(string folder_path, vector<string> *file_list);
11void GetFileLineNumberList(const string folder_path,
12 const vector<string> *file_list,
13 const string file_extention,
14 vector<uint64_t> *file_line_number_list);
15void CopyFolder(string source_folder, string target_folder);
16void CopyFile(string source_path, string target_path);
17void MoveFolder(string source_folder, string target_folder);
18wstring ConvertUTF8ToWstring(const string& sentence);
19string ConvertWstringToUTF8(const wstring& sentence);
20void SegmentSentence(string sentence, vector<string> &segments);
21string Strip(const string& sentence);
22void ReplaceSubstring(string& sentence, const string& target, const string& replacement);
23string EscapeWord(const string& input);
24void ProceedProgressBar(unsigned long long line_count,
25 unsigned long long file_line_number,
26 uint32_t elapsed_time_ms);
void GetFileLineNumberList(const string folder_path, const vector< string > *file_list, const string file_extention, vector< uint64_t > *file_line_number_list)
Get file line number list.
Definition util.cpp:237
string ConvertWstringToUTF8(const wstring &sentence)
Convert Wstring to string.
Definition util.cpp:286
void GetFileNameListWithoutExtention(string folder_path, vector< string > *file_list)
Get filename list in folder_path.
Definition util.cpp:208
void ProceedProgressBar(unsigned long long line_count, unsigned long long file_line_number, uint32_t elapsed_time_ms)
Update progress bar.
Definition util.cpp:498
void CopyFolder(string source_folder, string target_folder)
Copy source_folder to target_folder.
Definition util.cpp:116
void RemoveFolder(const string &path)
uint32_t strlen_utf8(string input)
Get exact length of UTF-8 string in C.
Definition util.cpp:17
string CalculateNextEmoji(string pre_emoji)
Derive the next emoji.
Definition util.cpp:177
void ReplaceSubstring(string &sentence, const string &target, const string &replacement)
Definition util.cpp:403
void CopyFile(string source_path, string target_path)
Copy the file at source_path to target_path.
Definition util.cpp:73
wstring ConvertUTF8ToWstring(const string &sentence)
Convert string to Wstring.
Definition util.cpp:270
string Strip(const string &sentence)
Remove leading and trailing white space.
Definition util.cpp:391
void SegmentSentence(string sentence, vector< string > &segments)
Segment a sentence.
Definition util.cpp:315
void MoveFolder(string source_folder, string target_folder)
Move source_folder to target_folder.
Definition util.cpp:142
string EscapeWord(const string &input)
Escape word.
Definition util.cpp:474