#include <bits/stdc++.h>
#include <sys/stat.h>
Go to the source code of this file.
|
| uint32_t | strlen_utf8 (string input) |
| | Get exact length of UTF-8 string in C.
|
| |
| void | RemoveFolder (const string &path) |
| |
| string | CalculateNextEmoji (string pre_emoji) |
| | Derive the next emoji.
|
| |
| void | GetFileNameListWithoutExtention (string folder_path, vector< string > *file_list) |
| | Get filename list in folder_path.
|
| |
| void | GetFileLineNumberList (const string folder_path, const vector< string > *file_list, const string file_extention, vector< uint64_t > *file_line_number_list) |
| | Get file line number list.
|
| |
| void | CopyFolder (string source_folder, string target_folder) |
| | copy source_folder to target_folder
|
| |
| void | CopyFile (string source_path, string target_path) |
| | Copy source_file to target_file.
|
| |
| void | MoveFolder (string source_folder, string target_folder) |
| | Move source_folder to target_folder.
|
| |
| wstring | ConvertUTF8ToWstring (const string &sentence) |
| | Convert string to Wstring.
|
| |
| string | ConvertWstringToUTF8 (const wstring &sentence) |
| | Convert Wstring to string.
|
| |
| void | SegmentSentence (string sentence, vector< string > &segments) |
| | Segment a sentence.
|
| |
| string | Strip (const string &sentence) |
| | Remove leading and trailing white space.
|
| |
| void | ReplaceSubstring (string &sentence, const string &target, const string &replacement) |
| |
| string | EscapeWord (const string &input) |
| | Escape word.
|
| |
| void | ProceedProgressBar (unsigned long long line_count, unsigned long long file_line_number, uint32_t elapsed_time_ms) |
| | Update progress bar.
|
| |
◆ CalculateNextEmoji()
| string CalculateNextEmoji |
( |
string | pre_emoji | ) |
|
◆ ConvertUTF8ToWstring()
| wstring ConvertUTF8ToWstring |
( |
const string & | sentence | ) |
|
Convert string to Wstring.
Example:
string input= "こんにちわ。";
wstring ConvertUTF8ToWstring(const string &sentence)
Convert string to Wstring.
- Parameters
-
| const | string& sentence: UTF-8 text sentence |
- Returns
- wstring: the sentence converted to a wstring
- Attention
Definition at line 270 of file util.cpp.
◆ ConvertWstringToUTF8()
| string ConvertWstringToUTF8 |
( |
const wstring & | sentence | ) |
|
Convert Wstring to string.
Example: wstring input_w = L"こんにちわ。"; string input = ConvertWstringToUTF8(input_w);
- Parameters
-
| const | wstring& sentence: text sentence |
- Returns
- string: the sentence converted to a UTF-8 string
- Attention
Definition at line 286 of file util.cpp.
◆ CopyFile()
| void CopyFile |
( |
string | source_path, |
|
|
string | target_path ) |
Copy source_file to target_file.
Example:
string source_path="data/source/a.txt";
string target_path="data/target/a.txt";
void CopyFile(string source_path, string target_path)
Copy source_file to target_file.
- Parameters
-
| string | source_path: Copy source file path |
| string | target_path: Copy target file path |
- Returns
- None
- Attention
Definition at line 73 of file util.cpp.
◆ CopyFolder()
| void CopyFolder |
( |
string | source_folder, |
|
|
string | target_folder ) |
copy source_folder to target_folder
Example:
string source_folder="data/source";
string target_folder="data/target";
void CopyFolder(string source_folder, string target_folder)
copy source_folder to target_folder
- Parameters
-
| string | source_folder: Copy source folder path |
| string | target_folder: Copy target folder path |
- Returns
- None
- Attention
Definition at line 116 of file util.cpp.
◆ EscapeWord()
| string EscapeWord |
( |
const string & | input | ) |
|
Escape word.
Each occurrence of ", ', or \ is replaced with \", \', or \\ respectively.
Example:
string input = "\""; string output = EscapeWord(input); // output == "\\\""
- Parameters
-
| const | string& sentence: text sentence |
- Returns
- string: converted sentence
- Attention
Definition at line 474 of file util.cpp.
◆ GetFileLineNumberList()
| void GetFileLineNumberList |
( |
const string | folder_path, |
|
|
const vector< string > * | file_list, |
|
|
const string | file_extention, |
|
|
vector< uint64_t > * | file_line_number_list ) |
Get file line number list.
Example:
string folder_path = "../data/input/";
vector<string> file_list;
vector<uint64_t> file_line_number_list;
void GetFileLineNumberList(const string folder_path, const vector< string > *file_list, const string file_extention, vector< uint64_t > *file_line_number_list)
Get file line number list.
void GetFileNameListWithoutExtention(const string folder_path, vector< string > *file_list)
Get filename list in folder_path.
- Parameters
-
| const | string folder_path: folder path |
| const | vector<string> *file_list: filename list (input) |
| const | string file_extention: file extension of the files in file_list (".json", ".txt", and so on.) |
| vector<uint64_t> | *file_line_number_list: (return) file line number list |
- Returns
- None
- Attention
Definition at line 237 of file util.cpp.
◆ GetFileNameListWithoutExtention()
| void GetFileNameListWithoutExtention |
( |
const string | folder_path, |
|
|
vector< string > * | file_list ) |
Get filename list in folder_path.
Example:
string input_path = "../data/input/";
vector<string> file_list;
GetFileNameListWithoutExtention(input_path, &file_list);
- Parameters
-
| string | folder_path: folder path |
| vector<string> | *file_list: (return) filename list |
- Returns
- None
- Attention
Definition at line 208 of file util.cpp.
◆ MoveFolder()
| void MoveFolder |
( |
string | source_folder, |
|
|
string | target_folder ) |
Move source_folder to target_folder.
Example:
string source_folder="data/source";
string target_folder="data/target";
void MoveFolder(string source_folder, string target_folder)
Move source_folder to target_folder.
- Parameters
-
| string | source_folder: Source folder path (moved from) |
| string | target_folder: Target folder path (moved to) |
- Returns
- None
- Attention
Definition at line 142 of file util.cpp.
◆ ProceedProgressBar()
| void ProceedProgressBar |
( |
unsigned long long | line_count, |
|
|
unsigned long long | file_line_number, |
|
|
uint32_t | elapsed_time_ms ) |
Update progress bar.
Example:
- Parameters
-
| unsigned long long | line_count |
| unsigned long long | file_line_number |
| uint32_t | elapsed_time_ms |
- Returns
- None
- Attention
Definition at line 498 of file util.cpp.
◆ RemoveFolder()
| void RemoveFolder |
( |
const string & | path | ) |
|
◆ ReplaceSubstring()
| void ReplaceSubstring |
( |
string & | sentence, |
|
|
const string & | target, |
|
|
const string & | replacement ) |
◆ SegmentSentence()
| void SegmentSentence |
( |
string | sentence, |
|
|
vector< string > & | segments ) |
Segment a sentence.
Sentence segmentation consists of the following steps:
- Check if there are sentences enclosed in quotation marks.
- Punctuation marks enclosed in quotation marks are ignored.
- Replace "。"(puctuation_list) with "。\n".
- Remove the last character of the string. Example:
string sentence = "../data/input/";
vector<string> segments;
SegmentSentence(sentence, segments);
- Parameters
-
- Returns
- None (the segmented sentences are stored in segments)
- Note
- Be sure to use Normalizer to normalize your corpus before this process.
- Attention
- TODO: "." should also be used as a segmentation point, while ignoring cases such as "12.3" and "wiki.txt". In Japanese sentences, a "." that acts as a full stop should be followed by a space or appear
at the end of the sentence. Only in those cases should it be treated as a full stop.
Definition at line 315 of file util.cpp.
◆ Strip()
| string Strip |
( |
const string & | sentence | ) |
|
Remove leading and trailing white space.
Example:
string sentence= " こんにちわ。 ";
sentence =
Strip(sentence);
string Strip(const string &sentence)
Remove leading and trailing white space.
- Parameters
-
| const | string& sentence: text sentence |
- Returns
- string: sentence has been processed
- Attention
Definition at line 391 of file util.cpp.
◆ strlen_utf8()
| uint32_t strlen_utf8 |
( |
string | input | ) |
|
Get exact length of UTF-8 string in C.
Example:
string input="こんにちわ。";
uint32_t strlen_utf8(string input)
Get exact length of UTF-8 string in C.
- Parameters
-
| string | input: text string. |
- Returns
- uint32_t: length of input. Reference: https://ja.stackoverflow.com/questions/2988/c%E8%A8%80%E8%AA%9E%E3%81%A7%E3%81%AEutf-8%E6%96%87%E5%AD%97%E5%88%97%E3%81%AE%E6%AD%A3%E7%A2%BA%E3%81%AA%E9%95%B7%E3%81%95%E3%82%92%E5%8F%96%E5%BE%97%E3%81%99%E3%82%8B
- Attention
Definition at line 17 of file util.cpp.