6lm::ngram::Model
model(
"ja.arpa.bin");
29 const auto status =
processor.Load(
"ja.sp.model");
31 std::cerr << status.ToString() << std::endl;
59 double total_score=0,score=0;
63 lm::ngram::State state(
model.BeginSentenceState()), out_state;
64 const lm::ngram::Vocabulary &vocab =
model.GetVocabulary();
65 for (
int i=0;i<(int)sentence.size();i++) {
67 wstring word_w=sentence.substr(i,1);
70 score=
model.BaseScore(&state, vocab.Index(word), &out_state);
76 score=
model.BaseScore(&state, vocab.EndSentence(), &out_state);
105 double total_score=0,score=0;
108 vector<string> pieces;
111 lm::ngram::State state(
model.BeginSentenceState()), out_state;
112 const lm::ngram::Vocabulary &vocab =
model.GetVocabulary();
113 for (
auto piece:pieces) {
116 score=
model.BaseScore(&state, vocab.Index(piece), &out_state);
118 total_score += score;
122 score=
model.BaseScore(&state, vocab.EndSentence(), &out_state);
123 total_score += score;
150 double words = (double)(sentence.size()+1);
153 return pow(10.0,(-this->
Score(sentence) / words));
178 vector<string> pieces;
180 string sentence_tokenized =
"";
185 double words = (double)pieces.size()+1;
KenLMFilter()
Score sentence by KenLM.
double PerplexityWithSentencePiece(const wstring sentence)
Compute the perplexity of a sentence with KenLM, using SentencePiece tokenization.
double ScoreWithSentencePiece(const wstring sentence)
Score a sentence with KenLM, using SentencePiece tokenization.
sentencepiece::SentencePieceProcessor processor
double Score(const wstring sentence)
Score a sentence with KenLM.
double Perplexity(const wstring sentence)
Compute the perplexity of a sentence with KenLM.
lm::ngram::Model model("ja.arpa.bin")
string ConvertWstringToUTF8(const wstring &sentence)
Convert a wstring to a UTF-8 string.