Corpus Cleaner
perplexity_filter.hh
Go to the documentation of this file.
1#include <algorithm>
2#include <cstdlib>
3#include <exception>
4#include <iostream>
5#include <iomanip>
6#include <limits>
7#include <cmath>
8#include <vector>
9#ifdef WIN32
10#include "./kenlm/util/getopt.hh"
11#else
12#include <unistd.h>
13#endif
14#include <bits/stdc++.h>
15
16#include "./kenlm/lm/model.hh"
17#include "./kenlm/lm/sizes.hh"
18#include "./kenlm/util/file_piece.hh"
19#include "./kenlm/util/usage.hh"
20
21#ifdef WIN32
22#include "./kenlm/util/getopt.hh"
23#else
24#include <unistd.h>
25#endif
26#include <omp.h>
27#include "util.hpp"
28#include "../scripts/sentencepiece/src/sentencepiece_processor.h"
29
30
31using namespace std;
32
34{
35public:
// SentencePiece tokenizer held by the filter.
// NOTE(review): presumably used by the *WithSentencePiece methods to tokenize
// the input before KenLM scoring; confirm in the implementation file.
36 sentencepiece::SentencePieceProcessor processor;
37
// NOTE(review): listing line 38 is absent from this extract; the generated
// documentation lists a KenLMFilter() constructor, which is likely declared there.

// Scores `sentence` with KenLM and returns the score as a double.
// The wide string is taken by value, so each call copies it.
39 double Score(const wstring sentence);
// Scores `sentence` with KenLM after SentencePiece tokenization.
40 double ScoreWithSentencePiece(const wstring sentence);
// Computes the perplexity of `sentence` with KenLM.
// NOTE(review): presumably derived from the KenLM score; the exact formula is
// not visible in this header.
41 double Perplexity(const wstring sentence);
// Computes the perplexity of `sentence` with KenLM after SentencePiece tokenization.
42 double PerplexityWithSentencePiece(const wstring sentence);
43};
KenLMFilter()
Construct a KenLMFilter.
double PerplexityWithSentencePiece(const wstring sentence)
Compute the perplexity of a sentence with KenLM after SentencePiece tokenization.
double ScoreWithSentencePiece(const wstring sentence)
Score a sentence with KenLM after SentencePiece tokenization.
sentencepiece::SentencePieceProcessor processor
double Score(const wstring sentence)
Score a sentence with KenLM.
double Perplexity(const wstring sentence)
Compute the perplexity of a sentence with KenLM.