Corpus Cleaner
language_filter.cpp
Go to the documentation of this file.
1#include "language_filter.hpp"
2
3
5{
6 //FastTextEx fasttextex; fasttextex.loadModel("lid.176.bin");
7 this->loadModel("lid.176.bin");
8}
9
10/**
11 * @brief Judge the language of one sentence
12 * @details
13 * Example:
14 * @param string sentence: sentence Judged
15 * @param vector<pair<real, string>>& predictions: Pointer for returning judgment results
16 * @param int32_t k: Judge one sentence(1), judge multiple sentences(-1)
17 * @param real threshold: threshold for return
18 * @return None : minhash
19 * @ref https://fasttext.cc/docs/en/supervised-tutorial.html
20 * @attention
21**/
22void FastTextEx::predictOneLine(string sentence,
23 vector<pair<real, string>>& predictions,
24 int32_t k,
25 real threshold) const
26{
27 predictions.clear();
28
29 vector<int32_t> words, labels;
30 dict_->getStringNoNewline(sentence, words, labels);
31 Predictions linePredictions;
32 this->predict(k, words, linePredictions, threshold);
33 for (const auto& p : linePredictions) {
34 predictions.push_back(
35 make_pair(exp(p.first), dict_->getLabel(p.second)));
36 }
37 return;
38}
void predictOneLine(string sentence, vector< pair< real, string > > &predictions, int32_t k, real threshold) const
Judge the language of one sentence.