����JFIF��x�x����'
| Server IP : 78.140.185.180 / Your IP : 216.73.216.170 Web Server : LiteSpeed System : Linux cpanel13.v.fozzy.com 4.18.0-513.11.1.lve.el8.x86_64 #1 SMP Thu Jan 18 16:21:02 UTC 2024 x86_64 User : builderbox ( 1072) PHP Version : 7.3.33 Disable Function : NONE MySQL : OFF | cURL : ON | WGET : ON | Perl : ON | Python : ON | Sudo : OFF | Pkexec : OFF Directory : /home/builderbox/./././www/vendor/teamtnt/tntsearch/src/Classifier/ |
Upload File : |
<?php
namespace TeamTNT\TNTSearch\Classifier;
use TeamTNT\TNTSearch\Stemmer\PorterStemmer;
use TeamTNT\TNTSearch\Support\Tokenizer;
/**
 * Multinomial Naive Bayes text classifier with add-one (Laplace) smoothing.
 *
 * Training statements are tokenized and stemmed, then counted per type
 * (label). Prediction picks the type with the highest log-likelihood
 * log P(type) + sum(log P(word | type)).
 */
class TNTClassifier
{
    /** @var array Number of learned documents, keyed by type. */
    public $documents = [];

    /** @var array Stemmed word counts: [type => [word => count]]. */
    public $words = [];

    /** @var array List of all types seen by learn(). */
    public $types = [];

    /** @var object|null Tokenizer; must expose tokenize($text). */
    public $tokenizer = null;

    /** @var object|null Stemmer; must expose stem($word). */
    public $stemmer = null;

    /**
     * Memoised vocabulary size (number of distinct stemmed words across all
     * types). Declared explicitly because it used to be created as a dynamic
     * property, which is deprecated as of PHP 8.2. Kept public so that the
     * visibility matches objects serialized by earlier versions.
     *
     * @var int|null
     */
    public $vc = null;

    /** @var array|null Memoised total word count per type. */
    protected $arraySumOfWordType = [];

    /** @var int|float|null Memoised total number of learned documents. */
    protected $arraySumOfDocuments = null;

    /**
     * Creates a classifier with the default tokenizer and Porter stemmer.
     */
    public function __construct()
    {
        $this->tokenizer = new Tokenizer;
        $this->stemmer   = new PorterStemmer;
    }

    /**
     * Predicts the most likely type for a statement.
     *
     * @param string $statement Text to classify.
     *
     * @return array ['likelihood' => float, 'label' => mixed]; when no type has
     *               been learned the result is ['likelihood' => -INF, 'label' => ''].
     */
    public function predict($statement)
    {
        // Stemming does not depend on the candidate type, so do it once
        // instead of once per type.
        $stems = [];
        foreach ($this->tokenizer->tokenize($statement) as $token) {
            $stems[] = $this->stemmer->stem($token);
        }

        $bestLikelihood = -INF;
        $bestType       = '';

        foreach ($this->types as $type) {
            // Work in log space: log P(type) + sum of log P(word | type).
            $likelihood = log($this->pTotal($type));
            foreach ($stems as $stem) {
                $likelihood += log($this->p($stem, $type));
            }

            if ($likelihood > $bestLikelihood) {
                $bestLikelihood = $likelihood;
                $bestType       = $type;
            }
        }

        return [
            'likelihood' => $bestLikelihood,
            'label'      => $bestType
        ];
    }

    /**
     * Adds one training document of the given type.
     *
     * @param string $statement Training text.
     * @param mixed  $type      Label for the text (used as an array key).
     *
     * @return void
     */
    public function learn($statement, $type)
    {
        // The counts are about to change, so every memoised value is stale.
        // Without this, learning after a prediction kept using old totals.
        $this->resetCaches();

        // Strict comparison: with loose comparison on PHP 7 a string label
        // equals an existing integer 0 label and would never be registered.
        if (!in_array($type, $this->types, true)) {
            $this->types[] = $type;
        }

        foreach ($this->tokenizer->tokenize($statement) as $token) {
            $stem = $this->stemmer->stem($token);
            if (!isset($this->words[$type][$stem])) {
                $this->words[$type][$stem] = 0;
            }
            $this->words[$type][$stem]++; // increment the word count for the type
        }

        if (!isset($this->documents[$type])) {
            $this->documents[$type] = 0;
        }
        $this->documents[$type]++; // increment the document count for the type
    }

    /**
     * Smoothed conditional probability P(word | type).
     *
     * Computed as (count + 1) / (words in type + vocabulary size), so an
     * unseen word never yields a zero probability.
     *
     * @param string $word Already-stemmed word.
     * @param mixed  $type Type to evaluate against.
     *
     * @return float|int
     */
    public function p($word, $type)
    {
        $count = isset($this->words[$type][$word]) ? $this->words[$type][$word] : 0;

        if (!isset($this->arraySumOfWordType[$type])) {
            // A type learned only from statements that produced no tokens has
            // no entry in $words; treat it as an empty bag instead of passing
            // null to array_sum().
            $typeWords = isset($this->words[$type]) ? $this->words[$type] : [];
            $this->arraySumOfWordType[$type] = array_sum($typeWords);
        }

        return ($count + 1) / ($this->arraySumOfWordType[$type] + $this->vocabularyCount());
    }

    /**
     * Prior probability P(type): the share of learned documents of that type.
     *
     * @param mixed $type A type that has been passed to learn().
     *
     * @return float|int
     */
    public function pTotal($type)
    {
        if (!isset($this->arraySumOfDocuments)) {
            $this->arraySumOfDocuments = array_sum($this->documents);
        }

        return ($this->documents[$type]) / $this->arraySumOfDocuments;
    }

    /**
     * Number of distinct stemmed words across all types (memoised).
     *
     * @return int
     */
    public function vocabularyCount()
    {
        if (isset($this->vc)) {
            return $this->vc;
        }

        // Array union keeps one entry per distinct key, i.e. per distinct word.
        $vocabulary = [];
        foreach ($this->words as $wordCounts) {
            $vocabulary += $wordCounts;
        }

        $this->vc = count($vocabulary);

        return $this->vc;
    }

    /**
     * Serializes the classifier to a file.
     *
     * @param string $path Destination file.
     *
     * @return int|false Result of file_put_contents(): bytes written or false.
     */
    public function save($path)
    {
        return file_put_contents($path, serialize($this));
    }

    /**
     * Replaces this classifier's state with one previously written by save().
     *
     * @param string $name Path of the serialized classifier.
     *
     * @return void
     *
     * @throws \RuntimeException When the file cannot be read or does not
     *                           contain a serialized classifier.
     */
    public function load($name)
    {
        $serialized = file_get_contents($name);
        if ($serialized === false) {
            throw new \RuntimeException("Unable to read classifier file: {$name}");
        }

        // SECURITY: unserialize() can instantiate arbitrary classes. Only load
        // files from a trusted source. The allowed_classes option is not used
        // here because custom tokenizer/stemmer objects would be lost.
        $classifier = unserialize($serialized);
        if (!$classifier instanceof self) {
            throw new \RuntimeException("File does not contain a serialized classifier: {$name}");
        }

        // Memoised values belong to the previous state; drop them.
        $this->resetCaches();

        $this->documents = $classifier->documents;
        $this->words     = $classifier->words;
        $this->types     = $classifier->types;
        $this->tokenizer = $classifier->tokenizer;
        $this->stemmer   = $classifier->stemmer;
    }

    /**
     * Clears every memoised total so it is recomputed on next use.
     *
     * @return void
     */
    protected function resetCaches()
    {
        $this->vc                  = null;
        $this->arraySumOfDocuments = null;
        $this->arraySumOfWordType  = [];
    }
}