123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366 |
- <?php
- /**
- * Created by PhpStorm.
- * User: stanley-king
- * Date: 2016/10/20
- * Time: 下午7:02
- */
- namespace search;
- use algorithm;
/**
 * Splits a search phrase into index terms.
 *
 * A run of printable single-byte (ASCII) characters becomes one lower-cased
 * term, every other multi-byte character becomes a term of its own, and
 * whitespace, control bytes and the characters listed in special_character
 * only separate terms.
 */
class word_segment
{
    /**
     * Multi-byte characters that are discarded and treated as term boundaries.
     *
     * NOTE(review): entries that are a single byte long are never consulted,
     * because single-byte input is classified with ctype_* instead. They may
     * have been full-width forms originally - confirm against the upstream file.
     */
    const special_character = array(' ','“','”','Ⅰ','、','。','「','」','【','】','!','&','(',')',',',':','’','┊', '╭','╮','╯','╰','▔','▽',' ','《','》','の','*','8',';','?','°');

    /**
     * Splits a UTF-8 string into a list of single characters.
     *
     * @param string $string UTF-8 encoded text.
     * @return array|false One entry per character, or false when
     *                     preg_match_all() reports a failure.
     */
    static function mb_str_split( $string )
    {
        // The "s" modifier makes "." match "\n" too; without it line breaks were
        // silently dropped and the text on both sides was fused together.
        $matched = preg_match_all('/./us', $string, $chars);
        if ($matched === false) {
            return false;
        }
        return $chars[0];
    }

    /**
     * Tells whether a character occupies exactly one byte.
     *
     * @param string $char A single character produced by mb_str_split().
     * @return bool
     */
    static private function is_single_byte($char)
    {
        return strlen($char) === 1;
    }

    /**
     * Turns a phrase into the list of terms used for indexing and lookup.
     *
     * @param string $keywords Raw phrase; it is lower-cased before splitting.
     * @return array List of terms in order of appearance; empty when the
     *               phrase cannot be split into characters.
     */
    static public function filter($keywords)
    {
        $words = [];
        $chars = self::mb_str_split(mb_strtolower($keywords, 'UTF-8'));
        if ($chars === false) {
            return $words;
        }

        $word = '';
        foreach ($chars as $ch) {
            $kind = self::filter_word($ch);
            if ($kind === 1) {
                // Printable ASCII: keep extending the current term.
                $word .= $ch;
                continue;
            }

            // Any other character ends the pending ASCII term. Compare against ''
            // rather than using empty(): empty('0') is true and would lose the term "0".
            if ($word !== '') {
                $words[] = $word;
                $word = '';
            }

            if ($kind === 2) {
                // A multi-byte character is indexed as a term of its own.
                $words[] = $ch;
            }
            // $kind === false: separator, nothing to emit.
        }

        if ($word !== '') {
            $words[] = $word;
        }
        return $words;
    }

    /**
     * Classifies one character.
     *
     * @param string $word A single character.
     * @return int|false 1 for a printable single-byte character that may be
     *                   joined into a term, 2 for a multi-byte character that
     *                   is a term by itself, false for anything to discard.
     */
    static private function filter_word($word)
    {
        if ($word == '') {
            return false;
        }

        if (self::is_single_byte($word)) {
            // Whitespace, control bytes and other non-printable bytes all fail
            // ctype_graph() and are treated as separators.
            return ctype_graph($word) ? 1 : false;
        }

        return in_array($word, self::special_character, true) ? false : 2;
    }
}
// Find keys by character/word, then find the original text by key.
/**
 * Term index: maps each term of a phrase to the values registered for it and
 * remembers the original phrase of every value.
 */
class indexer
{
    /** @var array term => list of values registered under that term */
    protected $mDict;
    /** @var array value => the phrase that was indexed for it */
    protected $mContainer;

    public function __construct()
    {
        $this->mDict = [];
        $this->mContainer = [];
    }

    /**
     * Indexes a phrase: every term produced by word_segment::filter() is
     * linked to $value, and the phrase itself is stored under $value.
     *
     * @param string $words Phrase to index.
     * @param mixed  $value Identifier the phrase belongs to (used as array key).
     * @return void
     */
    public function parase($words, $value)
    {
        foreach (word_segment::filter($words) as $term) {
            $this->add($term, $value);
        }
        $this->mContainer[$value] = $words;
    }

    /**
     * Registers $value under $key unless it is already listed there.
     *
     * Presumably the list stays sorted: the insert position comes from
     * algorithm::lower_bonud() - confirm against the algorithm class.
     *
     * @param mixed $key
     * @param mixed $value
     * @return void
     */
    protected function add($key, $value)
    {
        if (!isset($this->mDict[$key])) {
            $this->mDict[$key] = [$value];
            return;
        }

        $postings = &$this->mDict[$key];
        if (!algorithm::binary_search($postings, $value)) {
            $at = algorithm::lower_bonud($postings, $value);
            algorithm::array_insert($postings, $at, $value);
        }
    }

    /**
     * @param mixed $key Term to look up.
     * @return array Values registered for the term; empty when unknown.
     */
    public function find($key)
    {
        return isset($this->mDict[$key]) ? $this->mDict[$key] : [];
    }

    /**
     * @param mixed $val Identifier passed to parase().
     * @return mixed The phrase stored for the identifier, or false when unknown.
     */
    public function name($val)
    {
        return isset($this->mContainer[$val]) ? $this->mContainer[$val] : false;
    }
}
/**
 * One-to-many map: each key owns a list of distinct values.
 */
class one_multi
{
    /** @var array key => list of values */
    private $mContainer;

    public function __construct()
    {
        $this->mContainer = [];
    }

    /**
     * Adds $val to the list of $key unless it is already present.
     *
     * Presumably the list stays sorted: the insert position comes from
     * algorithm::lower_bonud() - confirm against the algorithm class.
     *
     * @param mixed $key
     * @param mixed $val
     * @return void
     */
    public function add($key, $val)
    {
        if (!isset($this->mContainer[$key])) {
            // First value for this key: start a fresh list.
            $this->mContainer[$key] = [$val];
            return;
        }

        $bucket = &$this->mContainer[$key];
        if (!algorithm::binary_search($bucket, $val)) {
            $at = algorithm::lower_bonud($bucket, $val);
            algorithm::array_insert($bucket, $at, $val);
        }
    }

    /**
     * @param mixed $key
     * @return array Values stored for the key; empty when the key is unknown.
     */
    public function get($key)
    {
        return isset($this->mContainer[$key]) ? $this->mContainer[$key] : [];
    }

    /**
     * @return array The whole key => values map.
     */
    public function values()
    {
        return $this->mContainer;
    }

    /**
     * Replaces the whole map with $values.
     *
     * @param array $values
     * @return void
     */
    public function reset($values)
    {
        $this->mContainer = $values;
    }
}
/**
 * One-to-one map: each key holds a single value; adding again overwrites it.
 */
class one_one
{
    /** @var array key => value */
    private $mContainer;

    public function __construct()
    {
        $this->mContainer = [];
    }

    /**
     * Stores $val under $key, replacing any previous value.
     *
     * @param mixed $key
     * @param mixed $val
     * @return void
     */
    public function add($key, $val)
    {
        $this->mContainer[$key] = $val;
    }

    /**
     * @param mixed $key
     * @return mixed The stored value, or false when isset() reports nothing
     *               for the key (an unknown key, or a stored null).
     */
    public function get($key)
    {
        if (!isset($this->mContainer[$key])) {
            return false;
        }
        return $this->mContainer[$key];
    }
}
/**
 * Parent/child relation keyed by integer id.
 *
 * Every node is stored as array('pid' => parent id, 'subids' => child ids).
 */
class parent_sub_tree
{
    /** @var array id => array('pid' => int, 'subids' => int[]) */
    private $mTree;

    public function __construct()
    {
        $this->mTree = [];
    }

    /**
     * Records that $id is a child of $pid. Both are converted with intval().
     *
     * NOTE(review): a node that was first created as a parent (pid 0) keeps
     * pid 0 when it is later added as a child; 'pid' is not read anywhere in
     * this class, so this is left as it was.
     *
     * @param mixed $id  Child id.
     * @param mixed $pid Parent id.
     * @return void
     */
    public function add($id, $pid)
    {
        $child = intval($id);
        $parent = intval($pid);

        if (isset($this->mTree[$parent])) {
            $this->add_sub($this->mTree[$parent]['subids'], $child);
        } else {
            // Unknown parent: create it as a node with parent id 0.
            $this->mTree[$parent] = ['pid' => 0, 'subids' => [$child]];
        }

        if (!isset($this->mTree[$child])) {
            $this->mTree[$child] = ['pid' => $parent, 'subids' => []];
        }
    }

    /**
     * Inserts $val into $values unless it is already there.
     *
     * Presumably keeps $values sorted: the insert position comes from
     * algorithm::lower_bonud() - confirm against the algorithm class.
     *
     * @param array $values List modified in place.
     * @param int   $val
     * @return void
     */
    private function add_sub(&$values, $val)
    {
        if (!algorithm::binary_search($values, $val)) {
            $at = algorithm::lower_bonud($values, $val);
            algorithm::array_insert($values, $at, $val);
        }
    }

    /**
     * @param mixed $hot Node id.
     * @return bool True when the node is known and has at least one child.
     */
    public function is_parent($hot)
    {
        return isset($this->mTree[$hot]) && count($this->mTree[$hot]['subids']) > 0;
    }

    /**
     * @param mixed $hot Node id.
     * @return array Child ids of the node; empty when the node is unknown.
     */
    public function subs($hot)
    {
        return isset($this->mTree[$hot]) ? $this->mTree[$hot]['subids'] : [];
    }
}
/**
 * Keeps keys alongside their values so that keys can be listed by value.
 *
 * mValMap and mKeys are parallel lists: the key at index i belongs to the
 * value at index i. Presumably both are ordered by ascending value, since
 * every insert position comes from algorithm::lower_bonud() - confirm
 * against the algorithm class.
 */
class valtokey
{
    /** @var array keys, parallel to $mValMap */
    private $mKeys;
    /** @var array values, parallel to $mKeys */
    private $mValMap;
    /** @var array keys seen so far; only used by add() to reject duplicates */
    private $mKeyMap;
    /** @var int number of entries, refreshed by finish() only */
    private $mCount;

    public function __construct()
    {
        $this->mKeys = [];
        $this->mValMap = [];
        $this->mKeyMap = [];
        $this->mCount = 0;
    }

    /**
     * Registers a key with its value.
     *
     * @param mixed $key
     * @param mixed $val
     * @return bool False when the key is already registered, true otherwise.
     */
    public function add($key, $val)
    {
        if (algorithm::binary_search($this->mKeyMap, $key)) {
            return false;
        }
        $keySlot = algorithm::lower_bonud($this->mKeyMap, $key);
        algorithm::array_insert($this->mKeyMap, $keySlot, $key);

        // The value and its key go to the same index of the two parallel lists.
        $valSlot = algorithm::lower_bonud($this->mValMap, $val);
        algorithm::array_insert($this->mValMap, $valSlot, $val);
        algorithm::array_insert($this->mKeys, $valSlot, $key);
        return true;
    }

    /**
     * Ends the loading phase: drops the duplicate-detection list and caches
     * the entry count that findless() and findall() rely on.
     *
     * @return void
     */
    public function finish()
    {
        $this->mKeyMap = [];
        $this->mCount = count($this->mKeys);
    }

    /**
     * Resolves the index at which findless()/findall() start walking backwards.
     *
     * Takes the position reported by algorithm::upper_bound(), clamps it to the
     * last cached index, and steps back by one when the value found there is
     * greater than $val.
     *
     * @param mixed $val
     * @param mixed $pos Receives the resolved index (may be -1).
     * @return bool False when upper_bound() reported a negative position.
     */
    private function resolve_start($val, &$pos)
    {
        $pos = algorithm::upper_bound($this->mValMap, $val);
        if ($pos < 0) {
            return false;
        }
        if ($pos >= $this->mCount) {
            $pos = $this->mCount - 1;
            return true;
        }
        if ($this->mValMap[$pos] > $val) {
            $pos -= 1;
        }
        return true;
    }

    /**
     * Returns one page of keys, walking from the resolved start index down
     * towards index 0.
     *
     * @param mixed $val    Upper limit handed to algorithm::upper_bound().
     * @param int   $start  Number of entries to skip from the start index.
     * @param int   $length Maximum number of keys to return.
     * @return array|false array('total' => ..., 'cids' => keys), or false when
     *                     there is nothing at that offset. 'total' is the start
     *                     index after skipping plus one.
     */
    public function findless($val, $start, $length)
    {
        if (!$this->resolve_start($val, $pos)) {
            return false;
        }

        $pos = $pos - $start;
        if ($pos < 0) {
            return false;
        }

        $result = [];
        $i = $pos;
        while ($i >= 0 && $length > 0) {
            $result[] = $this->mKeys[$i];
            $i--;
            $length--;
        }
        return ['total' => $pos + 1, 'cids' => $result];
    }

    /**
     * Returns every key from the resolved start index down to index 0.
     *
     * @param mixed $val Upper limit handed to algorithm::upper_bound().
     * @return array|false List of keys, or false when upper_bound() reported a
     *                     negative position.
     */
    public function findall($val)
    {
        if (!$this->resolve_start($val, $pos)) {
            return false;
        }

        $result = [];
        for ($i = $pos; $i >= 0; --$i) {
            $result[] = $this->mKeys[$i];
        }
        return $result;
    }
}
|