util.php 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366
  1. <?php
  2. /**
  3. * Created by PhpStorm.
  4. * User: stanley-king
  5. * Date: 2016/10/20
  6. * Time: 下午7:02
  7. */
  8. namespace search;
  9. use algorithm;
  10. class word_segment
  11. {
  12. const special_character = array(' ','“','”','Ⅰ','、','。','「','」','【','】','!','&','(',')',',',':','’','┊', '╭','╮','╯','╰','▔','▽',' ','《','》','の','*','8',';','?','°');
  13. static function mb_str_split( $string )
  14. {
  15. $val = preg_match_all('/./u', $string,$datas);
  16. if($val === false) {
  17. return false;
  18. }
  19. else {
  20. return $datas[0];
  21. }
  22. }
  23. static private function non_english($word)
  24. {
  25. $ar = str_split($word);
  26. return count($ar) == 1 ? true : false;
  27. }
  28. static public function filter($keywords)
  29. {
  30. $words = [];
  31. $keywords = self::mb_str_split(mb_strtolower($keywords));
  32. if($keywords === false) return $words;
  33. $word = '';
  34. foreach ($keywords as $ch)
  35. {
  36. $ret = word_segment::filter_word($ch);
  37. if($ret == false)
  38. { //抛弃——》之前的词当成单独的词
  39. if(!empty($word)) {
  40. $words[] = $word;
  41. $word = '';
  42. }
  43. }
  44. elseif($ret === 1) { //可见的ASCII字,可连接上
  45. $word .= $ch;
  46. }
  47. else
  48. { //独立的中文单词
  49. if(!empty($word)) {
  50. $words[] = $word;
  51. $word = '';
  52. }
  53. $words[] = $ch;
  54. }
  55. }
  56. if(!empty($word)) {
  57. $words[] = $word;
  58. }
  59. return $words;
  60. }
  61. static private function filter_word($word)
  62. {
  63. if($word == '') return false;
  64. if(self::non_english($word))
  65. {
  66. if(ctype_space($word)) {
  67. return false;
  68. }
  69. elseif (ctype_graph($word)) {
  70. return 1;
  71. }
  72. elseif (ctype_cntrl($word)) {
  73. return false;
  74. }
  75. else {
  76. return false;
  77. }
  78. }
  79. else
  80. {
  81. if(in_array($word,self::special_character)) {
  82. return false;
  83. }
  84. else {
  85. return 2;
  86. }
  87. }
  88. }
  89. }
  90. //通过字找到key,通过key找到词
  91. class indexer
  92. {
  93. protected $mDict;
  94. protected $mContainer;
  95. public function __construct()
  96. {
  97. $this->mDict = array();
  98. $this->mContainer = array();
  99. }
  100. public function parase($words,$value)
  101. {
  102. $fwords = word_segment::filter($words);
  103. foreach ($fwords as $word) {
  104. $this->add($word,$value);
  105. }
  106. $this->mContainer[$value] = $words;
  107. }
  108. protected function add($key,$value)
  109. {
  110. if(isset($this->mDict[$key]))
  111. {
  112. $datas = &$this->mDict[$key];
  113. if(algorithm::binary_search($datas,$value) == false) {
  114. $pos = algorithm::lower_bonud($datas,$value);
  115. algorithm::array_insert($datas,$pos,$value);
  116. }
  117. }
  118. else {
  119. $this->mDict[$key] = array($value);
  120. }
  121. }
  122. public function find($key)
  123. {
  124. if(isset($this->mDict[$key])) {
  125. return $this->mDict[$key];
  126. } else {
  127. return array();
  128. }
  129. }
  130. public function name($val)
  131. {
  132. if(isset($this->mContainer[$val])) {
  133. return $this->mContainer[$val];
  134. } else {
  135. return false;
  136. }
  137. }
  138. }
  139. class one_multi
  140. {
  141. private $mContainer;
  142. public function __construct()
  143. {
  144. $this->mContainer = array();
  145. }
  146. public function add($key,$val)
  147. {
  148. if(isset($this->mContainer[$key]))
  149. {
  150. $values = &$this->mContainer[$key];
  151. if(algorithm::binary_search($values,$val) == false) {
  152. $pos = algorithm::lower_bonud($values,$val);
  153. algorithm::array_insert($values,$pos,$val);
  154. }
  155. }
  156. else {
  157. $this->mContainer[$key] = [];
  158. $this->mContainer[$key][] = $val;
  159. }
  160. }
  161. public function get($key)
  162. {
  163. if(isset($this->mContainer[$key]))
  164. {
  165. return $this->mContainer[$key];
  166. }
  167. else {
  168. return array();
  169. }
  170. }
  171. public function values() {
  172. return $this->mContainer;
  173. }
  174. public function reset($values) {
  175. $this->mContainer = $values;
  176. }
  177. }
  178. class one_one
  179. {
  180. private $mContainer;
  181. public function __construct()
  182. {
  183. $this->mContainer = array();
  184. }
  185. public function add($key,$val)
  186. {
  187. $this->mContainer[$key] = $val;
  188. }
  189. public function get($key)
  190. {
  191. if(isset($this->mContainer[$key]))
  192. {
  193. return $this->mContainer[$key];
  194. }
  195. else {
  196. return false;
  197. }
  198. }
  199. }
  200. class parent_sub_tree
  201. {
  202. private $mTree;
  203. public function __construct()
  204. {
  205. $this->mTree = [];
  206. }
  207. public function add($id,$pid)
  208. {
  209. $id = intval($id);
  210. $pid = intval($pid);
  211. if(isset($this->mTree[$pid]) == false) {
  212. $this->mTree[$pid] = [];
  213. $this->mTree[$pid]['pid'] = 0;
  214. $this->mTree[$pid]['subids'] = [];
  215. $this->mTree[$pid]['subids'][] = $id;
  216. } else {
  217. $sub_ids = &$this->mTree[$pid]['subids'];
  218. $this->add_sub($sub_ids,$id);
  219. }
  220. if(isset($this->mTree[$id]) == false) {
  221. $this->mTree[$id] = [];
  222. $this->mTree[$id]['pid'] = $pid;
  223. $this->mTree[$id]['subids'] = [];
  224. }
  225. }
  226. private function add_sub(&$values,$val)
  227. {
  228. if(algorithm::binary_search($values,$val) == false) {
  229. $pos = algorithm::lower_bonud($values,$val);
  230. algorithm::array_insert($values,$pos,$val);
  231. }
  232. }
  233. public function is_parent($hot)
  234. {
  235. if (isset($this->mTree[$hot]) == false) {
  236. return false;
  237. }
  238. return (count($this->mTree[$hot]['subids']) > 0);
  239. }
  240. public function subs($hot)
  241. {
  242. if (isset($this->mTree[$hot]) == false) {
  243. return array();
  244. }
  245. return $this->mTree[$hot]['subids'];
  246. }
  247. }
  248. class valtokey
  249. {
  250. private $mKeys;
  251. private $mValMap;
  252. private $mKeyMap;
  253. private $mCount;
  254. public function __construct()
  255. {
  256. $this->mKeys = [];
  257. $this->mValMap = [];
  258. $this->mKeyMap = [];
  259. $this->mCount = 0;
  260. }
  261. public function add($key,$val)
  262. {
  263. if(algorithm::binary_search($this->mKeyMap,$key) == true) {
  264. return false;
  265. } else {
  266. $pos = algorithm::lower_bonud($this->mKeyMap,$key);
  267. algorithm::array_insert($this->mKeyMap,$pos,$key);
  268. }
  269. $pos = algorithm::lower_bonud($this->mValMap,$val);
  270. algorithm::array_insert($this->mValMap,$pos,$val);
  271. algorithm::array_insert($this->mKeys,$pos,$key);
  272. return true;
  273. }
  274. public function finish() {
  275. $this->mKeyMap = [];
  276. $this->mCount = count($this->mKeys);
  277. }
  278. public function findless($val,$start,$length)
  279. {
  280. $pos = algorithm::upper_bound($this->mValMap,$val);
  281. if($pos < 0) {
  282. return false;
  283. }
  284. elseif ($pos >= $this->mCount) {
  285. $pos = $this->mCount - 1;
  286. }
  287. else {
  288. $data = $this->mValMap[$pos];
  289. if($data > $val) {
  290. $pos -= 1;
  291. }
  292. }
  293. $pos = $pos - $start;
  294. if($pos < 0) {
  295. return false;
  296. }
  297. $result = [];
  298. for ($i = $pos; $i >= 0 && $length > 0; $i--,$length--) {
  299. $result[] = $this->mKeys[$i];
  300. }
  301. return array('total' => $pos + 1,'cids' => $result);
  302. }
  303. public function findall($val)
  304. {
  305. $pos = algorithm::upper_bound($this->mValMap,$val);
  306. if($pos < 0) {
  307. return false;
  308. }
  309. elseif ($pos >= $this->mCount) {
  310. $pos = $this->mCount - 1;
  311. }
  312. else {
  313. $data = $this->mValMap[$pos];
  314. if($data > $val) {
  315. $pos -= 1;
  316. }
  317. }
  318. $result = [];
  319. for ($i = $pos; $i >= 0; $i--) {
  320. $result[] = $this->mKeys[$i];
  321. }
  322. return $result;
  323. }
  324. }