12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364 |
- <?php
- /**
- * 初始化敏感词库<br>
- * 将敏感词加入到HashMap中<br>
- * 构建DFA算法模型
- *
- * @author dxm
- *
- */
- namespace sensitive_word;
- require_once(BASE_ROOT_PATH . "/helper/sensitive/DFAItem.php");
/**
 * Builds a DFAItem from a list of sensitive words.
 *
 * Words are written as their characters separated by spaces (for example
 * '中 国'); each word is split into its pieces and handed to
 * DFAItem::addwords() as an array.
 */
class SensitiveWordInit
{
    // Character encoding name. Not referenced anywhere inside this class.
    const ENCODING = "UTF-8";

    /**
     * Initialise the sensitive-word library from the built-in word list.
     *
     * @return DFAItem the object filled by addSensitiveWordToHashMap()
     */
    public function initKeyWord()
    {
        $word_array = $this->readSensitiveWordFile();

        return $this->addSensitiveWordToHashMap($word_array);
    }

    /**
     * Feed every word into a fresh DFAItem.
     *
     * Each entry is split on spaces and the resulting pieces are passed to
     * DFAItem::addwords(). Runs of spaces, as well as leading or trailing
     * spaces, are tolerated: they never produce empty pieces. Entries that
     * contain no pieces at all (empty or all-space strings) are skipped.
     *
     * NOTE(review): the internal layout DFAItem builds from these pieces is
     * defined in DFAItem.php and is not visible from this class.
     *
     * @param array $words space-separated words, e.g. array('中 国', '他 妈 的')
     * @return DFAItem
     */
    public function addSensitiveWordToHashMap($words)
    {
        $dfa = new DFAItem();

        foreach ($words as $word) {
            // A plain explode(" ", ...) yields "" pieces for doubled, leading
            // or trailing spaces; PREG_SPLIT_NO_EMPTY drops them so only real
            // characters reach the DFAItem.
            $pieces = preg_split('/ +/', $word, -1, PREG_SPLIT_NO_EMPTY);

            // Nothing left to register (empty entry, or preg_split() failed).
            if (empty($pieces)) {
                continue;
            }

            $dfa->addwords($pieces);
        }

        return $dfa;
    }

    /**
     * Return the sensitive-word list.
     *
     * Despite the name, no file is read: the list is hard-coded here.
     *
     * @return array words with their characters separated by single spaces
     */
    private function readSensitiveWordFile()
    {
        return array(
            '中 国',
            '中 央',
            '国 家',
            '他 妈 的',
        );
    }
}
|