* 将敏感词加入到HashMap中
* Builds the DFA algorithm model.
*
* @author dxm
*/
namespace sensitive_word;

require_once(BASE_ROOT_PATH . "/helper/sensitive/DFAItem.php");

/**
 * Loads the sensitive-word list and feeds it into a DFAItem.
 */
class SensitiveWordInit
{
    // Character encoding. Not referenced inside this class; kept because it is
    // part of the class's public surface.
    const ENCODING = "UTF-8";

    /**
     * Initializes the sensitive-word dictionary.
     *
     * Reads the word list and hands it to addSensitiveWordToHashMap().
     *
     * @return DFAItem the populated DFA object
     */
    public function initKeyWord()
    {
        $word_array = $this->readSensitiveWordFile();

        return $this->addSensitiveWordToHashMap($word_array);
    }

    /**
     * Adds every entry of the word list to a fresh DFAItem.
     *
     * Each entry is a string whose tokens are separated by a single space
     * (e.g. '他 妈 的'); the tokens of one entry are passed to
     * DFAItem::addwords() as an array.
     *
     * Conceptual shape of the resulting model, as sketched by the original
     * author (the actual node layout is defined by DFAItem, which is not part
     * of this file):
     *
     *   中 = { isEnd = 0
     *          国 = { isEnd = 1
     *                 人 = { isEnd = 0  民 = { isEnd = 1 } }
     *                 男 = { isEnd = 0  人 = { isEnd = 1 } } } }
     *   五 = { isEnd = 0
     *          星 = { isEnd = 0  红 = { isEnd = 0  旗 = { isEnd = 1 } } } }
     *
     * @param array $words space-separated sensitive-word entries
     *
     * @return DFAItem the populated DFA object
     */
    public function addSensitiveWordToHashMap($words)
    {
        $dfa = new DFAItem();

        foreach ($words as $word) {
            // explode() yields '' for leading, trailing or repeated spaces.
            // Drop those so no empty-string token reaches the DFA. The strict
            // comparison keeps a literal "0" token, which a bare
            // array_filter() would discard as falsy.
            $tokens = array_values(array_filter(
                explode(" ", $word),
                function ($token) {
                    return $token !== '';
                }
            ));

            // An empty or whitespace-only entry carries no tokens; skip it.
            if (count($tokens) === 0) {
                continue;
            }

            $dfa->addwords($tokens);
        }

        return $dfa;
    }

    /**
     * Returns the sensitive-word entries as an array.
     *
     * The list is currently hard-coded; no file is read.
     *
     * @return array space-separated sensitive-word entries
     */
    private function readSensitiveWordFile()
    {
        return array(
            '中 国',
            '中 央',
            '国 家',
            '他 妈 的',
        );
    }
}