* 将敏感词加入到HashMap中
* 构建DFA算法模型
*
* @author dxm
*
*/
namespace sensitive_word;
require_once(BASE_ROOT_PATH . "/helper/sensitive/DFAItem.php");
/**
 * Builds the DFA model used for sensitive-word matching by registering a
 * list of sensitive words on a DFAItem.
 */
class SensitiveWordInit
{
    // Character encoding.
    const ENCODING = "UTF-8";

    /**
     * Initializes the sensitive-word dictionary.
     *
     * Fetches the built-in word list and registers every word on a new
     * DFAItem.
     *
     * @return DFAItem the item on which all built-in words were registered
     */
    public function initKeyWord()
    {
        return $this->addSensitiveWordToHashMap($this->readSensitiveWordFile());
    }

    /**
     * Registers every sensitive word on a fresh DFAItem.
     *
     * Each word is a string whose tokens are separated by spaces
     * (for example '中 国'); the token list is handed to DFAItem::addwords().
     *
     * Illustration carried over from the original comment of the nested
     * model this is meant to produce (the actual layout is defined by
     * DFAItem, which is not part of this file):
     *
     *   中 = { isEnd = 0
     *          国 = { isEnd = 1
     *                 人 = { isEnd = 0  民 = { isEnd = 1 } }
     *                 男 = { isEnd = 0  人 = { isEnd = 1 } } } }
     *   五 = { isEnd = 0 星 = { isEnd = 0 红 = { isEnd = 0 旗 = { isEnd = 1 } } } }
     *
     * @param array $words space-separated sensitive words
     * @return DFAItem the populated item
     */
    public function addSensitiveWordToHashMap($words)
    {
        $dfa = new DFAItem();
        foreach ($words as $word) {
            // Split on runs of spaces and drop empty pieces, so doubled,
            // leading or trailing spaces never turn into empty tokens.
            $tokens = preg_split('/ +/', $word, -1, PREG_SPLIT_NO_EMPTY);
            if (empty($tokens)) {
                // Blank entry (or a split failure): nothing to register.
                continue;
            }
            $dfa->addwords($tokens);
        }
        return $dfa;
    }

    /**
     * Returns the sensitive-word list.
     *
     * Despite the method name, no file is read: the list is hard-coded.
     *
     * @return array words whose tokens are separated by single spaces
     */
    private function readSensitiveWordFile()
    {
        return array(
            '中 国',
            '中 央',
            '国 家',
            '他 妈 的',
        );
    }
}