Quest.php 13 KB


  1. #!/usr/bin/env php
  2. <?php
  3. /**
  4. * Xunsearch PHP-SDK 搜索测试工具
  5. *
  6. * @author hightman
  7. * @link http://www.xunsearch.com/
  8. * @copyright Copyright &copy; 2011 HangZhou YunSheng Network Technology Co., Ltd.
  9. * @license http://www.xunsearch.com/license/
  10. * @version $Id$
  11. */
  12. require_once dirname(__FILE__) . '/../lib/XS.php';
  13. require_once dirname(__FILE__) . '/XSUtil.class.php';
  // Register the option names with the parser. NOTE(review): judging by the
  // usage text these are the options that expect a value - confirm against
  // XSUtil::parseOpt().
  XSUtil::parseOpt(array(
      'p', 'q', 'c', 'd', 's',
      'project', 'query', 'db', 'limit', 'charset',
      'sort', 'add-weight', 'scws-multi', 'cut-off',
  ));

  // Project and query. The calls are kept in this order on purpose:
  // presumably the third argument lets getOpt() fall back to the next
  // positional argument (see the usage text), so <project> must be read
  // before <query> - TODO confirm in XSUtil.
  $project = XSUtil::getOpt('p', 'project', true);
  $query = XSUtil::getOpt('q', 'query', true);

  // Mode switches and search tuning options (long form only).
  $hot = XSUtil::getOpt(null, 'hot');
  $synonyms = XSUtil::getOpt(null, 'list-synonyms');
  $terms = XSUtil::getOpt(null, 'terms');
  $weights = XSUtil::getOpt(null, 'add-weight');
  $info = XSUtil::getOpt(null, 'info');
  $scws_multi = XSUtil::getOpt(null, 'scws-multi');
  $cut_off = XSUtil::getOpt(null, 'cut-off');

  // Terminal charset: register it with XSUtil, then pass the query through
  // XSUtil::convertIn() before it is used for searching.
  $charset = XSUtil::getOpt('c', 'charset');
  XSUtil::setCharset($charset);
  $query = XSUtil::convertIn($query);

  // Sort scheme (comma separated field list, parsed later).
  $sort = XSUtil::getOpt('s', 'sort');
  // Print the usage text and stop when help was requested, when no project
  // was given, or when the selected mode needs a query but none was given
  // (--info, --hot and --list-synonyms work without one).
  // The text itself is user-facing and stays in Chinese. Compared with the
  // previous version only factual/typographic errors in it were corrected:
  // unbalanced brackets in the --sort and --cut-off syntax, "weight" wrongly
  // written as "permission"/"count", a duplicated verb, and "both" used for
  // three modes.
  if (XSUtil::getOpt('h', 'help') !== null || !is_string($project)
      || (!$info && !$hot && !$synonyms && !is_string($query))) {
      $version = PACKAGE_NAME . '/' . PACKAGE_VERSION;
      // The heredoc body and its closing marker start at column 0 so the
      // script keeps parsing on PHP versions older than 7.3.
      echo <<<EOF
Quest - 搜索查询和测试工具 ($version)
用法
{$_SERVER['argv'][0]} [options] [-p|--project] <project> [[-q|--query] <query>]
选项说明
--project=<name|ini>
-p <project> 用于指定要搜索的项目名称或项目配置文件的路径,
如果指定的是名称,则使用 ../app/<name>.ini 作为配置文件
--charset=<gbk|utf-8>
-c <charset> 指定您当前在用的字符集,以便系统进行智能转换(默认:UTF-8)
--db=<name[,name2 ...]>
-d <db[,db2 ...]> 指定项目中的数据库名称,默认是名为 db 的库,多个库之间用逗号分隔
--query=<query>
-q <query> 指定要搜索的查询语句,如果语句中包含空格请使用双引号包围起来
在搜索语句中可采用 'field:\$from..\$to' 做区间过滤
--sort=<field1[,field2[,...]]>
-s <field1[,field2[,...]]> 指定排序字段,在字段前加上 ~ 符号表示逆序
--fuzzy 将搜索默认设为模糊搜索
--synonym[=scale]
开启自动同义词搜索功能,可选择设置同义词权重调整(0.01~2.55)
--scws-multi=<level>
查看或设置搜索语句的 scws 复合分词等级(值:0-15,默认为 3)
--add-weight=<[field1:]word1[:weight1][,[field2:]word2[:weight2]]>
添加搜索权重词汇,词与权重之间用半角冒号分隔
--hot[=total|last|cur]
用于显示指定项目的热门搜索词,此时 <query> 参数无意义,可省略
其值含义分别表示总搜索量、上周搜索量、本周搜索量,默认为总搜索量。
--suggest 根据当前搜索词展开常用搜索词建议,如查询“中”,即显示“中”开头的词
--correct 根据当前搜索词进行同音、拼写纠错,输出更合适的关键词
--related 根据当前搜索词查找相关搜索词
--list-synonyms[=stemmed]
列出库内的全部同义词,每行显示一个,可以搭配 --limit 使用,默认显示前 100 个
如果设置了 stemmed 值则连同词根同义词也列出
--limit=<num>用于设置 suggest|hot|related 的返回数量,三者默认值均为 10 个
对于普通搜索和列出同义词时,还支持用 --limit=offset,num 的格式
--show-query 用于在搜索结果显示内部的 Xapian 结构的 query 语句用于调试
--cut-off=<percent[,weight]>
设置搜索结果剔除的匹配百分比及权重(百分比:0-100,权重:0.1-25.5)
--terms 列出搜索词被切分后的词(不含排除及权重词)
--info 显示当前连接服务端的信息及线程(仅绘制当前 worker 进程)
-h|--help 显示帮助信息
若未指定 -p 或 -q 则会依次把附加的参数当作 <project> 和 <query> 处理,例:
{$_SERVER['argv'][0]} <project> <query>
{$_SERVER['argv'][0]} --hot <project>
EOF;
      exit(0);
  }
  // Locate the project configuration: an existing path is used as given,
  // anything else is treated as a project name under ../app/<name>.ini.
  if (file_exists($project)) {
      $ini = $project;
  } else {
      $ini = dirname(__FILE__) . '/../app/' . $project . '.ini';
  }

  // Give up when no configuration file could be found.
  if (!file_exists($ini)) {
      echo "错误:无效的项目名称 ($project),不存在相应的配置文件。\n";
      exit(-1);
  }
  // Run the requested mode. Exactly one branch is taken, checked in this
  // order: hot words, server info, synonym list, term split, correction,
  // suggestion, related queries, and finally a normal search.
  try {
      // Read the remaining switches into same-named variables
      // ($hot, $suggest, $correct, $related, $output, $limit).
      $params = array('hot', 'suggest', 'correct', 'related', 'output', 'limit');
      foreach ($params as $_) {
          $$_ = XSUtil::getOpt(null, $_);
      }
      // Default result count is 10; the "offset,num" form is handled in the
      // branches that support it.
      $limit1 = $limit === null ? 10 : intval($limit);
      $db = XSUtil::getOpt('d', 'db');

      // create xs object
      $xs = new XS($ini);
      $search = $xs->search;
      $search->setCharset('UTF-8');
      if ($db !== null) {
          // The first name is passed to setDb(), every further one to addDb().
          $dbs = explode(',', $db);
          $search->setDb(trim($dbs[0]));
          for ($i = 1; $i < count($dbs); $i++) {
              $search->addDb(trim($dbs[$i]));
          }
      }
      if ($scws_multi !== null) {
          $search->setScwsMulti($scws_multi);
      }

      if ($hot !== null) {
          // hot queries: map the option value to the counter name
          $type = $hot === 'cur' ? 'currnum' : ($hot === 'last' ? 'lastnum' : 'total');
          $result = $search->getHotQuery($limit1, $type);
          if (count($result) === 0) {
              echo "暂无相关热门搜索记录。\n";
          } else {
              $i = 1;
              printf("序 %s %s\n%s\n", XSUtil::fixWidth('搜索关键词(' . $type . ')', 40),
                  XSUtil::fixWidth('次数', 10), XSUtil::fixWidth('', 56, '-'));
              foreach ($result as $word => $freq) {
                  printf("%2d. %s %d\n", $i, XSUtil::fixWidth($word, 40), $freq);
                  $i++;
              }
          }
      } elseif ($info !== null) {
          // server info
          echo "---------- SERVER INFO BEGIN ----------\n";
          $res = $search->execCommand(CMD_DEBUG);
          echo $res->buf;
          echo "\n---------- SERVER INFO END ----------\n";
          // thread pool
          $res = $search->execCommand(CMD_SEARCH_DRAW_TPOOL);
          echo $res->buf;
      } elseif ($synonyms !== null) {
          // synonym list: --limit may be absent, "num" or "offset,num"
          if ($limit === null) {
              $offset = $limit1 = 0;
          } elseif (!is_string($limit) || ($pos = strpos($limit, ',')) === false) {
              $offset = 0;
          } else {
              $limit1 = intval(substr($limit, $pos + 1));
              $offset = intval($limit);
          }
          $synonyms = $search->getAllSynonyms($limit1, $offset, $synonyms === 'stemmed');
          if (count($synonyms) == 0) {
              echo "暂无相关的同义词记录";
              if ($offset != 0) {
                  echo ",反正总数不超过 $offset 个";
              }
              echo "。\n";
          } else {
              $i = $offset + 1;
              printf(" %s %s\n%s\n", XSUtil::fixWidth('原词', 32), '同义词', XSUtil::fixWidth('', 56, '-'));
              foreach ($synonyms as $raw => $list) {
                  printf("%4d. %s %s\n", $i++, XSUtil::fixWidth($raw, 29), implode(", ", $list));
              }
          }
      } elseif ($terms !== null) {
          // show how the query is split into terms
          $result = $search->terms($query);
          echo "列出\033[7m" . $query . "\033[m的内部切分结果:\n";
          print_r($result);
      } elseif ($correct !== null) {
          // spelling / pinyin correction
          $result = $search->getCorrectedQuery($query);
          if (count($result) === 0) {
              echo "目前对\033[7m" . $query . "\033[m还没有更好的修正方案。\n";
          } else {
              echo "您可以试试找:\033[4m" . implode("\033[m \033[4m", $result) . "\033[m\n";
          }
      } elseif ($suggest !== null) {
          // query suggestions
          $result = $search->getExpandedQuery($query, $limit1);
          if (count($result) === 0) {
              echo "目前对\033[7m" . $query . "\033[m还没有任何搜索建议。\n";
          } else {
              echo "展开\033[7m" . $query . "\033[m得到以下搜索建议:\n";
              for ($i = 0; $i < count($result); $i++) {
                  printf("%d. %s\n", $i + 1, $result[$i]);
              }
          }
      } elseif ($related !== null) {
          // related queries
          $result = $search->getRelatedQuery($query, $limit1);
          if (count($result) === 0) {
              echo "目前还没有与\033[7m" . $query . "\033[m相关的搜索词。\n";
          } else {
              echo "与\033[7m" . $query . "\033[m相关的搜索词:\n";
              for ($i = 0; $i < count($result); $i++) {
                  printf("%d. %s\n", $i + 1, $result[$i]);
              }
          }
      } else {
          // fuzzy search
          if (XSUtil::getOpt(null, 'fuzzy') !== null) {
              $search->setFuzzy();
          }
          // automatic synonyms, with an optional scale when a value was given
          $syn = XSUtil::getOpt(null, 'synonym');
          if ($syn !== null) {
              $search->setAutoSynonyms();
              if ($syn !== true) {
                  $search->setSynonymScale(floatval($syn));
              }
          }
          // --limit may be "num" or "offset,num". Only inspect it when it is
          // really a string: without --limit it is null, and passing null to
          // strpos() is deprecated as of PHP 8.1 (it printed a notice on
          // every plain search).
          $pos = is_string($limit) ? strpos($limit, ',') : false;
          if ($pos === false) {
              $offset = 0;
          } else {
              $limit1 = intval(substr($limit, $pos + 1));
              $offset = intval($limit);
          }
          // sort: "~field" maps to false, a plain field name to true
          if ($sort !== null) {
              $fields = array();
              $tmps = explode(',', $sort);
              foreach ($tmps as $tmp) {
                  $tmp = trim($tmp);
                  if ($tmp === '') {
                      continue;
                  }
                  if (substr($tmp, 0, 1) === '~') {
                      $fields[substr($tmp, 1)] = false;
                  } else {
                      $fields[$tmp] = true;
                  }
              }
              $search->setMultiSort($fields);
          }
          // special fields
          $fid = $xs->getFieldId();
          $ftitle = $xs->getFieldTitle();
          $fbody = $xs->getFieldBody();
          if ($fbody) {
              $xs->getFieldBody()->cutlen = 100;
          }
          // add range: pull every "field:from..to" expression out of the
          // query text; an empty bound becomes null
          $ranges = array();
          if (strpos($query, '..') !== false) {
              $regex = '/(\S+?):(\S*?)\.\.(\S*)/';
              if (preg_match_all($regex, $query, $matches) > 0) {
                  for ($i = 0; $i < count($matches[0]); $i++) {
                      $ranges[] = array($matches[1][$i],
                          $matches[2][$i] === '' ? null : $matches[2][$i],
                          $matches[3][$i] === '' ? null : $matches[3][$i]);
                      $query = str_replace($matches[0][$i], '', $query);
                  }
              }
          }
          // set query
          $search->setQuery($query);
          foreach ($ranges as $range) {
              $search->addRange($range[0], $range[1], $range[2]);
          }
          // add weights: "word", "word:weight", "field:word" or
          // "field:word:weight", separated by commas
          if ($weights !== null) {
              foreach (explode(',', $weights) as $tmp) {
                  $tmp = explode(':', trim($tmp));
                  if (count($tmp) === 1) {
                      $search->addWeight(null, $tmp[0]);
                  } elseif (count($tmp) === 2) {
                      // two parts: a numeric second part is a weight,
                      // otherwise the first part is a field name
                      if (is_numeric($tmp[1])) {
                          $search->addWeight(null, $tmp[0], floatval($tmp[1]));
                      } else {
                          $search->addWeight($tmp[0], $tmp[1]);
                      }
                  } else {
                      $search->addWeight($tmp[0], $tmp[1], floatval($tmp[2]));
                  }
              }
          }
          // cut off: "percent,weight", a lone value containing a dot is
          // passed as the weight, any other lone value as the percentage
          if ($cut_off !== null) {
              $pos = strpos($cut_off, ',');
              // as before, a comma at position 0 is not treated as a separator
              if ($pos !== false && $pos > 0) {
                  $search->setCutOff(substr($cut_off, 0, $pos), substr($cut_off, $pos + 1));
              } elseif (strpos($cut_off, '.') !== false) {
                  $search->setCutOff(0, $cut_off);
              } else {
                  $search->setCutOff($cut_off);
              }
          }
          // perform the search and time it
          $begin = microtime(true);
          $result = $search->setLimit($limit1, $offset)->search();
          $cost = microtime(true) - $begin;
          $matched = $search->getLastCount();
          $total = $search->getDbTotal();
          // show query?
          if (XSUtil::getOpt(null, 'show-query') !== null) {
              echo str_repeat("-", 20) . "\n";
              echo "解析后的 QUERY 语句:" . $search->getQuery() . "\n";
              echo str_repeat("-", 20) . "\n";
          }
          // related & corrected
          $correct = $search->getCorrectedQuery();
          $related = $search->getRelatedQuery();
          // summary line: totals, displayed range and elapsed time
          printf("在 %s 条数据中,大约有 %d 条包含 \033[7m%s\033[m ,第 %d-%d 条,用时:%.4f 秒。\n", number_format($total),
              $matched, $query, min($matched, $offset + 1), min($matched, $limit1 + $offset), $cost);
          // correct
          if (count($correct) > 0) {
              echo "您是不是想找:\033[4m" . implode("\033[m \033[4m", $correct) . "\033[m\n";
          }
          // show result
          foreach ($result as $doc) /* @var $doc XSDocument */ {
              // body & title
              $body = $title = '';
              if ($ftitle !== false) {
                  $title = cliHighlight($doc->f($ftitle));
              }
              if ($fbody !== false) {
                  $body = cliHighlight($doc->f($fbody)) . "\n";
              }
              // main fields
              printf("\n%d. \033[4m%s#%s# [%d%%,%.2f]\033[m\n", $doc->rank(), $title, $doc->f($fid),
                  $doc->percent(), $doc->weight());
              echo $body;
              // other fields: packed into one line until adding the next
              // field would exceed 80 bytes, then the line is flushed
              $line = '';
              foreach ($xs->getAllFields() as $field) /* @var $field XSFieldMeta */ {
                  // "isSpeical" is the method name as called here (sic)
                  if ($field->isSpeical()) {
                      continue;
                  }
                  $tmp = ucfirst($field->name) . ':' . cliHighlight($doc->f($field));
                  if ((strlen($tmp) + strlen($line)) > 80) {
                      if (strlen($line) > 0) {
                          echo $line . "\n";
                          $line = '';
                      }
                      echo $tmp . "\n";
                  } else {
                      $line .= $tmp . ' ';
                  }
              }
              if (strlen($line) > 0) {
                  echo $line . "\n";
              }
          }
          // related
          if (count($related) > 0) {
              echo "\n相关搜索:\033[4m" . implode("\033[m \033[4m", $related) . "\033[m\n";
          }
          echo "\n";
      }
  } catch (XSException $e) {
      // Print the exception followed by its trace, with path prefixes
      // shortened. NOTE(review): XSException::getRelPath() presumably
      // returns the SDK directory relative to the current one - confirm.
      $start = dirname(dirname(__FILE__));
      $relative = XSException::getRelPath($start);
      $traceString = $e->getTraceAsString();
      $traceString = str_replace(dirname(__FILE__) . '/', '', $traceString);
      $traceString = str_replace($start . ($relative === '' ? '/' : ''), $relative, $traceString);
      echo $e . "\n" . $traceString . "\n";
  }
  /**
   * Highlight search matches in a string for terminal output.
   *
   * The text is passed through the highlight() method of the global $search
   * object; every <em>...</em> pair in the result is then replaced by the
   * ANSI reverse-video sequence (ESC[7m ... ESC[m). Any <em> or </em> tag
   * that is left over is removed, and a single space is appended.
   *
   * @param string $str text to highlight
   * @return string text with ANSI highlighting and a trailing space
   */
  function cliHighlight($str)
  {
      global $search;
      $marked = $search->highlight($str);
      // The "s" modifier lets "." match line breaks too. Without it an
      // <em> span containing a newline was never converted and lost its
      // highlight when the leftover tags were stripped below.
      $converted = preg_replace('#<em>(.+?)</em>#s', "\033[7m\\1\033[m", $marked);
      if ($converted === null) {
          // preg_replace() failed: keep the text rather than dropping it
          $converted = $marked;
      }
      return strtr($converted . ' ', array('<em>' => '', '</em>' => ''));
  }
  357. }