标签:
首先,本文的测试数据是 100 多万条域名的 www title 信息,检索数据如下:
-- Counter table read and written by the indexer's ranged / delta queries.
--   index_id: selects the counter row (the indexer queries use index_id = 1).
--   max_id:   the mx_domain_wwwinfo.id value stored by those queries
--             (REPLACE INTO sph_counter SELECT 1, MAX(id) ...).
-- IF NOT EXISTS keeps the script re-runnable on a database that already has the table.
CREATE TABLE IF NOT EXISTS `sph_counter` (
    `index_id` tinyint(1) NOT NULL,
    `max_id`   int(11)    NOT NULL,
    PRIMARY KEY (`index_id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
# Delta (incremental) indexing combined with ranged queries, implemented in
# a single test configuration file.

# Main data source.
source src
{
    # data source type. mandatory, no default value
    # known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
    type                = mysql

    #####################################################################
    ## SQL settings (for 'mysql' and 'pgsql' types)
    #####################################################################

    # some straightforward parameters for SQL source types
    sql_host            = localhost
    sql_user            = root
    # NOTE(review): plaintext database password committed in the config;
    # rotate it and restrict file permissions before using this anywhere real.
    sql_pass            = 201671zhuang
    sql_db              = whomx
    sql_port            = 3306  # optional, default is 3306

    sql_query_pre       = SET NAMES utf8
    sql_query_pre       = SET SESSION query_cache_type=OFF
    # Snapshot the current highest id before fetching, so the main index
    # covers ids up to this value and the delta source can start from it.
    sql_query_pre       = REPLACE INTO sph_counter SELECT 1, MAX(id) FROM mx_domain_wwwinfo

    # Ranged query: fetch the table in slices instead of all rows at once.
    # Upper bound is the snapshot stored in sph_counter above.
    sql_query_range     = SELECT MIN(id), (SELECT max_id FROM sph_counter WHERE index_id = 1) FROM mx_domain_wwwinfo
    # Width of the id range covered by each slice.
    sql_range_step      = 10000
    # NOTE(review): SELECT * relies on `id` being the first column of
    # mx_domain_wwwinfo (the indexer takes the first column as the document id).
    sql_query           = SELECT * FROM mx_domain_wwwinfo WHERE id>=$start AND id<=$end

    # Used by the command-line `search` tool to display matched rows.
    sql_query_info      = SELECT * FROM mx_domain_wwwinfo WHERE id=$id
}

# Delta data source: inherits connection settings from src.
source moresrc : src
{
    # Redeclaring sql_query_pre replaces the inherited list, so the
    # REPLACE INTO sph_counter from src is deliberately NOT run here.
    sql_query_pre       = SET NAMES utf8
    sql_query_pre       = SET SESSION query_cache_type=OFF

    # Delta range: from the id recorded in sph_counter up to the current MAX(id).
    sql_query_range     = SELECT (SELECT max_id FROM sph_counter WHERE index_id=1),MAX(id) from mx_domain_wwwinfo
    sql_range_step      = 10000
    sql_query           = SELECT * FROM mx_domain_wwwinfo WHERE id>=$start AND id<=$end

    # Advance the counter only after the delta index has been built
    # successfully, and only to the highest id that was actually fetched
    # ($maxid). $maxid expands to 0 when nothing was indexed, so the
    # "$maxid > max_id" guard prevents the counter from ever moving backwards.
    sql_query_post_index = UPDATE sph_counter SET max_id = $maxid WHERE index_id = 1 AND $maxid > max_id
}

# The remaining configuration (index, indexer, searchd sections) is the same
# as in the earlier setup.
root@timeless-HP-Pavilion-g4-Notebook-PC:/usr/local/coreseek/bin# /usr/local/coreseek/bin/indexer -c /usr/local/coreseek/etc/csft.complex.conf -all --rotate Coreseek Fulltext 4.1 [ Sphinx 2.0.2-dev (r2922)] Copyright (c) 2007-2011, Beijing Choice Software Technologies Inc (http://www.coreseek.com) using config file ‘/usr/local/coreseek/etc/csft.complex.conf‘... indexing index ‘src‘... WARNING: Attribute count is 0: switching to none docinfo collected 1500838 docs, 117.1 MB sorted 20.5 Mhits, 100.0% done total 1500838 docs, 117142922 bytes total 39.557 sec, 2961363 bytes/sec, 37941.06 docs/sec indexing index ‘moresrc‘... WARNING: Attribute count is 0: switching to none docinfo collected 1 docs, 0.0 MB sorted 0.0 Mhits, 100.0% done total 1 docs, 21 bytes total 0.001 sec, 13282 bytes/sec, 632.51 docs/sec total 37 reads, 0.047 sec, 3240.3 kb/call avg, 1.2 msec/call avg total 269 writes, 0.216 sec, 834.8 kb/call avg, 0.8 msec/call avg rotating indices: succesfully sent SIGHUP to searchd (pid=28843).
root@timeless-HP-Pavilion-g4-Notebook-PC:/usr/local/coreseek/bin# /usr/local/coreseek/bin/indexer moresrc -c /usr/local/coreseek/etc/csft.complex.conf --rotate Coreseek Fulltext 4.1 [ Sphinx 2.0.2-dev (r2922)] Copyright (c) 2007-2011, Beijing Choice Software Technologies Inc (http://www.coreseek.com) using config file ‘/usr/local/coreseek/etc/csft.complex.conf‘... indexing index ‘moresrc‘... WARNING: Attribute count is 0: switching to none docinfo collected 1 docs, 0.0 MB sorted 0.0 Mhits, 100.0% done total 1 docs, 21 bytes total 0.013 sec, 1571 bytes/sec, 74.83 docs/sec total 2 reads, 0.000 sec, 0.0 kb/call avg, 0.0 msec/call avg total 6 writes, 0.000 sec, 0.0 kb/call avg, 0.0 msec/call avg rotating indices: succesfully sent SIGHUP to searchd (pid=28843).
root@timeless-HP-Pavilion-g4-Notebook-PC:/usr/local/coreseek/bin# /usr/local/coreseek/bin/indexer --merge src moresrc -c /usr/local/coreseek/etc/csft.complex.conf --rotate Coreseek Fulltext 4.1 [ Sphinx 2.0.2-dev (r2922)] Copyright (c) 2007-2011, Beijing Choice Software Technologies Inc (http://www.coreseek.com) using config file ‘/usr/local/coreseek/etc/csft.complex.conf‘... read 11.5 of 11.5 MB, 100.0% done merged 2268.5 Kwords merged in 2.724 sec total 74183 reads, 0.075 sec, 1.5 kb/call avg, 0.0 msec/call avg total 413 writes, 0.110 sec, 253.4 kb/call avg, 0.2 msec/call avg rotating indices: succesfully sent SIGHUP to searchd (pid=28843).
root@timeless-HP-Pavilion-g4-Notebook-PC:/usr/local/coreseek/bin# ./search -c /usr/local/coreseek/etc/csft.complex.conf 济南Coreseek Fulltext 4.1 [ Sphinx 2.0.2-dev (r2922)] Copyright (c) 2007-2011, Beijing Choice Software Technologies Inc (http://www.coreseek.com) using config file ‘/usr/local/coreseek/etc/csft.complex.conf‘... index ‘src‘: query ‘济南 ‘: returned 1000 matches of 11041 total in 0.001 sec displaying matches: 1. document=53592, weight=1664 id=53592 domain_id=75937 title=?????????????,??????,??????,?????????????????????????????????????,??????,??????,????,????,??????,??????,??????,??????,????????,????,??????,??????,??????,??????,????????,????????,?????????,????????? addtime=1419001556 2. document=156494, weight=1663 id=156494 domain_id=320070 title=??--??????,?????,????????,????????,??????,??????,?????,?????,???,?????,?????,?????,?????,????,????,??????,??????,??????,?????,?????,?????,???,???????, addtime=1419041933 3. document=53624, weight=1661 id=53624 domain_id=74960 title=???????-???.??.???.???????/?????/?????/?????????/?????/?????/???? ????? ????? ????? ????? ????? ????? ??POS??? ????? ????? ????? ??POS? addtime=1419001559 4. document=908267, weight=1661 id=908267 domain_id=3482035 title=???????-???.??.???.???????/?????/?????/?????????/?????/?????/???? ????? ????? ????? ????? ????? ????? ??POS??? ????? ????? ????? ??POS? addtime=1421983846 5. document=1074259, weight=1659 id=1074259 domain_id=2805964 title=?????? - ???? | ????? | ????? | ?????? | ?????? | ?????? | ?????? | ?????? | ?????? | ?????? | ?????? | ?????? | ?????? | ?????? addtime=1421998317 6. document=628662, weight=1658 id=628662 domain_id=1603934 title=????????|????????|?????????|?????????|??????|?????|????????????????|????????|?????????|?????????|??????|?????|???????? addtime=1420628500 7. document=82498, weight=1656 id=82498 domain_id=75205 title=??????????????????????????????????????????????????????????????????????????????????????????????????????? addtime=1419030813 8. 
document=373234, weight=1656 id=373234 domain_id=75953 title=????|??????|????????|??????|??????|??????|??????|??????|??????|??????|??????-???????? addtime=1419481313 9. document=97657, weight=1655 id=97657 domain_id=75152 title=???????????????????????????????????????????????????????????????????? addtime=1419032238 10. document=108426, weight=1655 id=108426 domain_id=76651 title=??????|??????|??SKF??|??NSK??|??FAG??|??NTN??|??KOYO??|??TIMKEN??|??FAG??|????|????????|????????|??????|-?? addtime=1419033228 11. document=184337, weight=1655 id=184337 domain_id=75654 title=???????|??????????|???????|??????????|??????|???????|?????????|????????|?????????|???????|?????????? addtime=1419043496 12. document=246303, weight=1655 id=246303 domain_id=262037 title=???? ?????? ?????? ?????? ????? ?????? ???? ???? ?????? ???? ?????? addtime=1419046975 13. document=261372, weight=1655 id=261372 domain_id=544595 title=??????|????|?????|?????????|???????????|??????|??????|????|?????|?????|?????|????? addtime=1419215630 14. document=1163692, weight=1655 id=1163692 domain_id=2514244 title=??????????????????????????????????????????????????????????????????????????????_?????????????? addtime=1422005290 15. document=1163740, weight=1655 id=1163740 domain_id=2514240 title=?????????????????????????????????????????????????????????????????????????_?????????????? addtime=1422005293 16. document=1163762, weight=1655 id=1163762 domain_id=2514239 title=????????????????????????????????????????????????????????????????????????????_?????????????? addtime=1422005295 17. document=10694, weight=1653 id=10694 domain_id=454049 title=??????|??????|??????|??????|??????|?????|??????|????????|???????????|????????????|???400-070-3005 addtime=1418996572 18. document=15876, weight=1653 id=15876 domain_id=66098 title=????????? ???????????? ??????? ?????????? ??????? ??????? ????????? ??????? ??????? ???????_????????? addtime=1418997101 19. 
document=23385, weight=1653 id=23385 domain_id=421622 title=????0531-82825553|??????|??????????????|????????|???????|??????|????T1|????T3|????T6|????U8 addtime=1418997836 20. document=34628, weight=1653 id=34628 domain_id=320077 title=????|?????|?????|?????|?????|?????|?????|?????|????|?????????? addtime=1418998927 words:
#起作用 1. ‘济南‘: 11041 documents, 22216 hits index ‘moresrc‘: query ‘济南 ‘: returned 1 matches of 1 total in 0.000 sec displaying matches: 1. document=1501042, weight=1500 id=1501042 domain_id=4 title=???? ?? addtime= words: 1. ‘济南‘: 1 documents, 2 hits
增量索引起作用
sphinx (coreseek)——3、区段查询 与 增量索引实例
标签:
原文地址:http://www.cnblogs.com/timelesszhuang/p/4772054.html