码迷,mamicode.com
首页 > 其他好文 > 详细

elasticsearch搜索提示

时间:2017-06-03 16:20:44      阅读:564      评论:0      收藏:0      [点我收藏+]

标签:info   custom   根据   weight   roc   self   arc   pes   meta   

Elasticsearch的搜索提示(自动补全)接口要求在mapping中新增一个suggest字段,并将其type设为completion。结合Scrapy项目,修改es_types.py文件如下:

from datetime import datetime
from elasticsearch_dsl import DocType, Date, Nested, Boolean, analyzer, InnerObjectWrapper, Completion, Keyword, Text, Integer
from elasticsearch_dsl.connections import connections
# Register the Elasticsearch connection; the host must be given as a string.
connections.create_connection(hosts=["localhost"])
class ArticleType(DocType):
    """Article document type (first attempt, with a string analyzer name)."""

    # NOTE: according to the article, passing the analyzer name as a plain
    # string to Completion raises an error because of an issue in the
    # elasticsearch_dsl source code; this line is kept to show the problem.
    suggest = Completion(analyzer="ik_max_word")
    title = Text(analyzer="ik_max_word")
    create_date = Date()
    praise_nums = Integer()
    fav_nums = Integer()
    comment_nums = Integer()
    tags = Text(analyzer="ik_max_word")
    front_image_url = Keyword()
    url_object_id = Keyword()
    front_image_path = Keyword()
    url = Keyword()
    content = Text(analyzer="ik_max_word")

    class Meta:
        # Index name and document type must be string literals.
        index = "jobbole"
        doc_type = "article"
# Call ArticleType.init() only when this module is executed directly.
if __name__ == "__main__":
    ArticleType.init()

解决办法:自定义CustomAnalyzer类,继承自elasticsearch_dsl.analysis下的CustomAnalyzer类,并重写get_analysis_definition方法使其返回空字典:

from datetime import datetime
from elasticsearch_dsl import DocType, Date, Nested, Boolean,     analyzer, InnerObjectWrapper, Completion, Keyword, Text, Integer

from elasticsearch_dsl.analysis import CustomAnalyzer as _CustomAnalyzer

from elasticsearch_dsl.connections import connections
# Register the Elasticsearch connection against the local host.
connections.create_connection(hosts=["localhost"])

class CustomAnalyzer(_CustomAnalyzer):
    """Subclass of elasticsearch_dsl's CustomAnalyzer with an empty definition."""

    def get_analysis_definition(self):
        """Return an empty analysis definition.

        NOTE(review): presumably this stops elasticsearch_dsl from emitting
        custom analysis settings for an analyzer that already exists on the
        server — confirm against the installed elasticsearch_dsl version.
        """
        return {}

ik_analyzer = CustomAnalyzer("ik_max_word", filter=["lowercase"])  # lowercase filter so that letter case is ignored when searching
class ArticleType(DocType):
    # Jobbole article document type.
    # The completion field receives the analyzer object rather than its name.
    suggest = Completion(analyzer=ik_analyzer)
    title = Text(analyzer="ik_max_word")
    create_date = Date()
    url = Keyword()
    url_object_id = Keyword()
    front_image_url = Keyword()
    front_image_path = Keyword()
    praise_nums = Integer()
    comment_nums = Integer()
    fav_nums = Integer()
    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    class Meta:
        # Target index name and document type.
        index = "jobbole"
        doc_type = "article"

# Call ArticleType.init() only when this module is executed directly.
if __name__ == "__main__":
    ArticleType.init()

在item中生成搜索建议词:

from spider.models.es_types import ArticleType
from elasticsearch_dsl.connections import connections
# Module-level Elasticsearch client, created with the connection alias
# configured on ArticleType; used by gen_suggests.
es = connections.create_connection(ArticleType._doc_type.using)
def gen_suggests(index, info_tuple):
    """Build the completion-suggester entries for one document.

    Each non-empty text is run through the Elasticsearch ``analyze`` API
    (``ik_max_word`` tokenisation plus lowercasing). Tokens longer than one
    character become suggestion inputs carrying that text's weight.

    Args:
        index: Name of the index passed to the analyze call.
        info_tuple: Iterable of ``(text, weight)`` pairs. Pairs listed first
            take precedence: a word they produce is not repeated in later
            entries.

    Returns:
        A list of ``{"input": [words], "weight": weight}`` dicts; empty when
        no text yields any usable token.
    """
    used_words = set()  # words already emitted by an earlier entry
    suggests = []
    for text, weight in info_tuple:
        if not text:
            # Nothing to analyse for empty/None text.
            continue

        analyzed = es.indices.analyze(
            index=index,
            analyzer="ik_max_word",
            params={"filter": ["lowercase"]},
            body=text,
        )
        tokens = {
            item["token"]
            for item in analyzed["tokens"]
            if len(item["token"]) > 1
        }
        new_words = tokens - used_words
        if new_words:
            # Record the words so later entries do not repeat them.
            used_words |= new_words
            # Sorted for a deterministic payload; order has no meaning here.
            suggests.append({"input": sorted(new_words), "weight": weight})
    return suggests


class JobboleArticleItem(scrapy.Item):
    """Scrapy item for a Jobbole article, with Elasticsearch persistence."""

    title = scrapy.Field()
    create_date = scrapy.Field(input_processor=MapCompose(date_convert))
    praise_nums = scrapy.Field(input_processor=MapCompose(number_convert))
    fav_nums = scrapy.Field(input_processor=MapCompose(number_convert))
    comment_nums = scrapy.Field(input_processor=MapCompose(number_convert))
    tags = scrapy.Field(
        input_processor=MapCompose(remove_comment_tags),
        output_processor=Join(","),
    )
    front_image_url = scrapy.Field(output_processor=MapCompose(returnValue))
    url_object_id = scrapy.Field(input_processor=MapCompose(get_md5))
    front_image_path = scrapy.Field()
    url = scrapy.Field()
    content = scrapy.Field()

    def save_to_elasticsearch(self):
        """Copy this item's fields into an ArticleType document and save it."""
        article = ArticleType()
        article.title = self["title"]
        article.create_date = self["create_date"]
        # remove_tags() strips the HTML tags from the content.
        article.content = remove_tags(self["content"])
        article.front_image_url = self["front_image_url"]
        # front_image_path is only set when it is present on the item.
        if "front_image_path" in self:
            article.front_image_path = self["front_image_path"]
        article.praise_nums = self["praise_nums"]
        article.fav_nums = self["fav_nums"]
        article.comment_nums = self["comment_nums"]
        article.url = self["url"]
        article.tags = self["tags"]
        article.meta.id = self["url_object_id"]

        # Generate the search suggestions: title words weigh 10, tag words 7.
        article.suggest = gen_suggests(
            ArticleType._doc_type.index,
            ((article.title, 10), (article.tags, 7)),
        )

        article.save()

 

elasticsearch搜索提示

标签:info   custom   根据   weight   roc   self   arc   pes   meta   

原文地址:http://www.cnblogs.com/jp-mao/p/6937260.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!