码迷,mamicode.com
首页 > 其他好文 > 详细

Solr4从XML导入数据

时间:2014-12-20 15:23:32      阅读:259      评论:0      收藏:0      [点我收藏+]

标签:

白编辑了....cnblogs怎么也没给我保存个 草稿....化繁为简,你照着我的做就ok了

依赖的 jar 包记得去 E:\solrbase\dist 和 E:\solrbase\lib\ext 下面找,然后拷贝到你的 tomcat 的 lib 下

总共编辑的3个文件,这三个都在你Solr示例的\solr\collection1\conf下..

solrconfig.xml

schema.xml

xml-data-config.xml

下面贴文件内容了

schema.xml,定义你要导入的业务数据的结构,类似数据库的表定义

<?xml version="1.0" encoding="UTF-8" ?>
<!--
    Index schema for the imported records: one document per record, keyed by
    the "id" field. Field declarations come first, followed by the field types
    they refer to.
-->
<schema name="example" version="1.5">

    <fields>

        <!-- Unique key of every document (see <uniqueKey> below). -->
        <field name="id" type="string" indexed="true" stored="true"
            required="true" multiValued="false" />
        <field name="title" type="text_general" indexed="true" stored="true" />

        <!-- Stored for display only; not searchable. -->
        <field name="image" type="string" indexed="false" stored="true" />
        <field name="value" type="double" indexed="false" stored="true" />

        <field name="price" type="double" indexed="true" stored="true" />
        <field name="rebate" type="double" indexed="true" stored="true" />

        <field name="bought" type="long" indexed="true" stored="true" />

        <field name="city" type="string" indexed="true" stored="true" />
        <field name="sort" type="string" indexed="true" stored="true" />

        <field name="loc" type="string" indexed="true" stored="true" />

        <field name="startTime" type="date" indexed="true" stored="true" />
        <field name="endTime" type="date" indexed="true" stored="true" />

        <!-- Catch-all field containing all other searchable text fields
            (populated via the copyField declared further on in this schema). -->
        <field name="text" type="text_general" indexed="true" stored="false"
            multiValued="true" />

        <field name="_version_" type="long" indexed="true" stored="true" />


    </fields>

    <uniqueKey>id</uniqueKey>

    <!-- Feeds the catch-all "text" field from "title". -->
    <copyField source="title" dest="text" />

    <types>

        <fieldType name="string" class="solr.StrField"
            sortMissingLast="true" />

        <!-- boolean type: "true" or "false" -->
        <fieldType name="boolean" class="solr.BoolField"
            sortMissingLast="true" />

        <!-- Trie numeric types with precisionStep="0". -->
        <fieldType name="int" class="solr.TrieIntField"
            precisionStep="0" positionIncrementGap="0" />
        <fieldType name="float" class="solr.TrieFloatField"
            precisionStep="0" positionIncrementGap="0" />
        <fieldType name="long" class="solr.TrieLongField"
            precisionStep="0" positionIncrementGap="0" />
        <fieldType name="double" class="solr.TrieDoubleField"
            precisionStep="0" positionIncrementGap="0" />

        <!-- Trie numeric types with precisionStep="8". -->
        <fieldType name="tint" class="solr.TrieIntField"
            precisionStep="8" positionIncrementGap="0" />
        <fieldType name="tfloat" class="solr.TrieFloatField"
            precisionStep="8" positionIncrementGap="0" />
        <fieldType name="tlong" class="solr.TrieLongField"
            precisionStep="8" positionIncrementGap="0" />
        <fieldType name="tdouble" class="solr.TrieDoubleField"
            precisionStep="8" positionIncrementGap="0" />

        <fieldType name="date" class="solr.TrieDateField"
            precisionStep="0" positionIncrementGap="0" />
        <fieldType name="tdate" class="solr.TrieDateField"
            precisionStep="6" positionIncrementGap="0" />

        <!-- Element name spelled "fieldType" like every other declaration in this file. -->
        <fieldType name="binary" class="solr.BinaryField" />
        <fieldType name="pint" class="solr.IntField" />
        <fieldType name="plong" class="solr.LongField" />
        <fieldType name="pfloat" class="solr.FloatField" />
        <fieldType name="pdouble" class="solr.DoubleField" />
        <fieldType name="pdate" class="solr.DateField"
            sortMissingLast="true" />
        <fieldType name="random" class="solr.RandomSortField"
            indexed="true" />


        <!-- Text split on whitespace only. -->
        <fieldType name="text_ws" class="solr.TextField"
            positionIncrementGap="100">
            <analyzer>
                <tokenizer class="solr.WhitespaceTokenizerFactory" />
            </analyzer>
        </fieldType>

        <!-- General text: standard tokenizer plus lower-casing, configured
            identically for indexing and querying. -->
        <fieldType name="text_general" class="solr.TextField"
            positionIncrementGap="100">
            <analyzer type="index">
                <tokenizer class="solr.StandardTokenizerFactory" />
                <filter class="solr.LowerCaseFilterFactory" />
            </analyzer>
            <analyzer type="query">
                <tokenizer class="solr.StandardTokenizerFactory" />
                <filter class="solr.LowerCaseFilterFactory" />
            </analyzer>
        </fieldType>


        <!-- CJK bigram (see text_ja for a Japanese configuration using morphological
            analysis) -->
        <fieldType name="text_cjk" class="solr.TextField"
            positionIncrementGap="100">
            <analyzer>
                <tokenizer class="solr.StandardTokenizerFactory" />
                <!-- normalize width before bigram, as e.g. half-width dakuten combine -->
                <filter class="solr.CJKWidthFilterFactory" />
                <!-- for any non-CJK -->
                <filter class="solr.LowerCaseFilterFactory" />
                <filter class="solr.CJKBigramFilterFactory" />
            </analyzer>
        </fieldType>

    </types>
</schema>

solrconfig.xml,添加一个 name="/dataimport" 的 requestHandler:

    <!-- Exposes the DataImportHandler at /dataimport; its import definition is
         read from the file named by the "config" default below. -->
    <requestHandler
        name="/dataimport"
        class="org.apache.solr.handler.dataimport.DataImportHandler">
        <lst name="defaults">
            <str name="config">xml-data-config.xml</str>
        </lst>
    </requestHandler>

xml-data-config.xml,一个建立Solr和xml内容之间的桥梁

<!--
    DataImportHandler configuration: reads the XML file named in the entity's
    "url" attribute, emits one row per /urlset/url element, and passes each row
    through the ReplaceLocAddId script and then DateFormatTransformer.
-->
<dataConfig>
    <script>
        <![CDATA[
                /*
                 * Row transformer referenced as "script:ReplaceLocAddId".
                 *
                 * - Derives the "id" column from the text between "/deal/" and
                 *   ".html" in the "loc" column.
                 * - Passes "startTime" and "endTime" through
                 *   com.demo.tuan.DateUtils.format.
                 * - Cleans the "rebate" column so it holds only a number.
                 *
                 * Returns the same row object it was given.
                 */
                function ReplaceLocAddId(row) {
                    // String(...) yields a JavaScript string, so split/replace
                    // below follow JavaScript (not java.lang.String) semantics.
                    var loc = String(row.get('loc'));
                    var afterDeal = loc.split('/deal/')[1];
                    // Only set "id" when loc really contains "/deal/"; otherwise
                    // the required "id" field stays missing and Solr reports it.
                    if (afterDeal) {
                        row.put('id', afterDeal.split('.html')[0]);
                    }

                    // NOTE(review): com.demo.tuan.DateUtils is a project class not
                    // shown here; presumably it normalizes the value into the
                    // "yyyy-MM-dd HH:mm:ss" form the field definitions below
                    // expect - confirm.
                    row.put('startTime', com.demo.tuan.DateUtils.format(row.get('startTime')));
                    row.put('endTime', com.demo.tuan.DateUtils.format(row.get('endTime')));

                    // NOTE(review): the published listing lost the text that was
                    // originally removed from "rebate". Stripping every character
                    // that is not a digit or a dot is an assumption, chosen so the
                    // value fits the schema's double field - confirm against the feed.
                    var rebate = row.get('rebate');
                    if (rebate != null) {
                        row.put('rebate', String(rebate).replace(/[^0-9.]/g, ''));
                    }
                    return row;
                }
        ]]>
    </script>
    <dataSource type="FileDataSource" encoding="UTF-8" />
    <document>
        <entity name="collection1" pk="loc"
            url="D:/meituan_hao123.xml"
            processor="XPathEntityProcessor" forEach="/urlset/url"
            transformer="script:ReplaceLocAddId,DateFormatTransformer">

            <field column="loc" xpath="/urlset/url/loc" commonField="true" />

            <field column="city" xpath="/urlset/url/data/display/city"
                commonField="true" />
            <field column="sort" xpath="/urlset/url/data/display/sort"
                commonField="true" />

            <field column="title" xpath="/urlset/url/data/display/title"
                commonField="true" />
            <field column="image" xpath="/urlset/url/data/display/image"
                commonField="true" />

            <field column="value" xpath="/urlset/url/data/display/value"
                commonField="true" />
            <field column="price" xpath="/urlset/url/data/display/price"
                commonField="true" />
            <field column="rebate" xpath="/urlset/url/data/display/rebate"
                commonField="true" />
            <field column="bought" xpath="/urlset/url/data/display/bought"
                commonField="true" />

            <!-- dateTimeFormat is the pattern DateFormatTransformer uses to parse these columns. -->
            <field column="startTime" xpath="/urlset/url/data/display/startTime"
                dateTimeFormat="yyyy-MM-dd HH:mm:ss" commonField="true" />
            <field column="endTime" xpath="/urlset/url/data/display/endTime"
                dateTimeFormat="yyyy-MM-dd HH:mm:ss" commonField="true" />

        </entity>
    </document>
</dataConfig>

 

-----OVER------

Solr4从XML导入数据

标签:

原文地址:http://www.cnblogs.com/studies/p/4175427.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!