I have crawled a website using nutch.
When I try to index it with solr I get following error
org.apache.solr.common.SolrException: ERROR: [doc=http://xyz.htm] unknown field 'metatag.keywords'
*unknown field 'metatag.keywords'*

I can not figure out where the error is as I have o not defined any field in schema.xml for metatags.I just copied the schema.xml from nutch into solr.
I am using Nutch 1.9 with Solr 4.10

My *schema.xml* for *solr*

<?xml version="1.0" encoding="UTF-8" ?>
<schema name="nutch" version="1.5">
    <types>
<fieldType name="string" class="solr.StrField" sortMissingLast="true"
            omitNorms="true"/>
        <fieldType name="long" class="solr.TrieLongField" precisionStep="0"
            omitNorms="true" positionIncrementGap="0"/>
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0"
            omitNorms="true" positionIncrementGap="0"/>
        <fieldType name="date" class="solr.TrieDateField" precisionStep="0"
            omitNorms="true" positionIncrementGap="0"/>

        <fieldType name="text" class="solr.TextField"
            positionIncrementGap="100">
            <analyzer>
                <tokenizer class="solr.WhitespaceTokenizerFactory"/>
                <filter class="solr.StopFilterFactory"
                    ignoreCase="true" words="stopwords.txt"/>
                <filter class="solr.WordDelimiterFilterFactory"
                    generateWordParts="1" generateNumberParts="1"
                    catenateWords="1" catenateNumbers="1" catenateAll="0"
                    splitOnCaseChange="1"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <!--<filter class="solr.EnglishPorterFilterFactory"
                    protected="protwords.txt"/>-->
                <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
            </analyzer>
        </fieldType>
        <fieldType name="url" class="solr.TextField"
            positionIncrementGap="100">
            <analyzer>
                <tokenizer class="solr.StandardTokenizerFactory"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.WordDelimiterFilterFactory"
                    generateWordParts="1" generateNumberParts="1"/>
            </analyzer>
        </fieldType>
    </types>
    <fields>
        <field name="id" type="string" stored="true" indexed="true"
            required="true"/>
        <field name="_version_" type="long" indexed="true" stored="true"/>
        <!-- core fields -->
        <field name="segment" type="string" stored="true" indexed="false"/>
        <field name="digest" type="string" stored="true" indexed="false"/>
        <field name="boost" type="float" stored="true" indexed="false"/>

        <!-- fields for index-basic plugin -->
        <field name="host" type="string" stored="false" indexed="true"/>
        <field name="url" type="url" stored="true" indexed="true"/>
        <field name="content" type="text" stored="true" indexed="true"/>
        <field name="title" type="text" stored="true" indexed="true"/>
        <field name="cache" type="string" stored="true" indexed="false"/>
        <field name="tstamp" type="date" stored="true" indexed="false"/>

        <!-- fields for index-anchor plugin -->
        <field name="anchor" type="string" stored="true" indexed="true"
            multiValued="true"/>

        <!-- fields for index-more plugin -->
        <field name="type" type="string" stored="true" indexed="true"
            multiValued="true"/>
        <field name="contentLength" type="long" stored="true"
            indexed="false"/>
        <field name="lastModified" type="date" stored="true"
            indexed="false"/>
        <field name="date" type="date" stored="true" indexed="true"/>

        <!-- fields for languageidentifier plugin -->
        <field name="lang" type="string" stored="true" indexed="true"/>

        <!-- fields for subcollection plugin -->
        <field name="subcollection" type="string" stored="true"
            indexed="true" multiValued="true"/>

<!-- fields for feed plugin (tag is also used by microformats-reltag)-->
        <field name="author" type="string" stored="true" indexed="true"/>
<field name="tag" type="string" stored="true" indexed="true" multiValued="true"/>
        <field name="feed" type="string" stored="true" indexed="true"/>
        <field name="publishedDate" type="date" stored="true"
            indexed="true"/>
        <field name="updatedDate" type="date" stored="true"
            indexed="true"/>

        <!-- fields for creativecommons plugin -->
        <field name="cc" type="string" stored="true" indexed="true"
            multiValued="true"/>

        <!-- fields for tld plugin -->
        <field name="tld" type="string" stored="false" indexed="false"/>
    </fields>
    <uniqueKey>id</uniqueKey>
    <defaultSearchField>content</defaultSearchField>
    <solrQueryParser defaultOperator="OR"/>
</schema>

my *solrindex-mapping.xml*

<mapping>
    <fields>
        <field dest="content" source="content"/>
        <field dest="title" source="title"/>
        <field dest="host" source="host"/>
        <field dest="segment" source="segment"/>
        <field dest="boost" source="boost"/>
        <field dest="digest" source="digest"/>
        <field dest="tstamp" source="tstamp"/>
    </fields>
    <uniqueKey>id</uniqueKey>
</mapping>


Reply via email to