首页 > 解决方案 > Solr 与 nutch 集成后不返回结果

问题描述

我已将 nutch 2.3.1 与 solr 6.5.1 集成在一起。我能够通过 nutch 抓取数据并将其推送到 solr 。但是在基于查询的索引之后它什么也不返回。例如,如果我给出一个查询http://127.0.0.1:8983/solr/testcore/browse?q=World它没有给出任何内容,如下 所示在此处输入图像描述

否则,如果我没有在查询中指定任何内容,它会在不过滤的情况下给出结果,即它返回的整个索引数据

在此处输入图像描述

架构.xml

 <?xml version="1.0" encoding="UTF-8"?>
<!-- Solr managed schema - automatically generated - DO NOT EDIT -->
<schema name="nutch" version="1.5">
  <uniqueKey>id</uniqueKey>
  <defaultSearchField>text</defaultSearchField>
  <solrQueryParser defaultOperator="OR"/>
  <fieldType name="binary" class="solr.BinaryField"/>
  <fieldType name="date" class="solr.TrieDateField" omitNorms="true" positionIncrementGap="0" precisionStep="0"/>
  <fieldType name="double" class="solr.TrieDoubleField" omitNorms="true" positionIncrementGap="0" precisionStep="0"/>
  <fieldType name="float" class="solr.TrieFloatField" omitNorms="true" positionIncrementGap="0" precisionStep="0"/>
  <fieldType name="ignored" class="solr.StrField" indexed="false" stored="false" multiValued="true"/>
  <fieldType name="int" class="solr.TrieIntField" omitNorms="true" positionIncrementGap="0" precisionStep="0"/>
  <fieldType name="long" class="solr.TrieLongField" omitNorms="true" positionIncrementGap="0" precisionStep="0"/>
  <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
    <analyzer>
      <tokenizer class="solr.KeywordTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
  </fieldType>
  <fieldType name="payloads" class="solr.TextField" indexed="true" stored="false">
    <analyzer>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
    </analyzer>
  </fieldType>
  <fieldType name="phonetic" class="solr.TextField" indexed="true" stored="false">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
    </analyzer>
  </fieldType>
  <fieldType name="string" class="solr.StrField" omitNorms="true" sortMissingLast="true"/>
  <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" positionIncrementGap="0" precisionStep="6"/>
  <fieldType name="tdouble" class="solr.TrieDoubleField" omitNorms="true" positionIncrementGap="0" precisionStep="8"/>
  <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.EnglishPossessiveFilterFactory"/>
      <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
      <filter class="solr.PorterStemFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.EnglishPossessiveFilterFactory"/>
      <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
      <filter class="solr.PorterStemFilterFactory"/>
    </analyzer>
  </fieldType>
  <fieldType name="text_en_splitting" class="solr.TextField" autoGeneratePhraseQueries="true" positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.WordDelimiterFilterFactory" catenateNumbers="1" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="1"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
      <filter class="solr.PorterStemFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.WordDelimiterFilterFactory" catenateNumbers="0" generateNumberParts="1" splitOnCaseChange="1" generateWordParts="1" catenateAll="0" catenateWords="0"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
      <filter class="solr.PorterStemFilterFactory"/>
    </analyzer>
  </fieldType>
  <fieldType name="text_en_splitting_tight" class="solr.TextField" autoGeneratePhraseQueries="true" positionIncrementGap="100">
    <analyzer>
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.SynonymFilterFactory" expand="false" ignoreCase="true" synonyms="synonyms.txt"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.WordDelimiterFilterFactory" catenateNumbers="1" generateNumberParts="0" generateWordParts="0" catenateAll="0" catenateWords="1"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
      <filter class="solr.EnglishMinimalStemFilterFactory"/>
      <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    </analyzer>
  </fieldType>
  <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
      <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
  </fieldType>
  <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.ReversedWildcardFilterFactory" maxPosQuestion="2" maxFractionAsterisk="0.33" maxPosAsterisk="3" withOriginal="true"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.SynonymFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
      <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
      <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
  </fieldType>
  <fieldType name="text_path" class="solr.TextField" positionIncrementGap="100">
    <analyzer>
      <tokenizer class="solr.PathHierarchyTokenizerFactory"/>
    </analyzer>
  </fieldType>
  <fieldType name="tfloat" class="solr.TrieFloatField" omitNorms="true" positionIncrementGap="0" precisionStep="8"/>
  <fieldType name="tint" class="solr.TrieIntField" omitNorms="true" positionIncrementGap="0" precisionStep="8"/>
  <fieldType name="tlong" class="solr.TrieLongField" omitNorms="true" positionIncrementGap="0" precisionStep="8"/>
  <fieldType name="url" class="solr.TextField" positionIncrementGap="100">
    <analyzer>
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.WordDelimiterFilterFactory" generateNumberParts="1" generateWordParts="1"/>
    </analyzer>
  </fieldType>
  <field name="_version_" type="long" indexed="true" stored="true"/>
  <field name="anchor" type="text_general" multiValued="true" indexed="true" stored="true"/>
  <field name="author" type="string" indexed="true" stored="true"/>
  <field name="batchId" type="string" indexed="false" stored="true"/>
  <field name="boost" type="float" indexed="false" stored="true"/>
  <field name="cache" type="string" indexed="false" stored="true"/>
  <field name="cc" type="string" multiValued="true" indexed="true" stored="true"/>
  <field name="content" type="text_general" indexed="true" stored="true"/>
  <field name="contentLength" type="string" indexed="false" stored="true"/>
  <field name="date" type="tdate" indexed="true" stored="true"/>
  <field name="digest" type="string" indexed="false" stored="true"/>
  <field name="feed" type="string" indexed="true" stored="true"/>
  <field name="host" type="url" indexed="true" stored="false"/>

  <field name="id" type="string" indexed="true" required="true" stored="true"/>


<field name="signatureField" type="string" indexed="true"  stored="true" multiValued="false" />



  <field name="lang" type="string" indexed="true" stored="true"/>
  <field name="lastModified" type="date" indexed="false" stored="true"/>
  <field name="publishedDate" type="date" indexed="true" stored="true"/>
  <field name="rawcontent" type="string" indexed="false" stored="true"/>
  <field name="subcollection" type="string" multiValued="true" indexed="true" stored="true"/>
  <field name="tag" type="string" multiValued="true" indexed="true" stored="true"/>
  <field name="text" type="text_general" multiValued="true" indexed="true" stored="false"/>
  <field name="title" type="text_general" multiValued="true" indexed="true" stored="true"/>
  <field name="tld" type="string" indexed="false" stored="false"/>
  <field name="tstamp" type="date" indexed="false" stored="true"/>
  <field name="type" type="string" multiValued="true" indexed="true" stored="true"/>
  <field name="updatedDate" type="date" indexed="true" stored="true"/>
  <field name="url" type="url" indexed="true" stored="true"/>
  <dynamicField name="meta_*" type="string" indexed="true" stored="true"/>
  <copyField source="anchor" dest="text"/>
  <copyField source="author" dest="text"/>
  <copyField source="content" dest="text"/>
  <copyField source="title" dest="text"/>
  <copyField source="url" dest="text"/>
</schema>

标签: xmlsolr

解决方案


推荐阅读