Quantcast

Show a portion of searchable text in Solr

classic Classic list List threaded Threaded
10 messages Options
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Show a portion of searchable text in Solr

anarchos78
Hello friends,
I have indexed very large documents; in some cases a document has 100,000 characters. Is there a way to return only a portion of each document (let's say the first 300 characters) when I am querying Solr? Is there any attribute to set in schema.xml or solrconfig.xml to achieve this?
I have tried many things but nothing worked.
Thank you in advance,
Tom

The schema.xml:

<?xml version="1.0" encoding="UTF-8" ?>




<schema name="example" version="1.5">
 

  <types>
   

   
    <fieldType name="string" class="solr.StrField" sortMissingLast="true" />

   
    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
   
    <fieldtype name="binary" class="solr.BinaryField"/>

       

   
    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
    <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" positionIncrementGap="0"/>
    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
    <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" positionIncrementGap="0"/>

   
    <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0"/>
    <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0"/>
    <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0"/>
    <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0"/>

   
    <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>

   
    <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>


   
    <fieldType name="pint" class="solr.IntField"/>
    <fieldType name="plong" class="solr.LongField"/>
    <fieldType name="pfloat" class="solr.FloatField"/>
    <fieldType name="pdouble" class="solr.DoubleField"/>
    <fieldType name="pdate" class="solr.DateField" sortMissingLast="true"/>


   
    <fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/>
    <fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/>
    <fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/>
    <fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/>


   
    <fieldType name="random" class="solr.RandomSortField" indexed="true" />


       


   

   

   
    <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      </analyzer>
    </fieldType>

   
    <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
       
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

   
    <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
       
         
         
      <analyzer type="index">
           
         
               
        <tokenizer class="solr.StandardTokenizerFactory"/>
       
       
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="lang/stopwords_en.txt"
                enablePositionIncrements="true"
                />
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.EnglishPossessiveFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
       
        <filter class="solr.PorterStemFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="lang/stopwords_en.txt"
                enablePositionIncrements="true"
                />
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.EnglishPossessiveFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
       
        <filter class="solr.PorterStemFilterFactory"/>
      </analyzer>
    </fieldType>

   
    <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
       
       
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="lang/stopwords_en.txt"
                enablePositionIncrements="true"
                />
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
        <filter class="solr.PorterStemFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="lang/stopwords_en.txt"
                enablePositionIncrements="true"
                />
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
        <filter class="solr.PorterStemFilterFactory"/>
      </analyzer>
    </fieldType>

   
    <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
        <filter class="solr.EnglishMinimalStemFilterFactory"/>
       
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
    </fieldType>

   
    <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
           maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

   
   

   
    <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
      <analyzer>
       
        <tokenizer class="solr.KeywordTokenizerFactory"/>
       
        <filter class="solr.LowerCaseFilterFactory" />
       
        <filter class="solr.TrimFilterFactory" />
       
        <filter class="solr.PatternReplaceFilterFactory"
                pattern="([^a-z])" replacement="" replace="all"
        />
      </analyzer>
    </fieldType>
   
    <fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
      </analyzer>
    </fieldtype>

    <fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
       
        <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
      </analyzer>
    </fieldtype>

   
    <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.KeywordTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory" />
      </analyzer>
    </fieldType>

    <fieldType name="text_path" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.PathHierarchyTokenizerFactory"/>
      </analyzer>
    </fieldType>
   

     
    <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />

   
    <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>

   
    <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>

   
    <fieldtype name="geohash" class="solr.GeoHashField"/>
   
    <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml" />
             
   

   
    <fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
       
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" enablePositionIncrements="true"/>
       
        <filter class="solr.ArabicNormalizationFilterFactory"/>
        <filter class="solr.ArabicStemFilterFactory"/>
      </analyzer>
    </fieldType>

   
    <fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/> 
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" enablePositionIncrements="true"/>
        <filter class="solr.BulgarianStemFilterFactory"/>       
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
       
        <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" enablePositionIncrements="true"/>
        <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>       
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory"/>
       
        <filter class="solr.CJKWidthFilterFactory"/>
       
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.CJKBigramFilterFactory"/>
      </analyzer>
    </fieldType>

   
    <fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" enablePositionIncrements="true"/>
        <filter class="solr.CzechStemFilterFactory"/>       
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" enablePositionIncrements="true"/>
        <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>       
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" enablePositionIncrements="true"/>
        <filter class="solr.GermanNormalizationFilterFactory"/>
        <filter class="solr.GermanLightStemFilterFactory"/>
       
       
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
       
        <filter class="solr.GreekLowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
        <filter class="solr.GreekStemFilterFactory"/>
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" enablePositionIncrements="true"/>
        <filter class="solr.SpanishLightStemFilterFactory"/>
       
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" enablePositionIncrements="true"/>
        <filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
       
        <charFilter class="solr.PersianCharFilterFactory"/>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ArabicNormalizationFilterFactory"/>
        <filter class="solr.PersianNormalizationFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" enablePositionIncrements="true"/>
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" enablePositionIncrements="true"/>
        <filter class="solr.SnowballPorterFilterFactory" language="Finnish"/>
       
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
       
        <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" enablePositionIncrements="true"/>
        <filter class="solr.FrenchLightStemFilterFactory"/>
       
       
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
       
        <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>
       
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt" enablePositionIncrements="false"/>
        <filter class="solr.IrishLowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt" enablePositionIncrements="true"/>
        <filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" enablePositionIncrements="true"/>
        <filter class="solr.GalicianStemFilterFactory"/>
       
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
       
        <filter class="solr.IndicNormalizationFilterFactory"/>
       
        <filter class="solr.HindiNormalizationFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" enablePositionIncrements="true"/>
        <filter class="solr.HindiStemFilterFactory"/>
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" enablePositionIncrements="true"/>
        <filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/>
           
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" enablePositionIncrements="true"/>
        <filter class="solr.SnowballPorterFilterFactory" language="Armenian"/>
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" enablePositionIncrements="true"/>
       
        <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
       
        <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" enablePositionIncrements="true"/>
        <filter class="solr.ItalianLightStemFilterFactory"/>
       
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
      <analyzer>
     
        <tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
       
       
        <filter class="solr.JapaneseBaseFormFilterFactory"/>
       
        <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" enablePositionIncrements="true"/>
       
        <filter class="solr.CJKWidthFilterFactory"/>
       
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" enablePositionIncrements="true" />
       
        <filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
       
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" enablePositionIncrements="true"/>
        <filter class="solr.LatvianStemFilterFactory"/>
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" enablePositionIncrements="true"/>
        <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
        <filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" enablePositionIncrements="true"/>
        <filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/>
       
       
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" enablePositionIncrements="true"/>
        <filter class="solr.PortugueseLightStemFilterFactory"/>
       
       
       
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" enablePositionIncrements="true"/>
        <filter class="solr.SnowballPorterFilterFactory" language="Romanian"/>
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" enablePositionIncrements="true"/>
        <filter class="solr.SnowballPorterFilterFactory" language="Russian"/>
       
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" enablePositionIncrements="true"/>
        <filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
       
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ThaiWordFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" enablePositionIncrements="true"/>
      </analyzer>
    </fieldType>
   
   
    <fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
      <analyzer> 
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.TurkishLowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" enablePositionIncrements="true"/>
        <filter class="solr.SnowballPorterFilterFactory" language="Turkish"/>
      </analyzer>
    </fieldType>
 </types>


 <fields>
   

  <field  name="id" type="string" indexed="true" stored="true" multiValued="false" /> 
  <field  name="fake_id" type="string" indexed="true" stored="true" multiValued="false" /> 
  <field  name="model" type="text_en" indexed="true" stored="true" multiValued="false"  />
  <field  name="firstname" type="text_en" indexed="true" stored="true"/>
  <field  name="lastname" type="text_en" indexed="true" stored="true"/>
  <field  name="title" type="text_en" indexed="true" stored="true"/>
  <field  name="biog" type="text_en" indexed="true" stored="true"/>
  <field  name="last_modified" type="string" indexed="true" stored="true"  />
   
 </fields>

 
 <uniqueKey>fake_id</uniqueKey>

 
 

 
 <solrQueryParser defaultOperator="OR"/>

 

   

   

   
   
       
   
           
   

   
   
 

 
 
 
 


</schema>


The solrconfig.xml:

<?xml version="1.0" encoding="UTF-8" ?>

<config>
 
  <abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>
 
 
  <luceneMatchVersion>LUCENE_36</luceneMatchVersion>

 
 
 
 
  <lib dir="lib/dist/" regex="apache-solr-cell-\d.*\.jar" />
  <lib dir="lib/contrib/extraction/lib/" regex=".*\.jar" />

  <lib dir="lib/dist/" regex="apache-solr-clustering-\d.*\.jar" />
  <lib dir="lib/contrib/clustering/lib/" regex=".*\.jar" />

  <lib dir="lib/dist/" regex="apache-solr-dataimporthandler-\d.*\.jar" />
  <lib dir="lib/contrib/dataimporthandler/lib/" regex=".*\.jar" />
 
  <lib dir="lib/dist/" regex="apache-solr-langid-\d.*\.jar" />
  <lib dir="lib/contrib/langid/lib/" regex=".*\.jar" />

  <lib dir="lib/dist/" regex="apache-solr-velocity-\d.*\.jar" />
  <lib dir="lib/contrib/velocity/lib/" regex=".*\.jar" />
 
  <lib dir="lib/dist/" regex="apache-solr-dataimporthandler-extras-\d.*\.jar" />
 
  <lib dir="lib/contrib/extraction/lib/" regex="tika-core-\d.*\.jar" />
  <lib dir="lib/contrib/extraction/lib/" regex="tika-parsers-\d.*\.jar" />

 

 
  <dataDir>${solr.data.dir:}</dataDir>


 
  <directoryFactory name="DirectoryFactory"
                    class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>

 
  <indexConfig>
   
  </indexConfig>


 
  <jmx />
 

 
  <updateHandler class="solr.DirectUpdateHandler2">

   
  </updateHandler>
 
 

 
  <query>
   
    <maxBooleanClauses>1024</maxBooleanClauses>


   
    <filterCache class="solr.FastLRUCache"
                 size="512"
                 initialSize="512"
                 autowarmCount="0"/>

   
    <queryResultCache class="solr.LRUCache"
                     size="512"
                     initialSize="512"
                     autowarmCount="0"/>
   
   
    <documentCache class="solr.LRUCache"
                   size="512"
                   initialSize="512"
                   autowarmCount="0"/>
   
   
   

   
   


   
    <enableLazyFieldLoading>true</enableLazyFieldLoading>

   
   

   
   <queryResultWindowSize>20</queryResultWindowSize>

   
   <queryResultMaxDocsCached>200</queryResultMaxDocsCached>

   
   
    <listener event="newSearcher" class="solr.QuerySenderListener">
      <arr name="queries">
       
      </arr>
    </listener>
    <listener event="firstSearcher" class="solr.QuerySenderListener">
      <arr name="queries">
        <lst>
          <str name="q">static firstSearcher warming in solrconfig.xml</str>
        </lst>
      </arr>
    </listener>

   
    <useColdSearcher>false</useColdSearcher>

   
    <maxWarmingSearchers>2</maxWarmingSearchers>

  </query>


 
  <requestDispatcher>
     
    <requestParsers enableRemoteStreaming="true"
                    multipartUploadLimitInKB="2048000" />

   
    <httpCaching never304="true" />
   
   
   
   
  </requestDispatcher>

 
       
  <requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
        <lst name="defaults">
                <str name="config">data-config.xml</str>
        </lst>
  </requestHandler>
 
       
 
  <requestHandler name="/select" class="solr.SearchHandler">
   
     <lst name="defaults">
       <str name="echoParams">explicit</str>
       <int name="rows">100</int>
       <str name="df">biog</str>
     </lst>
   
   
   
   
   
   
   
  </requestHandler>

 
  <!-- Endpoint "/browse", served by solr.SearchHandler and rendered through
       the "velocity" response writer (page title "Solritas").
       NOTE(review): the field names used below (text, features, name, sku,
       id, manu, cat, manu_exact, inStock, price, popularity,
       manufacturedate_dt) are presumably expected to exist in schema.xml;
       confirm against the deployed schema. -->
  <requestHandler name="/browse" class="solr.SearchHandler">
     <lst name="defaults">
       <str name="echoParams">explicit</str>

       <!-- Response writer and Velocity template/layout selection -->
       <str name="wt">velocity</str>

       <str name="v.template">browse</str>
       <str name="v.layout">layout</str>
       <str name="title">Solritas</str>

       <!-- Query parsing: edismax over "text", match-all fallback, 10 rows -->
       <str name="df">text</str>
       <str name="defType">edismax</str>
       <str name="q.alt">*:*</str>
       <str name="rows">10</str>
       <str name="fl">*,score</str>
       <!-- mlt.* settings; mlt.qf carries the same boosts as qf below -->
       <str name="mlt.qf">
         text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
       </str>
       <str name="mlt.fl">text,features,name,sku,id,manu,cat</str>
       <int name="mlt.count">3</int>

       <!-- Per-field boosts for the edismax query -->
       <str name="qf">
          text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
       </str>

       <!-- Faceting: field, query, pivot and range facets -->
       <str name="facet">on</str>
       <str name="facet.field">cat</str>
       <str name="facet.field">manu_exact</str>
       <str name="facet.query">ipod</str>
       <str name="facet.query">GB</str>
       <str name="facet.mincount">1</str>
       <str name="facet.pivot">cat,inStock</str>
       <str name="facet.range.other">after</str>
       <!-- price: 0 to 600 in steps of 50 -->
       <str name="facet.range">price</str>
       <int name="f.price.facet.range.start">0</int>
       <int name="f.price.facet.range.end">600</int>
       <int name="f.price.facet.range.gap">50</int>
       <!-- popularity: 0 to 10 in steps of 3 -->
       <str name="facet.range">popularity</str>
       <int name="f.popularity.facet.range.start">0</int>
       <int name="f.popularity.facet.range.end">10</int>
       <int name="f.popularity.facet.range.gap">3</int>
       <!-- manufacturedate_dt: last ten years up to NOW, one bucket per year,
            with both "before" and "after" requested for this field -->
       <str name="facet.range">manufacturedate_dt</str>
       <str name="f.manufacturedate_dt.facet.range.start">NOW/YEAR-10YEARS</str>
       <str name="f.manufacturedate_dt.facet.range.end">NOW</str>
       <str name="f.manufacturedate_dt.facet.range.gap">+1YEAR</str>
       <str name="f.manufacturedate_dt.facet.range.other">before</str>
       <str name="f.manufacturedate_dt.facet.range.other">after</str>


       <!-- Highlighting on text, features and name; "name" gets fragsize 0
            and names itself as its alternate field -->
       <str name="hl">on</str>
       <str name="hl.fl">text features name</str>
       <str name="f.name.hl.fragsize">0</str>
       <str name="f.name.hl.alternateField">name</str>
     </lst>
     <!-- The "spellcheck" component is listed under last-components -->
     <arr name="last-components">
       <str>spellcheck</str>
     </arr>

  </requestHandler>

 
  <requestHandler name="/update"
                  class="solr.XmlUpdateRequestHandler">
   
   
    </requestHandler>
 
  <requestHandler name="/update/javabin"
                  class="solr.BinaryUpdateRequestHandler" />

 
  <requestHandler name="/update/csv"
                  class="solr.CSVRequestHandler"
                  startup="lazy" />

 
  <requestHandler name="/update/json"
                  class="solr.JsonUpdateRequestHandler"
                  startup="lazy" />

 
  <requestHandler name="/update/extract"
                  startup="lazy"
                  class="solr.extraction.ExtractingRequestHandler" >
    <lst name="defaults">
     
      <str name="fmap.content">text</str>
      <str name="lowernames">true</str>
      <str name="uprefix">ignored_</str>

     
      <str name="captureAttr">true</str>
      <str name="fmap.a">links</str>
      <str name="fmap.div">ignored_</str>
    </lst>
  </requestHandler>

 
  <requestHandler name="/update/xslt"
                   startup="lazy"
                   class="solr.XsltUpdateRequestHandler"/>

 
  <requestHandler name="/analysis/field"
                  startup="lazy"
                  class="solr.FieldAnalysisRequestHandler" />


 
  <requestHandler name="/analysis/document"
                  class="solr.DocumentAnalysisRequestHandler"
                  startup="lazy" />

 
  <requestHandler name="/admin/"
                  class="solr.admin.AdminHandlers" />
 
 
 
 

 
  <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
    <lst name="invariants">
      <str name="q">solrpingquery</str>
    </lst>
    <lst name="defaults">
      <str name="echoParams">all</str>
    </lst>
  </requestHandler>

 
  <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" >
    <lst name="defaults">
     <str name="echoParams">explicit</str> 
     <str name="echoHandler">true</str>
    </lst>
  </requestHandler>

 
 

 

   
  <searchComponent name="spellcheck" class="solr.SpellCheckComponent">

    <str name="queryAnalyzerFieldType">textSpell</str>

   

   
    <lst name="spellchecker">
      <str name="name">default</str>
      <str name="field">name</str>
      <str name="spellcheckIndexDir">spellchecker</str>
     
     
    </lst>

   
   

   
   

   
   
  </searchComponent>

 
  <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
    <lst name="defaults">
      <str name="df">text</str>
      <str name="spellcheck.onlyMorePopular">false</str>
      <str name="spellcheck.extendedResults">false</str>
      <str name="spellcheck.count">1</str>
    </lst>
    <arr name="last-components">
      <str>spellcheck</str>
    </arr>
  </requestHandler>

 
  <searchComponent name="tvComponent" class="solr.TermVectorComponent"/>

 
  <requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy">
    <lst name="defaults">
      <str name="df">text</str>
      <bool name="tv">true</bool>
    </lst>
    <arr name="last-components">
      <str>tvComponent</str>
    </arr>
  </requestHandler>

 
  <searchComponent name="clustering"
                   enable="${solr.clustering.enabled:false}"
                   class="solr.clustering.ClusteringComponent" >
   
    <lst name="engine">
     
      <str name="name">default</str>

     
      <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>

     
      <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
     
     
      <str name="carrot.lexicalResourcesDir">clustering/carrot2</str>

     
      <str name="MultilingualClustering.defaultLanguage">ENGLISH</str>
    </lst>
    <lst name="engine">
      <str name="name">stc</str>
      <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
    </lst>
  </searchComponent>

 
  <requestHandler name="/clustering"
                  startup="lazy"
                  enable="${solr.clustering.enabled:false}"
                  class="solr.SearchHandler">
    <lst name="defaults">
      <bool name="clustering">true</bool>
      <str name="clustering.engine">default</str>
      <bool name="clustering.results">true</bool>
     
      <str name="carrot.title">name</str>
      <str name="carrot.url">id</str>
     
       <str name="carrot.snippet">features</str>
       
       <bool name="carrot.produceSummary">true</bool>
       
       
       
       <bool name="carrot.outputSubClusters">false</bool>
       
       <str name="df">text</str>
       <str name="defType">edismax</str>
       <str name="qf">
          text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
       </str>
       <str name="q.alt">*:*</str>
       <str name="rows">10</str>
       <str name="fl">*,score</str>
    </lst>     
    <arr name="last-components">
      <str>clustering</str>
    </arr>
  </requestHandler>
 
 
  <searchComponent name="terms" class="solr.TermsComponent"/>

 
  <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
     <lst name="defaults">
      <bool name="terms">true</bool>
    </lst>     
    <arr name="components">
      <str>terms</str>
    </arr>
  </requestHandler>


 
  <searchComponent name="elevator" class="solr.QueryElevationComponent" >
   
    <str name="queryFieldType">string</str>
    <str name="config-file">elevate.xml</str>
  </searchComponent>

 
  <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
    <lst name="defaults">
      <str name="echoParams">explicit</str>
      <str name="df">text</str>
    </lst>
    <arr name="last-components">
      <str>elevator</str>
    </arr>
  </requestHandler>

 
  <searchComponent class="solr.HighlightComponent" name="highlight">
    <highlighting>
     
     
      <fragmenter name="gap"
                  default="true"
                  class="solr.highlight.GapFragmenter">
        <lst name="defaults">
          <int name="hl.fragsize">100</int>
        </lst>
      </fragmenter>

     
      <fragmenter name="regex"
                  class="solr.highlight.RegexFragmenter">
        <lst name="defaults">
         
          <int name="hl.fragsize">70</int>
         
          <float name="hl.regex.slop">0.5</float>
         
          <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str>
        </lst>
      </fragmenter>

     
      <formatter name="html"
                 default="true"
                 class="solr.highlight.HtmlFormatter">
        <lst name="defaults">
          <str name="hl.simple.pre">
<![CDATA[<em>]]>
</str>
          <str name="hl.simple.post">
<![CDATA[</em>]]>
</str>
        </lst>
      </formatter>

     
      <encoder name="html"
               class="solr.highlight.HtmlEncoder" />

     
      <fragListBuilder name="simple"
                       default="true"
                       class="solr.highlight.SimpleFragListBuilder"/>

     
      <fragListBuilder name="single"
                       class="solr.highlight.SingleFragListBuilder"/>

     
      <fragmentsBuilder name="default"
                        default="true"
                        class="solr.highlight.ScoreOrderFragmentsBuilder">
       
      </fragmentsBuilder>

     
      <fragmentsBuilder name="colored"
                        class="solr.highlight.ScoreOrderFragmentsBuilder">
        <lst name="defaults">
          <str name="hl.tag.pre">
<![CDATA[
               <b style="background:yellow">,<b style="background:lawgreen">,
               <b style="background:aquamarine">,<b style="background:magenta">,
               <b style="background:palegreen">,<b style="background:coral">,
               <b style="background:wheat">,<b style="background:khaki">,
               <b style="background:lime">,<b style="background:deepskyblue">]]>
</str>
          <str name="hl.tag.post">
<![CDATA[</b>]]>
</str>
        </lst>
      </fragmentsBuilder>
     
      <boundaryScanner name="default"
                       default="true"
                       class="solr.highlight.SimpleBoundaryScanner">
        <lst name="defaults">
          <str name="hl.bs.maxScan">10</str>
          <str name="hl.bs.chars">.,!? </str>
        </lst>
      </boundaryScanner>
     
      <boundaryScanner name="breakIterator"
                       class="solr.highlight.BreakIteratorBoundaryScanner">
        <lst name="defaults">
         
          <str name="hl.bs.type">WORD</str>
         
          <str name="hl.bs.language">en</str>
          <str name="hl.bs.country">US</str>
        </lst>
      </boundaryScanner>
    </highlighting>
  </searchComponent>

   
 
 

   
   
 
 
 
 

  <queryResponseWriter name="json" class="solr.JSONResponseWriter">
     
    <str name="content-type">text/plain; charset=UTF-8</str>
  </queryResponseWriter>
 
 
    <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"/>
 

 
  <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
    <int name="xsltCacheLifetimeSeconds">5</int>
  </queryResponseWriter>

 
 
 

 
 
 

 
  <admin>
    <defaultQuery>*:*</defaultQuery>

   
   
  </admin>

</config>
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: Show a portion of searchable text in Solr

iorixxx
> I have indexed very large documents, In some cases these
> documents has
> 100.000 characters. Is there a way to return a portion of
> the documents
> (lets say the 300 first characters) when i am querying
> "Solr"?. Is there any
> attribute to set in the schema.xml or solrconfig.xml to
> achieve this?

I have a set-up with very large documents too. Here are two different solutions that I have used in the past:

1) Use highlighting with hl.alternateField and hl.maxAlternateFieldLength
http://wiki.apache.org/solr/HighlightingParameters

2) Create an extra field (indexed="false" and stored="true") using copyField just for display purposes. (&fl=shortField)

<copyField source="largeField" dest="shortField" maxChars="300"/>
http://wiki.apache.org/solr/SchemaXml#Copy_Fields

Also, I haven't used it myself yet, but I *think* this can be accomplished by using a custom Transformer too. http://wiki.apache.org/solr/DocTransformers
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Date format in the schema.xml

Bruno Mannina
Dear,

is it mandatory to use the date format yyyy-mm-ddThh:mm:ssZ ?

I have a date with this format:
yyyymmdd
  in my xml source file.

Where can I find more information, I found only these definitions in the
schema.xml

<fieldType name="date" class="solr.TrieDateField" precisionStep="0"
positionIncrementGap="0"/>
<fieldType name="tdate" class="solr.TrieDateField" precisionStep="6"
positionIncrementGap="0"/>

Could you also explain the precisionStep param to me?

Thanks and sorry for this newbie question,
Bruno
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: Date format in the schema.xml

iorixxx
> is it mandatory to use the date format yyyy-mm-ddThh:mm:ssZ
> ?

Yes.

> I have a date with this format:
> yyyymmdd
>  in my xml source file.
>
> Where can I find more information, I found only these
> definitions in the schema.xml

In schema.xml there is an XML comment about dates, starting with
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z,


> Could you explain me the PrecisionStep param also?

This presentation explains trie based fields.
http://www.thetaphi.de/share/Schindler-TrieRange.ppt 



Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: Date format in the schema.xml

Bruno Mannina
Ok Thanks !

Le 14/05/2012 16:16, Ahmet Arslan a écrit :

>> is it mandatory to use the date format yyyy-mm-ddThh:mm:ssZ
>> ?
> Yes.
>
>> I have a date with this format:
>> yyyymmdd
>>   in my xml source file.
>>
>> Where can I find more information, I found only these
>> definitions in the schema.xml
> In schema.xml there is a xml comment about dates, starting with
> <!-- The format for this date field is of the form 1995-12-31T23:59:59Z,
>
>
>> Could you explain me the PrecisionStep param also?
> This presentation explains trie based fields.
> http://www.thetaphi.de/share/Schindler-TrieRange.ppt
>
>
>
>
>

Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: Date format in the schema.xml

Jack Krupansky-2
In reply to this post by iorixxx
At least in this case where dates have a precision of day, the total number
of unique values should be relatively low (3,650 for a 10-year period or
even 18,250 for a 50-year period), so precision step probably won't matter
in this case much at all. The big benefit with tdate over old date here is
the fact that the string is stored as a long integer, which speeds
comparisons.

A precisionStep of zero ("0") means use the maximum value
(Integer.MAX_VALUE). The default (in trunk; not sure about 3.6 or earlier)
is "8", not 4 as the Javadoc indicates. Zero or high step values mean more
precision, less index space consumed, but slower searching. Lower step
values, such as 8 or 4 or even 1, mean less precision, more index space
consumed, but faster searching.

I'm still struggling to figure out how to map values of precisionStep to
"precision" of the input data. For example, seconds, minutes, hours, days,
years for date values.

I'm also not sure what the implications, if any, are for faceting and the
FieldCache for Trie precision step.

Here's more detail:
http://lucene.apache.org/core/3_6_0/api/all/org/apache/lucene/search/NumericRangeQuery.html

-- Jack Krupansky

-----Original Message-----
From: Ahmet Arslan
Sent: Monday, May 14, 2012 10:16 AM
To: [hidden email]
Subject: Re: Date format in the schema.xml

> is it mandatory to use the date format yyyy-mm-ddThh:mm:ssZ
> ?

Yes.

> I have a date with this format:
> yyyymmdd
>  in my xml source file.
>
> Where can I find more information, I found only these
> definitions in the schema.xml

In schema.xml there is a xml comment about dates, starting with
<!-- The format for this date field is of the form 1995-12-31T23:59:59Z,


> Could you explain me the PrecisionStep param also?

This presentation explains trie based fields.
http://www.thetaphi.de/share/Schindler-TrieRange.ppt


Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: Show a portion of searchable text in Solr

Shameema Umer
In reply to this post by iorixxx
Can somebody tell me where I should place the highlighting parameters? When
I put them on the query, it does not work.
&hl=true&hl.requireFieldMatch=true&hl.fl=*

FYI: I am new to solr. My aim  is to have emphasis tags on the queried
words and need to display only the query relevant snippet of the <content>

Thanks
Shameema





On Mon, May 14, 2012 at 1:18 PM, Ahmet Arslan <[hidden email]> wrote:

> > I have indexed very large documents, In some cases these
> > documents has
> > 100.000 characters. Is there a way to return a portion of
> > the documents
> > (lets say the 300 first characters) when i am querying
> > "Solr"?. Is there any
> > attribute to set in the schema.xml or solrconfig.xml to
> > achieve this?
>
> I have a set-up with very large documents too. Here is two different
> solutions that I have used in the past:
>
> 1) Use highlighting with hl.alternateField and hl.maxAlternateFieldLength
> http://wiki.apache.org/solr/HighlightingParameters
>
> 2) Create an extra field (indexed="false" and stored="true") using
> copyField just for display purposes. (&fl=shortField)
>
> <copyField source="largeField" dest="shortField" maxChars="300"/>
> http://wiki.apache.org/solr/SchemaXml#Copy_Fields
>
> Also, didn't used by myself yet but I *think* this can be accomplished by
> using a custom Transformer too.
> http://wiki.apache.org/solr/DocTransformers
>
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: Show a portion of searchable text in Solr

Jack Krupansky-2
See the "/browse" request handler in the example config.

Only stored fields will be highlighted.

-- Jack Krupansky

-----Original Message-----
From: Shameema Umer
Sent: Tuesday, May 15, 2012 2:59 AM
To: [hidden email]
Subject: Re: Show a portion of searchable text in Solr

Can somebody tell me where should I place the highlighting parameters, when
I did on the query, it is not working.
&hl=true&hl.requireFieldMatch=true&hl.fl=*

FYI: I am new to solr. My aim  is to have emphasis tags on the queried
words and need to display only the query relevant snippet of the <content>

Thanks
Shameema





On Mon, May 14, 2012 at 1:18 PM, Ahmet Arslan <[hidden email]> wrote:

> > I have indexed very large documents, In some cases these
> > documents has
> > 100.000 characters. Is there a way to return a portion of
> > the documents
> > (lets say the 300 first characters) when i am querying
> > "Solr"?. Is there any
> > attribute to set in the schema.xml or solrconfig.xml to
> > achieve this?
>
> I have a set-up with very large documents too. Here is two different
> solutions that I have used in the past:
>
> 1) Use highlighting with hl.alternateField and hl.maxAlternateFieldLength
> http://wiki.apache.org/solr/HighlightingParameters
>
> 2) Create an extra field (indexed="false" and stored="true") using
> copyField just for display purposes. (&fl=shortField)
>
> <copyField source="largeField" dest="shortField" maxChars="300"/>
> http://wiki.apache.org/solr/SchemaXml#Copy_Fields
>
> Also, didn't used by myself yet but I *think* this can be accomplished by
> using a custom Transformer too.
> http://wiki.apache.org/solr/DocTransformers
>
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: Show a portion of searchable text in Solr

anarchos78
Thanks
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: Date format in the schema.xml

Chris Hostetter-3
In reply to this post by Bruno Mannina

: Subject: Date format in the schema.xml
: References: <[hidden email]>
: In-Reply-To: <[hidden email]>

https://people.apache.org/~hossman/#threadhijack
Thread Hijacking on Mailing Lists

When starting a new discussion on a mailing list, please do not reply to
an existing message, instead start a fresh email.  Even if you change the
subject line of your email, other mail headers still track which thread
you replied to and your question is "hidden" in that thread and gets less
attention.   It makes following discussions in the mailing list archives
particularly difficult.



-Hoss
Loading...