Phrase search with Solr 7.2

classic Classic list List threaded Threaded
4 messages Options
Reply | Threaded
Open this post in threaded view
|

Phrase search with Solr 7.2

Steven White
Hi everyone,

I switched over from Solr 5.2.1 to 7.2.1. Other than re-indexing my data, my
schema design remains the same.

The issue I see now is I'm getting 0 hits on phrase searches, why?

Here is the query I'm sending that gives me 0 hits:

http://localhost:8983/solr/ccfts/select_test?q=%22cat+dog%22&wt=json&indent=true

But this query will give me hits:

http://localhost:8983/solr/ccfts/select_test?q=cat+dog&wt=json&indent=true

Here is my schema:

<fieldType name="analyzer_text" class="solr.TextField"
positionIncrementGap="100" autoGeneratePhraseQueries="true">
       <analyzer>
  <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  <filter class="solr.SynonymFilterFactory" expand="true"
synonyms="synonyms.txt" ignoreCase="true"/>
  <filter class="solr.WordDelimiterFilterFactory" preserveOriginal="0"
generateNumberParts="1" splitOnCaseChange="0" catenateWords="1"
splitOnNumerics="1" stemEnglishPossessive="1" generateWordParts="1"
catenateAll="1" catenateNumbers="1"/>
  <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt"
ignoreCase="true"/>
  <filter class="solr.LowerCaseFilterFactory"/>
  <filter class="solr.EnglishPossessiveFilterFactory"/>
  <filter class="solr.KeywordMarkerFilterFactory"
protected="protwords.txt"/>
  <filter class="solr.PorterStemFilterFactory"/>
  <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
       </analyzer>
    </fieldType>

Here are my fields:

<field name=" analyzer _text"     type="text_general"   indexed="true"
required="false" stored="false"  multiValued="true" />
    <field name="CC_UNIQUE_FIELD"    type="string"         indexed="true"
required="true"  stored="true"   multiValued="false" />
<field name="CC_ALL_FIELDS_DATA" type=" analyzer_text" indexed="true"
required="false" stored="false"  multiValued="true" />
    <field name="CC_COMPONENT_NAME"  type="string"         indexed="true"
required="true"  stored="false"  multiValued="false" docValues="true" />
    <field name="CC_FILE_EXT"        type="string"         indexed="true"
required="true"  stored="false"  multiValued="false" docValues="true" />
    <field name="CC_FILE_NAME"       type="string"         indexed="true"
required="true"  stored="false"  multiValued="false" docValues="true" />
    <field name="CC_FILE_PATH"       type="string"         indexed="true"
required="true"  stored="true"   multiValued="false" docValues="true" />

And here is my handler:

{"requestHandler":{"/select_test":{
      "class":"solr.SearchHandler",
      "name":"/select_test",
      "defaults":{
        "defType":"edismax",
        "echoParams":"explicit",
        "fl":"CC_UNIQUE_FIELD,CC_FILE_PATH,score",
        "indent":"true",
        "qf":"CC_ALL_FIELDS_DATA",
        "rows":"10",
        "tie":"1.0",
        "wt":"xml"}}}}

What am I doing wrong?

Steven
Reply | Threaded
Open this post in threaded view
|

Re: Phrase search with Solr 7.2

Mikhail Khludnev-2
Hello, Steven.

Have you tried sow=true?
see
https://lucene.apache.org/solr/guide/7_2/the-extended-dismax-query-parser.html


Anyway, you can start with debugQuery=true, then try exploring
explainOther, and finally check the Analysis page.

On Mon, Mar 26, 2018 at 3:10 AM, Steven White <[hidden email]> wrote:

> Hi everyone,
>
> I switched over from Solr 5.2.1 to 7.2.1 other than re-indexing my data and
> schema design remain the same.
>
> The issue I see now is I'm getting 0 hits on phrase searches, why?
>
> Here is the query I'm sending that gives me 0 hits:
>
> http://localhost:8983/solr/ccfts/select_test?q=%22cat+
> dog%22&wt=json&indent=true
>
> But this query will give me hits:
>
> http://localhost:8983/solr/ccfts/select_test?q=cat+dog&wt=json&indent=true
>
> Here is my schema:
>
> <fieldType name="analyzer_text" class="solr.TextField"
> positionIncrementGap="100" autoGeneratePhraseQueries="true">
>        <analyzer>
>   <tokenizer class="solr.WhitespaceTokenizerFactory"/>
>   <filter class="solr.SynonymFilterFactory" expand="true"
> synonyms="synonyms.txt" ignoreCase="true"/>
>   <filter class="solr.WordDelimiterFilterFactory" preserveOriginal="0"
> generateNumberParts="1" splitOnCaseChange="0" catenateWords="1"
> splitOnNumerics="1" stemEnglishPossessive="1" generateWordParts="1"
> catenateAll="1" catenateNumbers="1"/>
>   <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt"
> ignoreCase="true"/>
>   <filter class="solr.LowerCaseFilterFactory"/>
>   <filter class="solr.EnglishPossessiveFilterFactory"/>
>   <filter class="solr.KeywordMarkerFilterFactory"
> protected="protwords.txt"/>
>   <filter class="solr.PorterStemFilterFactory"/>
>   <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
>        </analyzer>
>     </fieldType>
>
> Here are my fields:
>
> <field name=" analyzer _text"     type="text_general"   indexed="true"
> required="false" stored="false"  multiValued="true" />
>     <field name="CC_UNIQUE_FIELD"    type="string"         indexed="true"
> required="true"  stored="true"   multiValued="false" />
> <field name="CC_ALL_FIELDS_DATA" type=" analyzer_text" indexed="true"
> required="false" stored="false"  multiValued="true" />
>     <field name="CC_COMPONENT_NAME"  type="string"         indexed="true"
> required="true"  stored="false"  multiValued="false" docValues="true" />
>     <field name="CC_FILE_EXT"        type="string"         indexed="true"
> required="true"  stored="false"  multiValued="false" docValues="true" />
>     <field name="CC_FILE_NAME"       type="string"         indexed="true"
> required="true"  stored="false"  multiValued="false" docValues="true" />
>     <field name="CC_FILE_PATH"       type="string"         indexed="true"
> required="true"  stored="true"   multiValued="false" docValues="true" />
>
> And here is my handler:
>
> {"requestHandler":{"/select_test":{
>       "class":"solr.SearchHandler",
>       "name":"/select_test",
>       "defaults":{
>         "defType":"edismax",
>         "echoParams":"explicit",
>         "fl":"CC_UNIQUE_FIELD,CC_FILE_PATH,score",
>         "indent":"true",
>         "qf":"CC_ALL_FIELDS_DATA",
>         "rows":"10",
>         "tie":"1.0",
>         "wt":"xml"}}}}
>
> What am I doing wrong?
>
> Steven
>



--
Sincerely yours
Mikhail Khludnev
Reply | Threaded
Open this post in threaded view
|

Re: Phrase search with Solr 7.2

Steven White
Setting "sow=true" didn't make a difference.

Here is what I'm using now:
http://localhost:8983/solr/ccfts/select_test?q=%22record%20type%20session%22&wt=json&indent=true&sow=true&debugQuery=true

And here is the output:

{
  "responseHeader":{
    "status":0,
    "QTime":1,
    "params":{
      "q":"\"record type session\"",
      "indent":"true",
      "sow":"true",
      "wt":"json",
      "debugQuery":"true"}},
  "response":{"numFound":0,"start":0,"maxScore":0.0,"docs":[]
  },
  "debug":{
    "rawquerystring":"\"record type session\"",
    "querystring":"\"record type session\"",
    "parsedquery":"+DisjunctionMaxQuery((CC_ALL_FIELDS_DATA:\"record type session\")~1.0)",
    "parsedquery_toString":"+(CC_ALL_FIELDS_DATA:\"record type session\")~1.0",
    "explain":{},
    "QParser":"ExtendedDismaxQParser",
    "altquerystring":null,
    "boost_queries":null,
    "parsed_boost_queries":[],
    "boostfuncs":null,
    "timing":{
      "time":1.0,
      "prepare":{
        "time":1.0,
        "query":{
          "time":0.0},
        "facet":{
          "time":0.0},
        "facet_module":{
          "time":0.0},
        "mlt":{
          "time":0.0},
        "highlight":{
          "time":0.0},
        "stats":{
          "time":0.0},
        "expand":{
          "time":0.0},
        "terms":{
          "time":0.0},
        "debug":{
          "time":0.0}},
      "process":{
        "time":0.0,
        "query":{
          "time":0.0},
        "facet":{
          "time":0.0},
        "facet_module":{
          "time":0.0},
        "mlt":{
          "time":0.0},
        "highlight":{
          "time":0.0},
        "stats":{
          "time":0.0},
        "expand":{
          "time":0.0},
        "terms":{
          "time":0.0},
        "debug":{
          "time":0.0}}}}}


How do I debug this?

Steve

On Mon, Mar 26, 2018 at 12:50 AM, Mikhail Khludnev <[hidden email]> wrote:

> Hello, Steven.
>
> Have you tried sow=true?
> see
> https://lucene.apache.org/solr/guide/7_2/the-extended-
> dismax-query-parser.html
>
>
> Anyway, you can start from debugQuery=true, then try to explore
> explainOther, and get to Analysis page after all.
>
> On Mon, Mar 26, 2018 at 3:10 AM, Steven White <[hidden email]>
> wrote:
>
> > Hi everyone,
> >
> > I switched over from Solr 5.2.1 to 7.2.1 other than re-indexing my data
> and
> > schema design remain the same.
> >
> > The issue I see now is I'm getting 0 hits on phrase searches, why?
> >
> > Here is the query I'm sending that gives me 0 hits:
> >
> > http://localhost:8983/solr/ccfts/select_test?q=%22cat+
> > dog%22&wt=json&indent=true
> >
> > But this query will give me hits:
> >
> > http://localhost:8983/solr/ccfts/select_test?q=cat+dog&
> wt=json&indent=true
> >
> > Here is my schema:
> >
> > <fieldType name="analyzer_text" class="solr.TextField"
> > positionIncrementGap="100" autoGeneratePhraseQueries="true">
> >        <analyzer>
> >   <tokenizer class="solr.WhitespaceTokenizerFactory"/>
> >   <filter class="solr.SynonymFilterFactory" expand="true"
> > synonyms="synonyms.txt" ignoreCase="true"/>
> >   <filter class="solr.WordDelimiterFilterFactory" preserveOriginal="0"
> > generateNumberParts="1" splitOnCaseChange="0" catenateWords="1"
> > splitOnNumerics="1" stemEnglishPossessive="1" generateWordParts="1"
> > catenateAll="1" catenateNumbers="1"/>
> >   <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt"
> > ignoreCase="true"/>
> >   <filter class="solr.LowerCaseFilterFactory"/>
> >   <filter class="solr.EnglishPossessiveFilterFactory"/>
> >   <filter class="solr.KeywordMarkerFilterFactory"
> > protected="protwords.txt"/>
> >   <filter class="solr.PorterStemFilterFactory"/>
> >   <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
> >        </analyzer>
> >     </fieldType>
> >
> > Here are my fields:
> >
> > <field name=" analyzer _text"     type="text_general"   indexed="true"
> > required="false" stored="false"  multiValued="true" />
> >     <field name="CC_UNIQUE_FIELD"    type="string"         indexed="true"
> > required="true"  stored="true"   multiValued="false" />
> > <field name="CC_ALL_FIELDS_DATA" type=" analyzer_text" indexed="true"
> > required="false" stored="false"  multiValued="true" />
> >     <field name="CC_COMPONENT_NAME"  type="string"         indexed="true"
> > required="true"  stored="false"  multiValued="false" docValues="true" />
> >     <field name="CC_FILE_EXT"        type="string"         indexed="true"
> > required="true"  stored="false"  multiValued="false" docValues="true" />
> >     <field name="CC_FILE_NAME"       type="string"         indexed="true"
> > required="true"  stored="false"  multiValued="false" docValues="true" />
> >     <field name="CC_FILE_PATH"       type="string"         indexed="true"
> > required="true"  stored="true"   multiValued="false" docValues="true" />
> >
> > And here is my handler:
> >
> > {"requestHandler":{"/select_test":{
> >       "class":"solr.SearchHandler",
> >       "name":"/select_test",
> >       "defaults":{
> >         "defType":"edismax",
> >         "echoParams":"explicit",
> >         "fl":"CC_UNIQUE_FIELD,CC_FILE_PATH,score",
> >         "indent":"true",
> >         "qf":"CC_ALL_FIELDS_DATA",
> >         "rows":"10",
> >         "tie":"1.0",
> >         "wt":"xml"}}}}
> >
> > What am I doing wrong?
> >
> > Steven
> >
>
>
>
> --
> Sincerely yours
> Mikhail Khludnev
>
Reply | Threaded
Open this post in threaded view
|

Re: Phrase search with Solr 7.2

Steven White
Please ignore this.  It was a user error.  I was pointing to the wrong
analyzer in my app's cfg file.

Steve

On Mon, Mar 26, 2018 at 10:17 AM, Steven White <[hidden email]> wrote:

> Setting "sow=true" didn't make a difference.
>
> Here is what I'm using now: http://localhost:8983/
> solr/ccfts/select_test?q=%22record%20type%20session%22&
> wt=json&indent=true&sow=true&debugQuery=true
>
> And here is the output:
>
> {
>   "responseHeader":{
>     "status":0,
>     "QTime":1,
>     "params":{
>       "q":"\"record type session\"",
>       "indent":"true",
>       "sow":"true",
>       "wt":"json",
>       "debugQuery":"true"}},
>   "response":{"numFound":0,"start":0,"maxScore":0.0,"docs":[]
>   },
>   "debug":{
>     "rawquerystring":"\"record type session\"",
>     "querystring":"\"record type session\"",
>     "parsedquery":"+DisjunctionMaxQuery((CC_ALL_FIELDS_DATA:\"record type session\")~1.0)",
>     "parsedquery_toString":"+(CC_ALL_FIELDS_DATA:\"record type session\")~1.0",
>     "explain":{},
>     "QParser":"ExtendedDismaxQParser",
>     "altquerystring":null,
>     "boost_queries":null,
>     "parsed_boost_queries":[],
>     "boostfuncs":null,
>     "timing":{
>       "time":1.0,
>       "prepare":{
>         "time":1.0,
>         "query":{
>           "time":0.0},
>         "facet":{
>           "time":0.0},
>         "facet_module":{
>           "time":0.0},
>         "mlt":{
>           "time":0.0},
>         "highlight":{
>           "time":0.0},
>         "stats":{
>           "time":0.0},
>         "expand":{
>           "time":0.0},
>         "terms":{
>           "time":0.0},
>         "debug":{
>           "time":0.0}},
>       "process":{
>         "time":0.0,
>         "query":{
>           "time":0.0},
>         "facet":{
>           "time":0.0},
>         "facet_module":{
>           "time":0.0},
>         "mlt":{
>           "time":0.0},
>         "highlight":{
>           "time":0.0},
>         "stats":{
>           "time":0.0},
>         "expand":{
>           "time":0.0},
>         "terms":{
>           "time":0.0},
>         "debug":{
>           "time":0.0}}}}}
>
>
> How do I debug this?
>
> Steve
>
> On Mon, Mar 26, 2018 at 12:50 AM, Mikhail Khludnev <[hidden email]>
> wrote:
>
>> Hello, Steven.
>>
>> Have you tried sow=true?
>> see
>> https://lucene.apache.org/solr/guide/7_2/the-extended-dismax
>> -query-parser.html
>>
>>
>> Anyway, you can start from debugQuery=true, then try to explore
>> explainOther, and get to Analysis page after all.
>>
>> On Mon, Mar 26, 2018 at 3:10 AM, Steven White <[hidden email]>
>> wrote:
>>
>> > Hi everyone,
>> >
>> > I switched over from Solr 5.2.1 to 7.2.1 other than re-indexing my data
>> and
>> > schema design remain the same.
>> >
>> > The issue I see now is I'm getting 0 hits on phrase searches, why?
>> >
>> > Here is the query I'm sending that gives me 0 hits:
>> >
>> > http://localhost:8983/solr/ccfts/select_test?q=%22cat+
>> > dog%22&wt=json&indent=true
>> >
>> > But this query will give me hits:
>> >
>> > http://localhost:8983/solr/ccfts/select_test?q=cat+dog&wt=
>> json&indent=true
>> >
>> > Here is my schema:
>> >
>> > <fieldType name="analyzer_text" class="solr.TextField"
>> > positionIncrementGap="100" autoGeneratePhraseQueries="true">
>> >        <analyzer>
>> >   <tokenizer class="solr.WhitespaceTokenizerFactory"/>
>> >   <filter class="solr.SynonymFilterFactory" expand="true"
>> > synonyms="synonyms.txt" ignoreCase="true"/>
>> >   <filter class="solr.WordDelimiterFilterFactory" preserveOriginal="0"
>> > generateNumberParts="1" splitOnCaseChange="0" catenateWords="1"
>> > splitOnNumerics="1" stemEnglishPossessive="1" generateWordParts="1"
>> > catenateAll="1" catenateNumbers="1"/>
>> >   <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt"
>> > ignoreCase="true"/>
>> >   <filter class="solr.LowerCaseFilterFactory"/>
>> >   <filter class="solr.EnglishPossessiveFilterFactory"/>
>> >   <filter class="solr.KeywordMarkerFilterFactory"
>> > protected="protwords.txt"/>
>> >   <filter class="solr.PorterStemFilterFactory"/>
>> >   <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
>> >        </analyzer>
>> >     </fieldType>
>> >
>> > Here are my fields:
>> >
>> > <field name=" analyzer _text"     type="text_general"   indexed="true"
>> > required="false" stored="false"  multiValued="true" />
>> >     <field name="CC_UNIQUE_FIELD"    type="string"
>>  indexed="true"
>> > required="true"  stored="true"   multiValued="false" />
>> > <field name="CC_ALL_FIELDS_DATA" type=" analyzer_text" indexed="true"
>> > required="false" stored="false"  multiValued="true" />
>> >     <field name="CC_COMPONENT_NAME"  type="string"
>>  indexed="true"
>> > required="true"  stored="false"  multiValued="false" docValues="true" />
>> >     <field name="CC_FILE_EXT"        type="string"
>>  indexed="true"
>> > required="true"  stored="false"  multiValued="false" docValues="true" />
>> >     <field name="CC_FILE_NAME"       type="string"
>>  indexed="true"
>> > required="true"  stored="false"  multiValued="false" docValues="true" />
>> >     <field name="CC_FILE_PATH"       type="string"
>>  indexed="true"
>> > required="true"  stored="true"   multiValued="false" docValues="true" />
>> >
>> > And here is my handler:
>> >
>> > {"requestHandler":{"/select_test":{
>> >       "class":"solr.SearchHandler",
>> >       "name":"/select_test",
>> >       "defaults":{
>> >         "defType":"edismax",
>> >         "echoParams":"explicit",
>> >         "fl":"CC_UNIQUE_FIELD,CC_FILE_PATH,score",
>> >         "indent":"true",
>> >         "qf":"CC_ALL_FIELDS_DATA",
>> >         "rows":"10",
>> >         "tie":"1.0",
>> >         "wt":"xml"}}}}
>> >
>> > What am I doing wrong?
>> >
>> > Steven
>> >
>>
>>
>>
>> --
>> Sincerely yours
>> Mikhail Khludnev
>>
>
>