Child=true does not work for data import handler

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
9 messages Options
Reply | Threaded
Open this post in threaded view
|

Child=true does not work for data import handler

omprab@rediffmail.com
I am using a db-data config similar to the one below for indexing this
parent-child data. Solr version is 6.6.2.

SELECT   id as emp_id,   name FROM emp;
+--------+--------+
| emp_id | name   |
+--------+--------+
|      1 | omkar  |
|      2 | ashwin |
+--------+--------+
2 rows in set (0.00 sec)

select  * from emp_details ;
+------+--------+-------+
| id   | emp_id | dept  |
+------+--------+-------+
|    1 |      1 | IT    |
|    2 |      1 | Data  |
|    3 |      2 | ITI   |
|    4 |      2 | Entry |
+------+--------+-------+
4 rows in set (0.00 sec)

<dataConfig>
  <dataSource type="JdbcDataSource" name="ds-1"
              driver="com.mysql.jdbc.Driver"
              url="jdbc:mysql://localhost:3306/test"
              user="root"
              password=""
                          session.group_concat_max_len = '70000'
                          />
                         
   <document>
   
    <entity name="parent" datasource="ds-1" pk="id"
transformer="RegexTransformer"
             query=" SELECT   id, name FROM  emp">

                                <field column="id" name="id" />
                                <field column="name" name="name" />
                               
                                 <entity  name='child' query="select id,dept from emp_details where
emp_id = '${parent.id}'">
                                                <field column="dept" name="dept" />
                                 </entity>
                 
        </entity>

                  </document>
                 
        </dataConfig>



{
  "responseHeader":{
    "status":0,
    "QTime":0,
    "params":{
      "q":"*:*",
      "indent":"on",
      "wt":"json",
      "_":"1533325469162"}},
  "response":{"numFound":2,"start":0,"docs":[
      {
        "name":"omkar",
        "id":"1",
        "dept":"IT",
        "_version_":1607809693975052288},
      {
        "name":"ashwin",
        "id":"2",
        "dept":"ITI",
        "_version_":1607809693978198016}]
  }}


I am expecting multiple child documents, so I added child=true

 <entity  name='child' child='true' query="select id,dept from emp_details
where emp_id = '${parent.id}'">

but the output of indexing is as below and it does not process any document

Indexing completed. Added/Updated: 0 documents. Deleted 0 documents.
Requests: 3 , Fetched: 6 , Skipped: 0 , Processed: 0
Started: less than a minute ago

Can you help me find out if there is any issue with the db or Solr config?




--
Sent from: http://lucene.472066.n3.nabble.com/Solr-User-f472068.html
Reply | Threaded
Open this post in threaded view
|

Re: Child=true does not work for data import handler

Mikhail Khludnev-2
Hi, Omkar.

Could it happen that child docs as well as parents are assigned same "id"
field values implicitly and removed due to uniqueKey collision?

On Sat, Aug 4, 2018 at 10:12 PM [hidden email] <
[hidden email]> wrote:

> I am using similar db-data config as below for indexing this parent-child
> data. solr version 6.6.2
>
> SELECT   id as emp_id,   name FROM emp;
> +--------+--------+
> | emp_id | name   |
> +--------+--------+
> |      1 | omkar  |
> |      2 | ashwin |
> +--------+--------+
> 2 rows in set (0.00 sec)
>
> select  * from emp_details ;
> +------+--------+-------+
> | id   | emp_id | dept  |
> +------+--------+-------+
> |    1 |      1 | IT    |
> |    2 |      1 | Data  |
> |    3 |      2 | ITI   |
> |    4 |      2 | Entry |
> +------+--------+-------+
> 4 rows in set (0.00 sec)
>
> <dataConfig>
>   <dataSource type="JdbcDataSource" name="ds-1"
>               driver="com.mysql.jdbc.Driver"
>               url="jdbc:mysql://localhost:3306/test"
>               user="root"
>               password=""
>                           session.group_concat_max_len = '70000'
>                           />
>
>    <document>
>
>     <entity name="parent" datasource="ds-1" pk="id"
> transformer="RegexTransformer"
>              query=" SELECT   id, name FROM  emp">
>
>                                 <field column="id" name="id" />
>                                 <field column="name" name="name" />
>
>                                  <entity  name='child' query="select
> id,dept from emp_details where
> emp_id = '${parent.id}'">
>                                                 <field column="dept"
> name="dept" />
>                                  </entity>
>
>         </entity>
>
>                   </document>
>
>         </dataConfig>
>
>
>
> {
>   "responseHeader":{
>     "status":0,
>     "QTime":0,
>     "params":{
>       "q":"*:*",
>       "indent":"on",
>       "wt":"json",
>       "_":"1533325469162"}},
>   "response":{"numFound":2,"start":0,"docs":[
>       {
>         "name":"omkar",
>         "id":"1",
>         "dept":"IT",
>         "_version_":1607809693975052288},
>       {
>         "name":"ashwin",
>         "id":"2",
>         "dept":"ITI",
>         "_version_":1607809693978198016}]
>   }}
>
>
> I am expecting multi child documents. so i added child=true
>
>  <entity  name='child' child='true' query="select id,dept from emp_details
> where emp_id = '${parent.id}'">
>
> but output of indexing is as below and it does not process any doucment
>
> Indexing completed. Added/Updated: 0 documents. Deleted 0 documents.
> Requests: 3 , Fetched: 6 , Skipped: 0 , Processed: 0
> Started: less than a minute ago
>
> can you helping me if there is any issue with db or solr config
>
>
>
>
> --
> Sent from: http://lucene.472066.n3.nabble.com/Solr-User-f472068.html
>


--
Sincerely yours
Mikhail Khludnev
Reply | Threaded
Open this post in threaded view
|

Re: Child=true does not work for data import handler

omprab@rediffmail.com
Thanks Mikhail, I tried changing the conf, but that did not help.

<dataConfig>
  <dataSource type="JdbcDataSource" name="ds-1"
              driver="com.mysql.jdbc.Driver"
              url="jdbc:mysql://localhost:3306/test"
              user="root"
              password=""
                          session.group_concat_max_len = '70000'
                          />
                         
   <document>
   
    <entity name="parent" datasource="ds-1" pk="id"
transformer="RegexTransformer"
             query="SELECT id,name FROM emp">
                                <field column="id" name="id" />
                                <field column="name" name="name" />
                               
                         <entity  name="child"  query="select id as childpk,dept from emp_details
where emp_id = '${parent.id}'">
                                                <field column="dept" name="dept" />
                                                <field column="childpk" name="childpk" />
                                 </entity>
                 
        </entity>

                  </document>
                 
        </dataConfig>




--
Sent from: http://lucene.472066.n3.nabble.com/Solr-User-f472068.html
Reply | Threaded
Open this post in threaded view
|

Re: Child=true does not work for data import handler

Mikhail Khludnev-2
DIH has debug&verbose modes. Have you tried to use them?

On Mon, Aug 6, 2018 at 4:11 PM [hidden email] <[hidden email]>
wrote:

> Thanks Mikhail, i tried changing conf but that did not help
>
> <dataConfig>
>   <dataSource type="JdbcDataSource" name="ds-1"
>               driver="com.mysql.jdbc.Driver"
>               url="jdbc:mysql://localhost:3306/test"
>               user="root"
>               password=""
>                           session.group_concat_max_len = '70000'
>                           />
>
>    <document>
>
>     <entity name="parent" datasource="ds-1" pk="id"
> transformer="RegexTransformer"
>              query="SELECT id,name      FROM emp">
>                                 <field column="id" name="id" />
>                                 <field column="name" name="name" />
>
>                          <entity  name="child"  query="select id as
> childpk,dept from emp_details
> where emp_id = '${parent.id}'">
>                                                 <field column="dept"
> name="dept" />
>                                                 <field column="childpk"
> name="childpk" />
>                                  </entity>
>
>         </entity>
>
>                   </document>
>
>         </dataConfig>
>
>
>
>
> --
> Sent from: http://lucene.472066.n3.nabble.com/Solr-User-f472068.html
>


--
Sincerely yours
Mikhail Khludnev
Reply | Threaded
Open this post in threaded view
|

Re: Child=true does not work for data import handler

omprab@rediffmail.com
Thanks Mikhail, verbose mode did help. The _root_ field was missing in the
schema; I also made some changes in the child entity. For example, I aliased
emp_id as id (in the child query), which is the id column of the parent table.

                <entity name="parent" pk="id" datasource="ds-1"  query="SELECT id,name
FROM emp">
                                <field column="id" name="id" />
                                <field column="name" name="name" />
                                        <entity child='true'  name="child"  query="SELECT dept,emp_id as id
FROM emp_details where emp_id='${parent.id}' ">
                                                        <field column="dept" name="dept" />
                                        </entity>
                </entity>


Data seems to be returned correctly as below, but the child documents
and parent documents are shown as individual documents. I was expecting 2
documents with 2 child documents for each doc.
Any input will be helpful.


 "response":{"numFound":6,"start":0,"docs":[
      {
        "dept":"IT",
        "id":"1",
        "_version_":1608073809653399552},
      {
        "dept":"Data",
        "id":"1",
        "_version_":1608073809653399552},
      {
        "name":"omkar",
        "id":"1",
        "_version_":1608073809653399552},
      {
        "dept":"ITI",
        "id":"2",
        "_version_":1608073809667031040},
      {
        "dept":"Entry",
        "id":"2",
        "_version_":1608073809667031040},
      {
        "name":"ashwin",
        "id":"2",
        "_version_":1608073809667031040}]
  }}



--
Sent from: http://lucene.472066.n3.nabble.com/Solr-User-f472068.html
Reply | Threaded
Open this post in threaded view
|

Re: Child=true does not work for data import handler

Mikhail Khludnev-2
It never works like you expect. You need to search for parents and then
hook up the [child] document transformer. I see some improvements are coming,
but for now that is how it works.

On Mon, Aug 6, 2018 at 9:11 PM [hidden email] <[hidden email]>
wrote:

> Thanks Mikhail verbose did help. _root_ field was missing in schema also in
> make some changes in child entity. Like i created id as alias to emp_id (
> in
> child query) which is id column of parent table.
>
>                 <entity name="parent" pk="id" datasource="ds-1"
> query="SELECT id,name
> FROM emp">
>                                 <field column="id" name="id" />
>                                 <field column="name" name="name" />
>                                         <entity child='true'
> name="child"  query="SELECT dept,emp_id as id
> FROM emp_details where emp_id='${parent.id}' ">
>                                                         <field
> column="dept" name="dept" />
>                                         </entity>
>                 </entity>
>
>
> Data seems to be returning correctly as below. but it show child documents
> and parent documents are shown as individual document. i was expecting 2
> documents and 2 child document for each doc.
> Any inputs will be helpful
>
>
>  "response":{"numFound":6,"start":0,"docs":[
>       {
>         "dept":"IT",
>         "id":"1",
>         "_version_":1608073809653399552},
>       {
>         "dept":"Data",
>         "id":"1",
>         "_version_":1608073809653399552},
>       {
>         "name":"omkar",
>         "id":"1",
>         "_version_":1608073809653399552},
>       {
>         "dept":"ITI",
>         "id":"2",
>         "_version_":1608073809667031040},
>       {
>         "dept":"Entry",
>         "id":"2",
>         "_version_":1608073809667031040},
>       {
>         "name":"ashwin",
>         "id":"2",
>         "_version_":1608073809667031040}]
>   }}
>
>
>
> --
> Sent from: http://lucene.472066.n3.nabble.com/Solr-User-f472068.html
>


--
Sincerely yours
Mikhail Khludnev
Reply | Threaded
Open this post in threaded view
|

Re: Child=true does not work for data import handler

omprab@rediffmail.com
Thanks a lot, Mikhail. But as per the documentation below, nested document
ingestion is possible. Is this a limitation of DIH?

https://lucene.apache.org/solr/guide/6_6/uploading-data-with-index-handlers.html#UploadingDatawithIndexHandlers-NestedChildDocuments


Also, can a block join query be used to get the expected relationship for the
data I have ingested using DIH?



--
Sent from: http://lucene.472066.n3.nabble.com/Solr-User-f472068.html
Reply | Threaded
Open this post in threaded view
|

Re: Child=true does not work for data import handler

Mikhail Khludnev-2
This is what nested docs look like. These are document blocks with the parent
at the end. Block Join Queries work on these blocks.

On Wed, Aug 8, 2018 at 12:47 PM [hidden email] <
[hidden email]> wrote:

> Thanks a lot Mikhail. But as per documentation below nested document
> ingestion is possible. Is this limitation of DIH?
>
>
> https://lucene.apache.org/solr/guide/6_6/uploading-data-with-index-handlers.html#UploadingDatawithIndexHandlers-NestedChildDocuments
>
>
> Also can block join query be used to get expect relationship for data i
> have
> ingested using DIH?
>
>
>
> --
> Sent from: http://lucene.472066.n3.nabble.com/Solr-User-f472068.html
>


--
Sincerely yours
Mikhail Khludnev
Reply | Threaded
Open this post in threaded view
|

Re: Child=true does not work for data import handler

omprab@rediffmail.com
But in my case i see output as below

<lst name="responseHeader">
  <int name="status">0</int>
  <int name="QTime">0</int>
  <lst name="params">
    <str name="q">*:*</str>
    <str name="indent">on</str>
    <str name="wt">xml</str>
    <str name="_">1533734431931</str>
  </lst>
</lst>
<result name="response" numFound="6" start="0">
  <doc>
    <str name="dept">IT</str>
    <str name="id">1</str>
    <str name="dept_id">1</str>
    <long name="_version_">1608130338704326656</long></doc>
  <doc>
    <str name="dept">Data</str>
    <str name="id">1</str>
    <str name="dept_id">2</str>
    <long name="_version_">1608130338704326656</long></doc>
  <doc>
    <str name="name">omkar</str>
    <str name="id">1</str>
    <long name="_version_">1608130338704326656</long></doc>
  <doc>
    <str name="dept">ITI</str>
    <str name="id">2</str>
    <str name="dept_id">3</str>
    <long name="_version_">1608130338712715264</long></doc>
  <doc>
    <str name="dept">Entry</str>
    <str name="id">2</str>
    <str name="dept_id">4</str>
    <long name="_version_">1608130338712715264</long></doc>
  <doc>
    <str name="name">ashwin</str>
    <str name="id">2</str>
    <long name="_version_">1608130338712715264</long></doc>
</result>
</response>



--
Sent from: http://lucene.472066.n3.nabble.com/Solr-User-f472068.html