[GitHub] lucene-solr pull request #455: SOLR-12638

classic Classic list List threaded Threaded
35 messages Options
12
Reply | Threaded
Open this post in threaded view
|

[GitHub] lucene-solr pull request #455: SOLR-12638

moshebla
Github user dsmiley commented on a diff in the pull request:

    https://github.com/apache/lucene-solr/pull/455#discussion_r224130315
 
    --- Diff: solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateBlockTest.java ---
    @@ -0,0 +1,370 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.solr.update.processor;
    +
    +import java.util.Arrays;
    +import java.util.Collection;
    +import java.util.Collections;
    +import java.util.List;
    +
    +import org.apache.lucene.util.BytesRef;
    +import org.apache.solr.SolrTestCaseJ4;
    +import org.apache.solr.common.SolrInputDocument;
    +import org.apache.solr.common.SolrInputField;
    +import org.apache.solr.core.SolrCore;
    +import org.apache.solr.handler.component.RealTimeGetComponent;
    +import org.junit.AfterClass;
    +import org.junit.Before;
    +import org.junit.BeforeClass;
    +import org.junit.Test;
    +
    +public class AtomicUpdateBlockTest extends SolrTestCaseJ4 {
    +
    +  private final static String VERSION = "_version_";
    +  private static String PREVIOUS_ENABLE_UPDATE_LOG_VALUE;
    +
    +  @BeforeClass
    +  public static void beforeTests() throws Exception {
    +    PREVIOUS_ENABLE_UPDATE_LOG_VALUE = System.getProperty("enable.update.log");
    +    System.setProperty("enable.update.log", "true");
    +    initCore("solrconfig-block-atomic-update.xml", "schema-nest.xml"); // use "nest" schema
    +  }
    +
    +  @AfterClass
    +  public static void afterTests() throws Exception {
    +    // restore enable.update.log
    +    System.setProperty("enable.update.log", PREVIOUS_ENABLE_UPDATE_LOG_VALUE);
    +  }
    +
    +  @Before
    +  public void before() {
    +    clearIndex();
    +    assertU(commit());
    +  }
    +
    +  @Test
    +  public void testMergeChildDoc() throws Exception {
    +    SolrInputDocument doc = new SolrInputDocument();
    +    doc.setField("id", "1");
    +    doc.setField("cat_ss", new String[]{"aaa", "ccc"});
    +    doc.setField("child", Collections.singletonList(sdoc("id", "2", "cat_ss", "child")));
    +    addDoc(adoc(doc), "nested-rtg");
    +
    +    BytesRef rootDocId = new BytesRef("1");
    +    SolrCore core = h.getCore();
    +    SolrInputDocument block = RealTimeGetComponent.getInputDocument(core, rootDocId, true);
    +    // assert block doc has child docs
    +    assertTrue(block.containsKey("child"));
    +
    +    assertJQ(req("q","id:1")
    +        ,"/response/numFound==0"
    +    );
    +
    +    // commit the changes
    +    assertU(commit());
    +
    +    SolrInputDocument newChildDoc = sdoc("id", "3", "cat_ss", "child");
    +    SolrInputDocument addedDoc = sdoc("id", "1",
    +        "cat_ss", Collections.singletonMap("add", "bbb"),
    +        "child", Collections.singletonMap("add", sdocs(newChildDoc)));
    +    block = RealTimeGetComponent.getInputDocument(core, rootDocId, true);
    +    block.removeField(VERSION);
    +    SolrInputDocument preMergeDoc = new SolrInputDocument(block);
    +    AtomicUpdateDocumentMerger docMerger = new AtomicUpdateDocumentMerger(req());
    +    docMerger.merge(addedDoc, block);
    --- End diff --
   
    It seems the point of this test is to test AtomicUpdateDocumentMerger.merge?  Then why even index anything at all (the first half of this test)?  Simply create the documents directly and call the merge method and test the result.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [hidden email]
For additional commands, e-mail: [hidden email]

Reply | Threaded
Open this post in threaded view
|

[GitHub] lucene-solr pull request #455: SOLR-12638

moshebla
In reply to this post by moshebla
Github user dsmiley commented on a diff in the pull request:

    https://github.com/apache/lucene-solr/pull/455#discussion_r224131263
 
    --- Diff: solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateBlockTest.java ---
    @@ -0,0 +1,370 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.solr.update.processor;
    +
    +import java.util.Arrays;
    +import java.util.Collection;
    +import java.util.Collections;
    +import java.util.List;
    +
    +import org.apache.lucene.util.BytesRef;
    +import org.apache.solr.SolrTestCaseJ4;
    +import org.apache.solr.common.SolrInputDocument;
    +import org.apache.solr.common.SolrInputField;
    +import org.apache.solr.core.SolrCore;
    +import org.apache.solr.handler.component.RealTimeGetComponent;
    +import org.junit.AfterClass;
    +import org.junit.Before;
    +import org.junit.BeforeClass;
    +import org.junit.Test;
    +
    +public class AtomicUpdateBlockTest extends SolrTestCaseJ4 {
    +
    +  private final static String VERSION = "_version_";
    +  private static String PREVIOUS_ENABLE_UPDATE_LOG_VALUE;
    +
    +  @BeforeClass
    +  public static void beforeTests() throws Exception {
    +    PREVIOUS_ENABLE_UPDATE_LOG_VALUE = System.getProperty("enable.update.log");
    +    System.setProperty("enable.update.log", "true");
    +    initCore("solrconfig-block-atomic-update.xml", "schema-nest.xml"); // use "nest" schema
    +  }
    +
    +  @AfterClass
    +  public static void afterTests() throws Exception {
    +    // restore enable.update.log
    +    System.setProperty("enable.update.log", PREVIOUS_ENABLE_UPDATE_LOG_VALUE);
    +  }
    +
    +  @Before
    +  public void before() {
    +    clearIndex();
    +    assertU(commit());
    +  }
    +
    +  @Test
    +  public void testMergeChildDoc() throws Exception {
    +    SolrInputDocument doc = new SolrInputDocument();
    +    doc.setField("id", "1");
    +    doc.setField("cat_ss", new String[]{"aaa", "ccc"});
    +    doc.setField("child", Collections.singletonList(sdoc("id", "2", "cat_ss", "child")));
    +    addDoc(adoc(doc), "nested-rtg");
    +
    +    BytesRef rootDocId = new BytesRef("1");
    +    SolrCore core = h.getCore();
    +    SolrInputDocument block = RealTimeGetComponent.getInputDocument(core, rootDocId, true);
    +    // assert block doc has child docs
    +    assertTrue(block.containsKey("child"));
    +
    +    assertJQ(req("q","id:1")
    +        ,"/response/numFound==0"
    +    );
    +
    +    // commit the changes
    +    assertU(commit());
    +
    +    SolrInputDocument newChildDoc = sdoc("id", "3", "cat_ss", "child");
    +    SolrInputDocument addedDoc = sdoc("id", "1",
    +        "cat_ss", Collections.singletonMap("add", "bbb"),
    +        "child", Collections.singletonMap("add", sdocs(newChildDoc)));
    +    block = RealTimeGetComponent.getInputDocument(core, rootDocId, true);
    +    block.removeField(VERSION);
    +    SolrInputDocument preMergeDoc = new SolrInputDocument(block);
    +    AtomicUpdateDocumentMerger docMerger = new AtomicUpdateDocumentMerger(req());
    +    docMerger.merge(addedDoc, block);
    +    assertEquals("merged document should have the same id", preMergeDoc.getFieldValue("id"), block.getFieldValue("id"));
    +    assertDocContainsSubset(preMergeDoc, block);
    +    assertDocContainsSubset(addedDoc, block);
    +    assertDocContainsSubset(newChildDoc, (SolrInputDocument) ((List) block.getFieldValues("child")).get(1));
    +    assertEquals(doc.getFieldValue("id"), block.getFieldValue("id"));
    +  }
    +
    +  @Test
    +  public void testBlockAtomicAdd() throws Exception {
    +
    +    SolrInputDocument doc = sdoc("id", "1",
    +        "cat_ss", new String[] {"aaa", "ccc"},
    +        "child1", sdoc("id", "2", "cat_ss", "child")
    --- End diff --
   
    I think it'd be easier to comprehend tests involving nested documents if the ID for a nested document somehow looked different.  For example, for this child doc, do "1.1" to mean the first child doc of parent doc 1.  Second would be "1.2".  WDYT?


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [hidden email]
For additional commands, e-mail: [hidden email]

Reply | Threaded
Open this post in threaded view
|

[GitHub] lucene-solr pull request #455: SOLR-12638

moshebla
In reply to this post by moshebla
Github user dsmiley commented on a diff in the pull request:

    https://github.com/apache/lucene-solr/pull/455#discussion_r224133917
 
    --- Diff: solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateBlockTest.java ---
    @@ -0,0 +1,370 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.solr.update.processor;
    +
    +import java.util.Arrays;
    +import java.util.Collection;
    +import java.util.Collections;
    +import java.util.List;
    +
    +import org.apache.lucene.util.BytesRef;
    +import org.apache.solr.SolrTestCaseJ4;
    +import org.apache.solr.common.SolrInputDocument;
    +import org.apache.solr.common.SolrInputField;
    +import org.apache.solr.core.SolrCore;
    +import org.apache.solr.handler.component.RealTimeGetComponent;
    +import org.junit.AfterClass;
    +import org.junit.Before;
    +import org.junit.BeforeClass;
    +import org.junit.Test;
    +
    +public class AtomicUpdateBlockTest extends SolrTestCaseJ4 {
    +
    +  private final static String VERSION = "_version_";
    +  private static String PREVIOUS_ENABLE_UPDATE_LOG_VALUE;
    +
    +  @BeforeClass
    +  public static void beforeTests() throws Exception {
    +    PREVIOUS_ENABLE_UPDATE_LOG_VALUE = System.getProperty("enable.update.log");
    +    System.setProperty("enable.update.log", "true");
    +    initCore("solrconfig-block-atomic-update.xml", "schema-nest.xml"); // use "nest" schema
    +  }
    +
    +  @AfterClass
    +  public static void afterTests() throws Exception {
    +    // restore enable.update.log
    +    System.setProperty("enable.update.log", PREVIOUS_ENABLE_UPDATE_LOG_VALUE);
    +  }
    +
    +  @Before
    +  public void before() {
    +    clearIndex();
    +    assertU(commit());
    +  }
    +
    +  @Test
    +  public void testMergeChildDoc() throws Exception {
    +    SolrInputDocument doc = new SolrInputDocument();
    +    doc.setField("id", "1");
    +    doc.setField("cat_ss", new String[]{"aaa", "ccc"});
    +    doc.setField("child", Collections.singletonList(sdoc("id", "2", "cat_ss", "child")));
    +    addDoc(adoc(doc), "nested-rtg");
    +
    +    BytesRef rootDocId = new BytesRef("1");
    +    SolrCore core = h.getCore();
    +    SolrInputDocument block = RealTimeGetComponent.getInputDocument(core, rootDocId, true);
    +    // assert block doc has child docs
    +    assertTrue(block.containsKey("child"));
    +
    +    assertJQ(req("q","id:1")
    +        ,"/response/numFound==0"
    +    );
    +
    +    // commit the changes
    +    assertU(commit());
    +
    +    SolrInputDocument newChildDoc = sdoc("id", "3", "cat_ss", "child");
    +    SolrInputDocument addedDoc = sdoc("id", "1",
    +        "cat_ss", Collections.singletonMap("add", "bbb"),
    +        "child", Collections.singletonMap("add", sdocs(newChildDoc)));
    +    block = RealTimeGetComponent.getInputDocument(core, rootDocId, true);
    +    block.removeField(VERSION);
    +    SolrInputDocument preMergeDoc = new SolrInputDocument(block);
    +    AtomicUpdateDocumentMerger docMerger = new AtomicUpdateDocumentMerger(req());
    +    docMerger.merge(addedDoc, block);
    +    assertEquals("merged document should have the same id", preMergeDoc.getFieldValue("id"), block.getFieldValue("id"));
    +    assertDocContainsSubset(preMergeDoc, block);
    +    assertDocContainsSubset(addedDoc, block);
    +    assertDocContainsSubset(newChildDoc, (SolrInputDocument) ((List) block.getFieldValues("child")).get(1));
    +    assertEquals(doc.getFieldValue("id"), block.getFieldValue("id"));
    +  }
    +
    +  @Test
    +  public void testBlockAtomicAdd() throws Exception {
    +
    +    SolrInputDocument doc = sdoc("id", "1",
    +        "cat_ss", new String[] {"aaa", "ccc"},
    +        "child1", sdoc("id", "2", "cat_ss", "child")
    +    );
    +    json(doc);
    +    addDoc(adoc(doc), "nested-rtg");
    --- End diff --
   
    Maybe the default chain in this config should have those URPs, and therefore we wouldn't need to have the tests refer to the URP chain.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [hidden email]
For additional commands, e-mail: [hidden email]

Reply | Threaded
Open this post in threaded view
|

[GitHub] lucene-solr pull request #455: SOLR-12638

moshebla
In reply to this post by moshebla
Github user dsmiley commented on a diff in the pull request:

    https://github.com/apache/lucene-solr/pull/455#discussion_r224307768
 
    --- Diff: solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java ---
    @@ -1360,24 +1385,47 @@ boolean getUpdatedDocument(AddUpdateCommand cmd, long versionOnUpdate) throws IO
         }
         
         // full (non-inplace) atomic update
    +    final boolean isNestedSchema = req.getSchema().isUsableForChildDocs();
         SolrInputDocument sdoc = cmd.getSolrInputDocument();
         BytesRef id = cmd.getIndexedId();
    -    SolrInputDocument oldDoc = RealTimeGetComponent.getInputDocument(cmd.getReq().getCore(), id);
    +    SolrInputDocument blockDoc = RealTimeGetComponent.getInputDocument(cmd.getReq().getCore(), id, null,
    +        false, null, true, true);
     
    -    if (oldDoc == null) {
    -      // create a new doc by default if an old one wasn't found
    -      if (versionOnUpdate <= 0) {
    -        oldDoc = new SolrInputDocument();
    -      } else {
    +    if (blockDoc == null) {
    +      if (versionOnUpdate > 0) {
             // could just let the optimistic locking throw the error
             throw new SolrException(ErrorCode.CONFLICT, "Document not found for update.  id=" + cmd.getPrintableId());
           }
         } else {
    -      oldDoc.remove(CommonParams.VERSION_FIELD);
    +      blockDoc.remove(CommonParams.VERSION_FIELD);
         }
     
     
    -    cmd.solrDoc = docMerger.merge(sdoc, oldDoc);
    +    SolrInputDocument mergedDoc;
    +    if(idField == null || blockDoc == null) {
    +      // create a new doc by default if an old one wasn't found
    +      mergedDoc = docMerger.merge(sdoc, new SolrInputDocument());
    +    } else {
    +      if(isNestedSchema && req.getSchema().hasExplicitField(IndexSchema.NEST_PATH_FIELD_NAME) &&
    +          blockDoc.containsKey(IndexSchema.ROOT_FIELD_NAME) && !id.utf8ToString().equals(blockDoc.getFieldValue(IndexSchema.ROOT_FIELD_NAME))) {
    +        SolrInputDocument oldDoc = RealTimeGetComponent.getInputDocument(cmd.getReq().getCore(), id, null,
    +            false, null, true, false);
    +        mergedDoc = docMerger.merge(sdoc, oldDoc);
    +        String docPath = (String) mergedDoc.getFieldValue(IndexSchema.NEST_PATH_FIELD_NAME);
    +        List<String> docPaths = StrUtils.splitSmart(docPath, PATH_SEP_CHAR);
    +        SolrInputField replaceDoc = blockDoc.getField(docPaths.remove(0).replaceAll(PATH_SEP_CHAR + "|" + NUM_SEP_CHAR, ""));
    --- End diff --
   
    The logic here (not just this line) is non-obvious to me.  There are no comments.  Please add comments and maybe refactor out a method.  The use of replaceAll with a regexp is suspicious to me.  None of the tests you added triggered a breakpoint within the docPaths loop below.  Needs more testing or possible error.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [hidden email]
For additional commands, e-mail: [hidden email]

Reply | Threaded
Open this post in threaded view
|

[GitHub] lucene-solr pull request #455: SOLR-12638

moshebla
In reply to this post by moshebla
Github user dsmiley commented on a diff in the pull request:

    https://github.com/apache/lucene-solr/pull/455#discussion_r223747455
 
    --- Diff: solr/core/src/java/org/apache/solr/update/processor/AtomicUpdateDocumentMerger.java ---
    @@ -442,5 +442,58 @@ protected void doRemoveRegex(SolrInputDocument toDoc, SolrInputField sif, Object
         }
         return patterns;
       }
    +
    +  private Object getNativeFieldValue(String fieldName, Object val) {
    +    if(isChildDoc(val)) {
    +      return val;
    +    }
    +    SchemaField sf = schema.getField(fieldName);
    +    return sf.getType().toNativeType(val);
    +  }
    +
    +  private static boolean isChildDoc(Object obj) {
    +    if(!(obj instanceof Collection)) {
    +      return obj instanceof SolrDocumentBase;
    +    }
    +    Collection objValues = (Collection) obj;
    +    if(objValues.size() == 0) {
    +      return false;
    +    }
    +    return objValues.iterator().next() instanceof SolrDocumentBase;
    +  }
    +
    +  private void removeObj(Collection original, Object toRemove, String fieldName) {
    +    if(isChildDoc(toRemove)) {
    +      removeChildDoc(original, (SolrInputDocument) toRemove);
    +    } else {
    +      original.remove(getNativeFieldValue(fieldName, toRemove));
    +    }
    +  }
    +
    +  private static void removeChildDoc(Collection original, SolrInputDocument docToRemove) {
    +    for(SolrInputDocument doc: (Collection<SolrInputDocument>) original) {
    +      if(isDerivedFromDoc(doc, docToRemove)) {
    +        original.remove(doc);
    +        return;
    +      }
    +    }
    +  }
    +
    +  /**
    +   *
    +   * @param fullDoc the document to be tested
    +   * @param subDoc the sub document that should be a subset of fullDoc
    +   * @return whether subDoc is a subset of fullDoc
    +   */
    +  private static boolean isDerivedFromDoc(SolrInputDocument fullDoc, SolrInputDocument subDoc) {
    +    for(SolrInputField subSif: subDoc) {
    +      String fieldName = subSif.getName();
    +      if(!fullDoc.containsKey(fieldName)) return false;
    +      Collection<Object> fieldValues = fullDoc.getFieldValues(fieldName);
    +      if(fieldValues.size() < subSif.getValueCount()) return false;
    +      if(!fullDoc.getFieldValues(fieldName).containsAll(subSif.getValues())) return false;
    --- End diff --
   
    `fullDoc.getFieldValues(fieldName)` on this line can be replaced by `fieldValues` since we already have the values.  And the previous line on the count is unnecessary since the containsAll check on this line would fail.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [hidden email]
For additional commands, e-mail: [hidden email]

Reply | Threaded
Open this post in threaded view
|

[GitHub] lucene-solr pull request #455: SOLR-12638

moshebla
In reply to this post by moshebla
Github user dsmiley commented on a diff in the pull request:

    https://github.com/apache/lucene-solr/pull/455#discussion_r224127559
 
    --- Diff: solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateBlockTest.java ---
    @@ -0,0 +1,370 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.solr.update.processor;
    +
    +import java.util.Arrays;
    +import java.util.Collection;
    +import java.util.Collections;
    +import java.util.List;
    +
    +import org.apache.lucene.util.BytesRef;
    +import org.apache.solr.SolrTestCaseJ4;
    +import org.apache.solr.common.SolrInputDocument;
    +import org.apache.solr.common.SolrInputField;
    +import org.apache.solr.core.SolrCore;
    +import org.apache.solr.handler.component.RealTimeGetComponent;
    +import org.junit.AfterClass;
    +import org.junit.Before;
    +import org.junit.BeforeClass;
    +import org.junit.Test;
    +
    +public class AtomicUpdateBlockTest extends SolrTestCaseJ4 {
    +
    +  private final static String VERSION = "_version_";
    +  private static String PREVIOUS_ENABLE_UPDATE_LOG_VALUE;
    +
    +  @BeforeClass
    +  public static void beforeTests() throws Exception {
    +    PREVIOUS_ENABLE_UPDATE_LOG_VALUE = System.getProperty("enable.update.log");
    +    System.setProperty("enable.update.log", "true");
    --- End diff --
   
    The `solrconfig-block-atomic-update.xml` file is not toggled by this system property (perhaps others are).  Why set this system property?


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [hidden email]
For additional commands, e-mail: [hidden email]

Reply | Threaded
Open this post in threaded view
|

[GitHub] lucene-solr pull request #455: SOLR-12638

moshebla
In reply to this post by moshebla
Github user dsmiley commented on a diff in the pull request:

    https://github.com/apache/lucene-solr/pull/455#discussion_r224306158
 
    --- Diff: solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java ---
    @@ -639,12 +650,30 @@ public static SolrInputDocument getInputDocument(SolrCore core, BytesRef idBytes
               sid = new SolrInputDocument();
             } else {
               Document luceneDocument = docFetcher.doc(docid);
    -          sid = toSolrInputDocument(luceneDocument, core.getLatestSchema());
    +          sid = toSolrInputDocument(luceneDocument, schema);
             }
    -        if (onlyTheseNonStoredDVs != null) {
    -          docFetcher.decorateDocValueFields(sid, docid, onlyTheseNonStoredDVs);
    -        } else {
    -          docFetcher.decorateDocValueFields(sid, docid, docFetcher.getNonStoredDVsWithoutCopyTargets());
    +        ensureDocDecorated(onlyTheseNonStoredDVs, sid, docid, docFetcher, resolveBlock || schema.hasExplicitField(IndexSchema.NEST_PATH_FIELD_NAME));
    +        SolrInputField rootField;
    +        if(resolveBlock && schema.isUsableForChildDocs() && (rootField = sid.getField(IndexSchema.ROOT_FIELD_NAME))!=null) {
    +          // doc is part of a nested structure
    +          ModifiableSolrParams params = new ModifiableSolrParams()
    +              .set("q", core.getLatestSchema().getUniqueKeyField().getName()+ ":" +rootField.getFirstValue())
    --- End diff --
   
    It seems the LocalSolrQueryRequest here is a dummy needed to satisfy some of the methods below.  This threw me; there should be comments and/or choice of var names (e.g. dummyReq) to reflect this.  "q" isn't needed; just the "fl".  It seems we don't even need the "fl" here since that can be supplied as the first parameter to SolrReturnFields, which seems better if it works.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [hidden email]
For additional commands, e-mail: [hidden email]

Reply | Threaded
Open this post in threaded view
|

[GitHub] lucene-solr pull request #455: SOLR-12638

moshebla
In reply to this post by moshebla
Github user dsmiley commented on a diff in the pull request:

    https://github.com/apache/lucene-solr/pull/455#discussion_r224300831
 
    --- Diff: solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java ---
    @@ -639,12 +650,30 @@ public static SolrInputDocument getInputDocument(SolrCore core, BytesRef idBytes
               sid = new SolrInputDocument();
             } else {
               Document luceneDocument = docFetcher.doc(docid);
    -          sid = toSolrInputDocument(luceneDocument, core.getLatestSchema());
    +          sid = toSolrInputDocument(luceneDocument, schema);
             }
    -        if (onlyTheseNonStoredDVs != null) {
    -          docFetcher.decorateDocValueFields(sid, docid, onlyTheseNonStoredDVs);
    -        } else {
    -          docFetcher.decorateDocValueFields(sid, docid, docFetcher.getNonStoredDVsWithoutCopyTargets());
    +        ensureDocDecorated(onlyTheseNonStoredDVs, sid, docid, docFetcher, resolveBlock || schema.hasExplicitField(IndexSchema.NEST_PATH_FIELD_NAME));
    +        SolrInputField rootField;
    --- End diff --
   
    no big deal to simply initialize rootField up front.  You are doing it as an expression with a side-effect below which is needlessly awkward.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [hidden email]
For additional commands, e-mail: [hidden email]

Reply | Threaded
Open this post in threaded view
|

[GitHub] lucene-solr pull request #455: SOLR-12638

moshebla
In reply to this post by moshebla
Github user dsmiley commented on a diff in the pull request:

    https://github.com/apache/lucene-solr/pull/455#discussion_r224283996
 
    --- Diff: solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java ---
    @@ -609,9 +618,10 @@ public static SolrInputDocument getInputDocument(SolrCore core, BytesRef idBytes
        * @param resolveFullDocument In case the document is fetched from the tlog, it could only be a partial document if the last update
        *                  was an in-place update. In that case, should this partial document be resolved to a full document (by following
        *                  back prevPointer/prevVersion)?
    +   * @param resolveBlock Check whether the document is part of a block. If so, return the whole block.
        */
       public static SolrInputDocument getInputDocument(SolrCore core, BytesRef idBytes, AtomicLong versionReturned, boolean avoidRetrievingStoredFields,
    -      Set<String> onlyTheseNonStoredDVs, boolean resolveFullDocument) throws IOException {
    +      Set<String> onlyTheseNonStoredDVs, boolean resolveFullDocument, boolean resolveBlock) throws IOException {
         SolrInputDocument sid = null;
    --- End diff --
   
    It would be helpful to add a javadoc comment to say that if the id refers to a nested document (which isn't known up-front), then it'll never be found in the tlog (at least if you follow the rules of nested docs).  Also, perhaps the parameter "resolveBlock" should be "resolveToRootDocument" since I think the "root" terminology may be more widely used as it's even in the schema, whereas "block" is I think not so much.  If you disagree, a compromise may be to use both "root" and "Block" together -- "resolveRootBlock".


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [hidden email]
For additional commands, e-mail: [hidden email]

Reply | Threaded
Open this post in threaded view
|

[GitHub] lucene-solr pull request #455: SOLR-12638

moshebla
In reply to this post by moshebla
Github user dsmiley commented on a diff in the pull request:

    https://github.com/apache/lucene-solr/pull/455#discussion_r224306892
 
    --- Diff: solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java ---
    @@ -1360,24 +1385,47 @@ boolean getUpdatedDocument(AddUpdateCommand cmd, long versionOnUpdate) throws IO
         }
         
         // full (non-inplace) atomic update
    +    final boolean isNestedSchema = req.getSchema().isUsableForChildDocs();
         SolrInputDocument sdoc = cmd.getSolrInputDocument();
         BytesRef id = cmd.getIndexedId();
    -    SolrInputDocument oldDoc = RealTimeGetComponent.getInputDocument(cmd.getReq().getCore(), id);
    +    SolrInputDocument blockDoc = RealTimeGetComponent.getInputDocument(cmd.getReq().getCore(), id, null,
    +        false, null, true, true);
     
    -    if (oldDoc == null) {
    -      // create a new doc by default if an old one wasn't found
    -      if (versionOnUpdate <= 0) {
    -        oldDoc = new SolrInputDocument();
    -      } else {
    +    if (blockDoc == null) {
    +      if (versionOnUpdate > 0) {
             // could just let the optimistic locking throw the error
             throw new SolrException(ErrorCode.CONFLICT, "Document not found for update.  id=" + cmd.getPrintableId());
           }
         } else {
    -      oldDoc.remove(CommonParams.VERSION_FIELD);
    +      blockDoc.remove(CommonParams.VERSION_FIELD);
         }
     
     
    -    cmd.solrDoc = docMerger.merge(sdoc, oldDoc);
    +    SolrInputDocument mergedDoc;
    +    if(idField == null || blockDoc == null) {
    +      // create a new doc by default if an old one wasn't found
    +      mergedDoc = docMerger.merge(sdoc, new SolrInputDocument());
    +    } else {
    +      if(isNestedSchema && req.getSchema().hasExplicitField(IndexSchema.NEST_PATH_FIELD_NAME) &&
    +          blockDoc.containsKey(IndexSchema.ROOT_FIELD_NAME) && !id.utf8ToString().equals(blockDoc.getFieldValue(IndexSchema.ROOT_FIELD_NAME))) {
    --- End diff --
   
    I don't think we can assume id.utf8ToString() is correct.  I think we have to consult the corresponding FieldType to get the "external value".  Also, cast blockDoc.getFieldValue as a String to make it clear we expected it to be one.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [hidden email]
For additional commands, e-mail: [hidden email]

Reply | Threaded
Open this post in threaded view
|

[GitHub] lucene-solr pull request #455: SOLR-12638

moshebla
In reply to this post by moshebla
Github user dsmiley commented on a diff in the pull request:

    https://github.com/apache/lucene-solr/pull/455#discussion_r224306195
 
    --- Diff: solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java ---
    @@ -661,6 +690,21 @@ public static SolrInputDocument getInputDocument(SolrCore core, BytesRef idBytes
         return sid;
       }
     
    +  private static void ensureDocDecorated(Set<String> onlyTheseNonStoredDVs, SolrDocumentBase doc, int docid, SolrDocumentFetcher docFetcher) throws IOException {
    --- End diff --
   
    I suggest renaming these methods `ensureDocDecorated` since it's what it calls.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [hidden email]
For additional commands, e-mail: [hidden email]

Reply | Threaded
Open this post in threaded view
|

[GitHub] lucene-solr pull request #455: SOLR-12638

moshebla
In reply to this post by moshebla
Github user dsmiley commented on a diff in the pull request:

    https://github.com/apache/lucene-solr/pull/455#discussion_r226709264
 
    --- Diff: solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateBlockTest.java ---
    @@ -59,39 +52,147 @@ public void before() {
     
       @Test
       public void testMergeChildDoc() throws Exception {
    -    SolrInputDocument doc = new SolrInputDocument();
    -    doc.setField("id", "1");
    -    doc.setField("cat_ss", new String[]{"aaa", "ccc"});
    -    doc.setField("child", Collections.singletonList(sdoc("id", "2", "cat_ss", "child")));
    +    SolrInputDocument newChildDoc = sdoc("id", "3", "cat_ss", "child");
    +    SolrInputDocument addedDoc = sdoc("id", "1",
    +        "cat_ss", Collections.singletonMap("add", "bbb"),
    +        "child", Collections.singletonMap("add", sdocs(newChildDoc)));
    +
    +    SolrInputDocument dummyBlock = sdoc("id", "1",
    +        "cat_ss", new ArrayList<>(Arrays.asList("aaa", "ccc")),
    +        "_root_", "1", "child", new ArrayList<>(sdocs(addedDoc)));
    +    dummyBlock.removeField(VERSION);
    +
    +    SolrInputDocument preMergeDoc = new SolrInputDocument(dummyBlock);
    +    AtomicUpdateDocumentMerger docMerger = new AtomicUpdateDocumentMerger(req());
    +    docMerger.merge(addedDoc, dummyBlock);
    +    assertEquals("merged document should have the same id", preMergeDoc.getFieldValue("id"), dummyBlock.getFieldValue("id"));
    +    assertDocContainsSubset(preMergeDoc, dummyBlock);
    +    assertDocContainsSubset(addedDoc, dummyBlock);
    +    assertDocContainsSubset(newChildDoc, (SolrInputDocument) ((List) dummyBlock.getFieldValues("child")).get(1));
    +    assertEquals(dummyBlock.getFieldValue("id"), dummyBlock.getFieldValue("id"));
    +  }
    +
    +  @Test
    +  public void testBlockAtomicQuantities() throws Exception {
    +    SolrInputDocument doc = sdoc("id", "1", "string_s", "root");
         addDoc(adoc(doc), "nested-rtg");
     
    -    BytesRef rootDocId = new BytesRef("1");
    -    SolrCore core = h.getCore();
    -    SolrInputDocument block = RealTimeGetComponent.getInputDocument(core, rootDocId, true);
    -    // assert block doc has child docs
    -    assertTrue(block.containsKey("child"));
    +    List<SolrInputDocument> docs = IntStream.range(10, 20).mapToObj(x -> sdoc("id", String.valueOf(x), "string_s", "child")).collect(Collectors.toList());
    +    doc = sdoc("id", "1", "children", Collections.singletonMap("add", docs));
    +    addAndGetVersion(doc, params("update.chain", "nested-rtg", "wt", "json"));
     
    -    assertJQ(req("q","id:1")
    -        ,"/response/numFound==0"
    +    assertU(commit());
    +
    +    assertJQ(req("q", "_root_:1"),
    +        "/response/numFound==11");
    +
    +    assertJQ(req("q", "string_s:child", "fl", "*", "rows", "1000000"),
    +        "/response/numFound==10");
    +
    +    // ensure updates work when block has more than 10 children
    --- End diff --
   
    Why is 10 special?


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [hidden email]
For additional commands, e-mail: [hidden email]

Reply | Threaded
Open this post in threaded view
|

[GitHub] lucene-solr pull request #455: SOLR-12638

moshebla
In reply to this post by moshebla
Github user dsmiley commented on a diff in the pull request:

    https://github.com/apache/lucene-solr/pull/455#discussion_r226709714
 
    --- Diff: solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateBlockTest.java ---
    @@ -59,39 +52,147 @@ public void before() {
     
       @Test
       public void testMergeChildDoc() throws Exception {
    -    SolrInputDocument doc = new SolrInputDocument();
    -    doc.setField("id", "1");
    -    doc.setField("cat_ss", new String[]{"aaa", "ccc"});
    -    doc.setField("child", Collections.singletonList(sdoc("id", "2", "cat_ss", "child")));
    +    SolrInputDocument newChildDoc = sdoc("id", "3", "cat_ss", "child");
    +    SolrInputDocument addedDoc = sdoc("id", "1",
    +        "cat_ss", Collections.singletonMap("add", "bbb"),
    +        "child", Collections.singletonMap("add", sdocs(newChildDoc)));
    +
    +    SolrInputDocument dummyBlock = sdoc("id", "1",
    +        "cat_ss", new ArrayList<>(Arrays.asList("aaa", "ccc")),
    +        "_root_", "1", "child", new ArrayList<>(sdocs(addedDoc)));
    +    dummyBlock.removeField(VERSION);
    +
    +    SolrInputDocument preMergeDoc = new SolrInputDocument(dummyBlock);
    +    AtomicUpdateDocumentMerger docMerger = new AtomicUpdateDocumentMerger(req());
    +    docMerger.merge(addedDoc, dummyBlock);
    +    assertEquals("merged document should have the same id", preMergeDoc.getFieldValue("id"), dummyBlock.getFieldValue("id"));
    +    assertDocContainsSubset(preMergeDoc, dummyBlock);
    +    assertDocContainsSubset(addedDoc, dummyBlock);
    +    assertDocContainsSubset(newChildDoc, (SolrInputDocument) ((List) dummyBlock.getFieldValues("child")).get(1));
    +    assertEquals(dummyBlock.getFieldValue("id"), dummyBlock.getFieldValue("id"));
    +  }
    +
    +  @Test
    +  public void testBlockAtomicQuantities() throws Exception {
    +    SolrInputDocument doc = sdoc("id", "1", "string_s", "root");
         addDoc(adoc(doc), "nested-rtg");
     
    -    BytesRef rootDocId = new BytesRef("1");
    -    SolrCore core = h.getCore();
    -    SolrInputDocument block = RealTimeGetComponent.getInputDocument(core, rootDocId, true);
    -    // assert block doc has child docs
    -    assertTrue(block.containsKey("child"));
    +    List<SolrInputDocument> docs = IntStream.range(10, 20).mapToObj(x -> sdoc("id", String.valueOf(x), "string_s", "child")).collect(Collectors.toList());
    +    doc = sdoc("id", "1", "children", Collections.singletonMap("add", docs));
    +    addAndGetVersion(doc, params("update.chain", "nested-rtg", "wt", "json"));
     
    -    assertJQ(req("q","id:1")
    -        ,"/response/numFound==0"
    +    assertU(commit());
    +
    +    assertJQ(req("q", "_root_:1"),
    +        "/response/numFound==11");
    +
    +    assertJQ(req("q", "string_s:child", "fl", "*", "rows", "1000000"),
    +        "/response/numFound==10");
    +
    +    // ensure updates work when block has more than 10 children
    +    for(int i = 10; i < 20; ++i) {
    +      System.out.println("indexing " + i);
    +      docs = IntStream.range(i * 10, (i * 10) + 5).mapToObj(x -> sdoc("id", String.valueOf(x), "string_s", "grandChild")).collect(Collectors.toList());
    +      doc = sdoc("id", String.valueOf(i), "grandChildren", Collections.singletonMap("add", docs));
    +      addAndGetVersion(doc, params("update.chain", "nested-rtg", "wt", "json"));
    +      assertU(commit());
    +    }
    +
    +    assertJQ(req("q", "id:114", "fl", "*", "rows", "1000000"),
    --- End diff --
   
    Why set the "fl" or "rows" in these queries?  Your assertion only checks numFound and not the content of those that were found.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [hidden email]
For additional commands, e-mail: [hidden email]

Reply | Threaded
Open this post in threaded view
|

[GitHub] lucene-solr issue #455: SOLR-12638

moshebla
In reply to this post by moshebla
Github user dsmiley commented on the issue:

    https://github.com/apache/lucene-solr/pull/455
 
    As a general point, I feel we should prefer "nested" terminology instead of "block".  If we were purely working within Lucene then I think "block" might be okay but at the Solr layer people see this stuff as "nested" docs.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [hidden email]
For additional commands, e-mail: [hidden email]

Reply | Threaded
Open this post in threaded view
|

[GitHub] lucene-solr pull request #455: SOLR-12638

moshebla
In reply to this post by moshebla
Github user moshebla commented on a diff in the pull request:

    https://github.com/apache/lucene-solr/pull/455#discussion_r226858107
 
    --- Diff: solr/core/src/test/org/apache/solr/update/processor/AtomicUpdateBlockTest.java ---
    @@ -59,39 +52,147 @@ public void before() {
     
       @Test
       public void testMergeChildDoc() throws Exception {
    -    SolrInputDocument doc = new SolrInputDocument();
    -    doc.setField("id", "1");
    -    doc.setField("cat_ss", new String[]{"aaa", "ccc"});
    -    doc.setField("child", Collections.singletonList(sdoc("id", "2", "cat_ss", "child")));
    +    SolrInputDocument newChildDoc = sdoc("id", "3", "cat_ss", "child");
    +    SolrInputDocument addedDoc = sdoc("id", "1",
    +        "cat_ss", Collections.singletonMap("add", "bbb"),
    +        "child", Collections.singletonMap("add", sdocs(newChildDoc)));
    +
    +    SolrInputDocument dummyBlock = sdoc("id", "1",
    +        "cat_ss", new ArrayList<>(Arrays.asList("aaa", "ccc")),
    +        "_root_", "1", "child", new ArrayList<>(sdocs(addedDoc)));
    +    dummyBlock.removeField(VERSION);
    +
    +    SolrInputDocument preMergeDoc = new SolrInputDocument(dummyBlock);
    +    AtomicUpdateDocumentMerger docMerger = new AtomicUpdateDocumentMerger(req());
    +    docMerger.merge(addedDoc, dummyBlock);
    +    assertEquals("merged document should have the same id", preMergeDoc.getFieldValue("id"), dummyBlock.getFieldValue("id"));
    +    assertDocContainsSubset(preMergeDoc, dummyBlock);
    +    assertDocContainsSubset(addedDoc, dummyBlock);
    +    assertDocContainsSubset(newChildDoc, (SolrInputDocument) ((List) dummyBlock.getFieldValues("child")).get(1));
    +    assertEquals(dummyBlock.getFieldValue("id"), dummyBlock.getFieldValue("id"));
    +  }
    +
    +  @Test
    +  public void testBlockAtomicQuantities() throws Exception {
    +    SolrInputDocument doc = sdoc("id", "1", "string_s", "root");
         addDoc(adoc(doc), "nested-rtg");
     
    -    BytesRef rootDocId = new BytesRef("1");
    -    SolrCore core = h.getCore();
    -    SolrInputDocument block = RealTimeGetComponent.getInputDocument(core, rootDocId, true);
    -    // assert block doc has child docs
    -    assertTrue(block.containsKey("child"));
    +    List<SolrInputDocument> docs = IntStream.range(10, 20).mapToObj(x -> sdoc("id", String.valueOf(x), "string_s", "child")).collect(Collectors.toList());
    +    doc = sdoc("id", "1", "children", Collections.singletonMap("add", docs));
    +    addAndGetVersion(doc, params("update.chain", "nested-rtg", "wt", "json"));
     
    -    assertJQ(req("q","id:1")
    -        ,"/response/numFound==0"
    +    assertU(commit());
    +
    +    assertJQ(req("q", "_root_:1"),
    +        "/response/numFound==11");
    +
    +    assertJQ(req("q", "string_s:child", "fl", "*", "rows", "1000000"),
    +        "/response/numFound==10");
    +
    +    // ensure updates work when block has more than 10 children
    --- End diff --
   
    This is to ensure the limit=-1 is passed to childDocumentTransformer, to retrieve all the block's child documents


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [hidden email]
For additional commands, e-mail: [hidden email]

12