Integrating Solr with my existing java web application

classic Classic list List threaded Threaded
2 messages Options
Reply | Threaded
Open this post in threaded view
|

Integrating Solr with my existing java web application

Khare, Kushal (MIND)
Hello mates !
Hope you people are doing good !

I am trying to integrate SolrJ code for indexing and querying documents through Solr into my Java web app. I am facing a very weird issue: when I run my Solr method as a standalone Java application (using the main() function), it works fine - it extracts the text and searches. But as soon as I trigger it from the search page of my web app, the function is called, yet it does not extract any text from the docs and is unable to search.

I verified all the jars, etc., but could not figure anything out. The following is the code that I am using:


package com.mind.qdms.utility;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.ContentHandler;


public class QdmsSolrUtilityMethods {



         public static void main(String[] args) throws IOException,
         SolrServerException { QdmsSolrUtilityMethods.getDocsList("kushal"); }


       public static List<String> getDocsList(String keyword) throws IOException, SolrServerException {
              System.out.println("in util");
              HttpSolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr/tika").build();
              AutoDetectParser autoParser = new AutoDetectParser();
              indexTikaDocuments(new File("D:\\docs"), client, autoParser);
              List<String> resultDocList = queryDocuments(client, keyword);
              return resultDocList;
       }

       public static void indexTikaDocuments(File root,HttpSolrClient
         client,AutoDetectParser autoParser) throws IOException, SolrServerException {
         int totalTika = 0;

         @SuppressWarnings("rawtypes") Collection docList = new ArrayList();

         for (File file : root.listFiles()){
                if (file.isDirectory()) {
                       indexTikaDocuments(file,client,autoParser);
                       continue;
                }
                ContentHandler textHandler = new BodyContentHandler(-1);
                Metadata metadata = new Metadata();
                ParseContext context = new ParseContext();
                InputStream input = new
                FileInputStream(file);
                try {
                autoParser.parse(input, textHandler, metadata, context);
                }catch (Exception e) {
                System.out.println(String.format("File %s failed", file.getCanonicalPath()));
                e.printStackTrace();
                continue;
                }
                SolrInputDocument doc = new SolrInputDocument();
                doc.addField("id", file.getCanonicalPath());
                doc.addField("_text_", textHandler.toString());
                docList.add(doc);
                System.out.println(textHandler.toString());
                System.out.println( file.getCanonicalPath()); ++totalTika;
                // Completely arbitrary, just batch up more than one document for throughput!
                if(docList.size() >= 1000) {
                       // Commit within 5 minutes.
                       UpdateResponse resp = client.add(docList, 300000);
                       if (resp.getStatus() != 0) {
                             System.out.println("Some horrible error has occurred, status is: " +
                             resp.getStatus());
                             }
                       docList.clear();
                       }
                }if(docList.size() > 0) {
                       client.add(docList, 300000);
                     } client.commit();
                System.out.println("indexed " + totalTika + " documents");
                }

       public static List<String> queryDocuments(HttpSolrClient client1, String queryTerm) throws SolrServerException, IOException {
              List<String> resultList = null;
              HttpSolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr").build();

              final Map<String, String> queryParamMap = new HashMap<String, String>();
              queryParamMap.put("q", queryTerm);
              queryParamMap.put("rows", "5000");
              MapSolrParams queryParams = new MapSolrParams(queryParamMap);

              final QueryResponse response = client.query("tika", queryParams);
              final SolrDocumentList docList = response.getResults();

              System.out.println("docList ::  "+docList);
              System.out.println("docList size ::  "+docList.size());

              for (SolrDocument document : docList) {
                     final String id = (String) document.getFirstValue("id");
                     resultList.add(id);
              }
              return resultList;
       }


}



Please help me with this.

Thanks!

________________________________

The information contained in this electronic message and any attachments to this message are intended for the exclusive use of the addressee(s) and may contain proprietary, confidential or privileged information. If you are not the intended recipient, you should not disseminate, distribute or copy this e-mail. Please notify the sender immediately and destroy all copies of this message and any attachments. WARNING: Computer viruses can be transmitted via email. The recipient should check this email and any attachments for the presence of viruses. The company accepts no liability for any damage caused by any virus/trojan/worms/malicious code transmitted by this email. www.motherson.com
Reply | Threaded
Open this post in threaded view
|

Re: Integrating Solr with my existing java web application

Jörn Franke
I recommend integrating log4j2 into the app instead of using println. Then you will see all the log statements, including those from Solr, in a log file, which should point you to the issue.

> Am 01.11.2019 um 07:46 schrieb Khare, Kushal (MIND) <[hidden email]>:
>
> Hello mates !
> Hope you people are doing good !
>
> Well, I am trying to integrate the SolrJ code for indexing and querying the documents through Solr with my java web app. I am facing a very wired issue, that when I run my method for Solr as java app (independently using main() function) it works fine- extracts text , searches. But, as soon as I give the hit through my web app from search page, it hits, function is being called, but it does not extracts any text from docs, and is unable to search.
>
> I verified all the jars, etc. but could not figure out anything. Following is the code that I am using :
>
>
> package com.mind.qdms.utility;
>
> import java.io.File;
> import java.io.FileInputStream;
> import java.io.IOException;
> import java.io.InputStream;
> import java.util.ArrayList;
> import java.util.Collection;
> import java.util.HashMap;
> import java.util.List;
> import java.util.Map;
> import org.apache.solr.client.solrj.SolrServerException;
> import org.apache.solr.client.solrj.impl.HttpSolrClient;
> import org.apache.solr.client.solrj.response.QueryResponse;
> import org.apache.solr.client.solrj.response.UpdateResponse;
> import org.apache.solr.common.SolrDocument;
> import org.apache.solr.common.SolrDocumentList;
> import org.apache.solr.common.SolrInputDocument;
> import org.apache.solr.common.params.MapSolrParams;
> import org.apache.tika.metadata.Metadata;
> import org.apache.tika.parser.AutoDetectParser;
> import org.apache.tika.parser.ParseContext;
> import org.apache.tika.sax.BodyContentHandler;
> import org.xml.sax.ContentHandler;
>
>
> public class QdmsSolrUtilityMethods {
>
>
>
>         public static void main(String[] args) throws IOException,
>         SolrServerException { QdmsSolrUtilityMethods.getDocsList("kushal"); }
>
>
>       public static List<String> getDocsList(String keyword) throws IOException, SolrServerException {
>              System.out.println("in util");
>              HttpSolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr/tika").build();
>              AutoDetectParser autoParser = new AutoDetectParser();
>              indexTikaDocuments(new File("D:\\docs"), client, autoParser);
>              List<String> resultDocList = queryDocuments(client, keyword);
>              return resultDocList;
>       }
>
>       public static void indexTikaDocuments(File root,HttpSolrClient
>         client,AutoDetectParser autoParser) throws IOException, SolrServerException {
>         int totalTika = 0;
>
>         @SuppressWarnings("rawtypes") Collection docList = new ArrayList();
>
>         for (File file : root.listFiles()){
>                if (file.isDirectory()) {
>                       indexTikaDocuments(file,client,autoParser);
>                       continue;
>                }
>                ContentHandler textHandler = new BodyContentHandler(-1);
>                Metadata metadata = new Metadata();
>                ParseContext context = new ParseContext();
>                InputStream input = new
>                FileInputStream(file);
>                try {
>                autoParser.parse(input, textHandler, metadata, context);
>                }catch (Exception e) {
>                System.out.println(String.format("File %s failed", file.getCanonicalPath()));
>                e.printStackTrace();
>                continue;
>                }
>                SolrInputDocument doc = new SolrInputDocument();
>                doc.addField("id", file.getCanonicalPath());
>                doc.addField("_text_", textHandler.toString());
>                docList.add(doc);
>                System.out.println(textHandler.toString());
>                System.out.println( file.getCanonicalPath()); ++totalTika;
>                // Completely arbitrary, just batch up more than one document for throughput!
>                if(docList.size() >= 1000) {
>                       // Commit within 5 minutes.
>                       UpdateResponse resp = client.add(docList, 300000);
>                       if (resp.getStatus() != 0) {
>                             System.out.println("Some horrible error has occurred, status is: " +
>                             resp.getStatus());
>                             }
>                       docList.clear();
>                       }
>                }if(docList.size() > 0) {
>                       client.add(docList, 300000);
>                     } client.commit();
>                System.out.println("indexed " + totalTika + " documents");
>                }
>
>       public static List<String> queryDocuments(HttpSolrClient client1, String queryTerm) throws SolrServerException, IOException {
>              List<String> resultList = null;
>              HttpSolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr").build();
>
>              final Map<String, String> queryParamMap = new HashMap<String, String>();
>              queryParamMap.put("q", queryTerm);
>              queryParamMap.put("rows", "5000");
>              MapSolrParams queryParams = new MapSolrParams(queryParamMap);
>
>              final QueryResponse response = client.query("tika", queryParams);
>              final SolrDocumentList docList = response.getResults();
>
>              System.out.println("docList ::  "+docList);
>              System.out.println("docList size ::  "+docList.size());
>
>              for (SolrDocument document : docList) {
>                     final String id = (String) document.getFirstValue("id");
>                     resultList.add(id);
>              }
>              return resultList;
>       }
>
>
> }
>
>
>
> Please help me with this.
>
> Thanks!
>
> ________________________________
>
> The information contained in this electronic message and any attachments to this message are intended for the exclusive use of the addressee(s) and may contain proprietary, confidential or privileged information. If you are not the intended recipient, you should not disseminate, distribute or copy this e-mail. Please notify the sender immediately and destroy all copies of this message and any attachments. WARNING: Computer viruses can be transmitted via email. The recipient should check this email and any attachments for the presence of viruses. The company accepts no liability for any damage caused by any virus/trojan/worms/malicious code transmitted by this email. www.motherson.com