Hi,

Here is the code for Solr 3.1 that will preserve all the text and will
disable sorting.

This goes in the solrconfig.xml request handler config, or whichever way you
pass params:
     <str name="hl.preserveOrder">true</str>

This line goes into HighlightParams class:
  public static final String PRESERVE_ORDER = HIGHLIGHT + ".preserveOrder";

Replace this method DefaultSolrHighlighter.doHighlightingByHighlighter (I
only added 3 if blocks):

  private void doHighlightingByHighlighter( Query query, SolrQueryRequest
req, NamedList docSummaries,
      int docId, Document doc, String fieldName ) throws IOException {
    SolrParams params = req.getParams(); 
    String[] docTexts = doc.getValues(fieldName);
    // according to Document javadoc, doc.getValues() never returns null.
check empty instead of null
    if (docTexts.length == 0) return;
    
    SolrIndexSearcher searcher = req.getSearcher();
    IndexSchema schema = searcher.getSchema();
    TokenStream tstream = null;
    int numFragments = getMaxSnippets(fieldName, params);
    boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName,
params);

    String[] summaries = null;
    List<TextFragment> frags = new ArrayList<TextFragment>();

    TermOffsetsTokenStream tots = null; // to be non-null iff we're using
TermOffsets optimization
    try {
        TokenStream tvStream =
TokenSources.getTokenStream(searcher.getReader(), docId, fieldName);
        if (tvStream != null) {
          tots = new TermOffsetsTokenStream(tvStream);
        }
    }
    catch (IllegalArgumentException e) {
      // No problem. But we can't use TermOffsets optimization.
    }

    for (int j = 0; j < docTexts.length; j++) {
      if( tots != null ) {
        // if we're using TermOffsets optimization, then get the next
        // field value's TokenStream (i.e. get field j's TokenStream) from
tots:
        tstream = tots.getMultiValuedTokenStream( docTexts[j].length() );
      } else {
        // fall back to analyzer
        tstream = createAnalyzerTStream(schema, fieldName, docTexts[j]);
      }
                   
      Highlighter highlighter;
      if
(Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER,
"true"))) {
        // TODO: this is not always necessary - eventually we would like to
avoid this wrap
        //       when it is not needed.
        tstream = new CachingTokenFilter(tstream);
        
        // get highlighter
        highlighter = getPhraseHighlighter(query, fieldName, req,
(CachingTokenFilter) tstream);
         
        // after highlighter initialization, reset tstream since
construction of highlighter already used it
        tstream.reset();
      }
      else {
        // use "the old way"
        highlighter = getHighlighter(query, fieldName, req);
      }
      
      int maxCharsToAnalyze = params.getFieldInt(fieldName,
          HighlightParams.MAX_CHARS,
          Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE);
      if (maxCharsToAnalyze < 0) {
        highlighter.setMaxDocCharsToAnalyze(docTexts[j].length());
      } else {
        highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
      }

      try {
        TextFragment[] bestTextFragments =
highlighter.getBestTextFragments(tstream, docTexts[j],
mergeContiguousFragments, numFragments);
        for (int k = 0; k < bestTextFragments.length; k++) {
          if (params.getBool( HighlightParams.PRESERVE_ORDER, false ) ) {       
                
                if ((bestTextFragments[k] != null) ){//&&
(bestTextFragments[k].getScore() > 0)) {
                  frags.add(bestTextFragments[k]);
                }
          }
          else {
                if ((bestTextFragments[k] != null) &&
(bestTextFragments[k].getScore() > 0)) {
                  frags.add(bestTextFragments[k]);
            }
          }
        }
      } catch (InvalidTokenOffsetsException e) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
      }
    }
    // sort such that the fragments with the highest score come first
    if (!params.getBool( HighlightParams.PRESERVE_ORDER, false ) ) {
            Collections.sort(frags, new Comparator<TextFragment>() {
              public int compare(TextFragment arg0, TextFragment arg1) {
                return Math.round(arg1.getScore() - arg0.getScore());
              }
            });
    }
    
     // convert fragments back into text
     // TODO: we can include score and position information in output as
snippet attributes
    if (frags.size() > 0) {
      ArrayList<String> fragTexts = new ArrayList<String>();
      for (TextFragment fragment: frags) {
        if (params.getBool( HighlightParams.PRESERVE_ORDER, false ) ) {  
                if ((fragment != null) ){// && (fragment.getScore() > 0)) {
                  fragTexts.add(fragment.toString());
                }
                if (fragTexts.size() >= numFragments) break;
        } else {
                if ((fragment != null) && (fragment.getScore() > 0)) {
                  fragTexts.add(fragment.toString());
                    }
                    if (fragTexts.size() >= numFragments) break;
        }
      }
      summaries = fragTexts.toArray(new String[0]);
      if (summaries.length > 0) 
      docSummaries.add(fieldName, summaries);
    }
    // no summeries made, copy text from alternate field
    if (summaries == null || summaries.length == 0) {
      alternateField( docSummaries, params, doc, fieldName );
    }
  }


This seems to work for my purposes. If nobody has any issues with this code,
perhaps it should be submitted as a patch?

Thanks,
Alexei


--
View this message in context: 
http://lucene.472066.n3.nabble.com/return-unaltered-complete-multivalued-fields-with-Highlighted-results-tp2967146p3015616.html
Sent from the Solr - User mailing list archive at Nabble.com.

Reply via email to