I have some sample code, written using Lucene, that implements it. This code is
not final and needs many modifications. Now I want to embed it in Solr — how
is this possible?
The code is below:
//package lia.searching;
import java.util.Arrays;
import java.util.Collections;
//import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.FSDirectory;
public class Explainer {
public static void reverse(float[] array) {
if (array == null) {
return;
}
int i = 0;
int j = array.length - 1;
float tmp;
while (j > i) {
tmp = array[j];
array[j] = array[i];
array[i] = tmp;
j--;
i++;
}
}
public static float fun(float r ,float a ,float b )
{
if(r<a) return 0.0f;
else if(a<=r&&r<=b) return (r-a)/(b-a);
else if(r>b) return 1.0f;
return 0.0f;
}
public static void main(String[] args) throws Exception {
if (args.length < 3) {
System.err.println("Usage: Explainer ");
System.exit(1);
}
String indexDir = args[0];
String options = args[1]; //atleasthalf,most, asmanyaspossible
String[] queryExpression=new String[args.length-2];
for(int i=2;i<args.length;i++)
{
String tmp=args[i];
queryExpression[i-2]=tmp;
System.out.println(queryExpression[i-2]);
}
FSDirectory directory =
FSDirectory.getDirectory(indexDir, false);
int TOTAL_DOC=12483;//total document=12484
float[][] R=new float[TOTAL_DOC][10]; //Relevancy Matrix
float[][] QR= new float[TOTAL_DOC][10]; //Query Relevancy matrix
float[] Weight=new float[10]; //weigthts for terms like "at least
half of" or "most"
//calcualted from formulae Q(r)=Q(i/m)-Q((i-1)/m;
Q(0)=0;
//calculating weights for the terms
float a=0.0f,b=0.5f;
if(options.equals("atleasthalf")){a=0.0f;b=0.5f;}
else if(options.equals("most")){a=0.3f;b=0.8f;}
else
if(options.equals("asmanyaspossible")){a=0.5f;b=1.0f;System.out.println("3rd");}
int m=args.length-2;
for(int i=2,j=1;i<args.length;i++,j++)
{
float f1=(float)j/m;
float f2=(float)(j-1)/m;
Weight[i-2]=fun(f1,a,b)-fun(f2,a,b);
System.out.print(Weight[i-2]+" ");
}
System.out.println();
for(int start=0;start<queryExpression.length;start++)
{
///////////////////////////////////////////////////////////////////////////////////
QueryParser queryParser = new QueryParser("content", new
SimpleAnalyzer());//added
//QueryParser queryParser = new QueryParser("content", new
StandardAnalyzer());
Query query =
queryParser.parse(queryExpression[start]);//,"contents",new
StandardAnalyzer() );//old syntax - not work
//////////////////////////////////////////////////////////////////////////////////
//System.out.println("Query: " + queryExpression);
IndexSearcher searcher = new IndexSearcher(directory);
Hits hits = searcher.search(query);
System.out.println("total hits="+hits.length());
/*//////////////////////////////////////////
Similarity sim = searcher.getSimilarity();
/*//////////////////////////////////////
for (int i = 0; i < TOTAL_DOC; i++) {
// Explanation explanation =
//searcher.explain(query, hits.id(i));
//System.out.println("----------");
try{
Document doc = hits.doc(i);
System.out.println(doc.get("title"));
}catch(Exception e){}
try{
R[i][start]=hits.score(i);
System.out.println(R[i][start]);
}
catch(Exception e)
{
R[i][start]=0.0f;
}
////////////////////////////////////
//System.out.println(hits.score(i));//working
////////////////////////////////////
/*////////////////////////////////////
int docId = hits.id(i);
int freq = doc.freq();
TermFreqVector vector = knownSearcher.reader.getTermFreqVector(doc,
"field");
float tf = sim.tf(freq);
float idf = sim.idf(term, knownSearcher);
/*//////////////////////////////////////
/*
String tmp=explanation.toString();
String tmp2[]=tmp.split(" ");
for(int l=0;l<tmp2.length;l++)
{
float t;
try{
t=Float.parseFloat(tmp2[l]);
System.out.println(t);
}
catch(Exception e){}
}//for
*/
}//for
}//for
//sort the relevancy matrix and multiply with weights
for(int i=0;i<TOTAL_DOC;i++)
{
Arrays.sort(R[i]);
reverse(R[i]);
for(int j=0;j<1;j++)
{
QR[i][j]=0.0f;
for(int k=0;k<queryExpression.length;k++)
{
QR[i][j]+=(R[i][k]*Weight[k]);
}
}
}
//print the scores of the final documents
System.out.println("Final Scores of Documents");
IndexReader reader = IndexReader.open(directory);
float max=0.0f,min=0.9999f;
int num1=0,num2=0;
String ds="",ds2="";
for(int i=0;i<TOTAL_DOC;i++)
{
try{
Document d = reader.document( i);
System.out.println("Document
"+d.get("title").toString()+"score= "+QR[i][0]);
if(QR[i][0]>max) {num1=i;max=QR[i][0];ds=d.get("title").toString();}
if(QR[i][0]<min&&QR[i][0]>=0.1f)
{num2=i;min=QR[i][0];ds2=d.get("title").toString();}
}catch(Exception e){}
//Thread.sleep(100);
System.out.println(i);
}
System.out.println(num1+" "+ds+" "+max);
System.out.println(num2+" "+ds2+" "+min);
}//main
}//class
-----
Kumar Anurag
--
View this message in context:
http://lucene.472066.n3.nabble.com/Implementing-Fuzzy-Search-using-OWA-operator-and-Fuzzy-Linguistic-Quantifier-tp2261469p2699065.html
Sent from the Solr - User mailing list archive at Nabble.com.