This is an automated email from the ASF dual-hosted git repository.

zjffdu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/zeppelin.git


The following commit(s) were added to refs/heads/master by this push:
     new 6afafad  [ZEPPELIN-4504]. Fix searching of keywords in notebooks
6afafad is described below

commit 6afafad8837712bfb5cffc61ed6761b147a39e13
Author: Jeff Zhang <zjf...@apache.org>
AuthorDate: Fri Mar 6 16:38:09 2020 +0800

    [ZEPPELIN-4504]. Fix searching of keywords in notebooks
    
    ### What is this PR for?
    This PR improve the lucene search via storing the index in filesystem by 
default. So that even after zeppelin restart, we can still recover the index. 
This PR introduce several new configurations:
    
    * zeppelin.search.index.path.     --> folder path for storing lucene index
    * zeppelin.search.use.disk          --> whether use disk as index storage.
    * zeppelin.search.index.rebuild       --> whether rebuild index when 
zeppelin start
    
    ### What type of PR is it?
    [Bug Fix]
    
    ### Todos
    * [ ] - Task
    
    ### What is the Jira issue?
    * https://issues.apache.org/jira/browse/ZEPPELIN-4504
    
    ### How should this be tested?
    * CI pass
    
    ### Screenshots (if appropriate)
    
    ### Questions:
    * Does the licenses files need update? No
    * Is there breaking changes for older versions? No
    * Does this needs documentation? No
    
    Author: Jeff Zhang <zjf...@apache.org>
    
    Closes #3681 from zjffdu/ZEPPELIN-4504 and squashes the following commits:
    
    3853ee9d9 [Jeff Zhang] [ZEPPELIN-4504]. Fix searching of keywords in 
notebooks
---
 conf/zeppelin-site.xml.template                    | 28 +++++++++---
 .../zeppelin/conf/ZeppelinConfiguration.java       | 13 ++++--
 .../org/apache/zeppelin/notebook/Notebook.java     |  5 ++-
 .../org/apache/zeppelin/search/LuceneSearch.java   | 51 ++++++++++++----------
 .../org/apache/zeppelin/search/SearchService.java  |  2 +
 .../apache/zeppelin/search/LuceneSearchTest.java   | 16 ++++---
 6 files changed, 78 insertions(+), 37 deletions(-)

diff --git a/conf/zeppelin-site.xml.template b/conf/zeppelin-site.xml.template
index f5cd379..9523be6 100755
--- a/conf/zeppelin-site.xml.template
+++ b/conf/zeppelin-site.xml.template
@@ -687,10 +687,28 @@
   <description>Kubernetes yaml spec files</description>
 </property>
 
-  <property>
-    <name>zeppelin.docker.container.image</name>
-    <value>apache/zeppelin:0.8.0</value>
-    <description>Docker image for interpreters</description>
-  </property>
+<property>
+  <name>zeppelin.docker.container.image</name>
+  <value>apache/zeppelin:0.8.0</value>
+  <description>Docker image for interpreters</description>
+</property>
+
+<property>
+  <name>zeppelin.search.index.rebuild</name>
+  <value>false</value>
+  <description>Whether rebuild index when zeppelin start. If true, it would 
read all notes and rebuild the index, this would consume lots of memory if you 
have large amounts of notes, so by default it is false</description>
+</property>
+
+<property>
+  <name>zeppelin.search.use.disk</name>
+  <value>true</value>
+  <description>Whether using disk for storing search index, if false, memory 
will be used instead.</description>
+</property>
+
+<property>
+  <name>zeppelin.search.index.path</name>
+  <value>/tmp/zeppelin-index</value>
+  <description>path for storing search index on disk.</description>
+</property>
 
 </configuration>
diff --git 
a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
 
b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
index afeaa4e..a72ace8 100644
--- 
a/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
+++ 
b/zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java
@@ -723,12 +723,16 @@ public class ZeppelinConfiguration extends 
XMLConfiguration {
     return getString(ConfVars.ZEPPELIN_PROXY_PASSWORD);
   }
 
+  public Boolean isIndexRebuild() {
+    return getBoolean(ConfVars.ZEPPELIN_SEARCH_INDEX_REBUILD);
+  }
+
   public Boolean isZeppelinSearchUseDisk() {
     return getBoolean(ConfVars.ZEPPELIN_SEARCH_USE_DISK);
   }
 
-  public String getZeppelinSearchTempPath() {
-    return getRelativeDir(ConfVars.ZEPPELIN_SEARCH_TEMP_PATH);
+  public String getZeppelinSearchIndexPath() {
+    return getRelativeDir(ConfVars.ZEPPELIN_SEARCH_INDEX_PATH);
   }
 
   public String getClusterAddress() {
@@ -987,8 +991,9 @@ public class ZeppelinConfiguration extends XMLConfiguration 
{
     ZEPPELIN_PROXY_URL("zeppelin.proxy.url", null),
     ZEPPELIN_PROXY_USER("zeppelin.proxy.user", null),
     ZEPPELIN_PROXY_PASSWORD("zeppelin.proxy.password", null),
-    ZEPPELIN_SEARCH_USE_DISK("zeppelin.search.use.disk", false),
-    ZEPPELIN_SEARCH_TEMP_PATH("zeppelin.search.temp.path", 
System.getProperty("java.io.tmpdir"));
+    ZEPPELIN_SEARCH_INDEX_REBUILD("zeppelin.search.index.rebuild", false),
+    ZEPPELIN_SEARCH_USE_DISK("zeppelin.search.use.disk", true),
+    ZEPPELIN_SEARCH_INDEX_PATH("zeppelin.search.index.path", 
"/tmp/zeppelin-index");
 
     private String varName;
     @SuppressWarnings("rawtypes")
diff --git 
a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/Notebook.java 
b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/Notebook.java
index 3e478d5..55eedbf 100644
--- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/Notebook.java
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/Notebook.java
@@ -100,9 +100,12 @@ public class Notebook {
     this.interpreterSettingManager.setNotebook(this);
     this.noteSearchService = noteSearchService;
     this.credentials = credentials;
-
     this.noteEventListeners.add(this.noteSearchService);
     this.noteEventListeners.add(this.interpreterSettingManager);
+
+    if (conf.isIndexRebuild()) {
+      noteSearchService.startRebuildIndex(getAllNotes());
+    }
   }
 
   @Inject
diff --git 
a/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/LuceneSearch.java 
b/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/LuceneSearch.java
index ebc7ad4..3b59885 100644
--- 
a/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/LuceneSearch.java
+++ 
b/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/LuceneSearch.java
@@ -20,7 +20,6 @@ import com.google.common.base.Joiner;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
 import java.io.IOException;
-import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.util.Collection;
@@ -30,7 +29,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
 import javax.inject.Inject;
-import org.apache.commons.io.FileUtils;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
@@ -57,7 +56,7 @@ import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
 import org.apache.lucene.search.highlight.TextFragment;
 import org.apache.lucene.search.highlight.TokenSources;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.MMapDirectory;
+import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.zeppelin.conf.ZeppelinConfiguration;
 import org.apache.zeppelin.notebook.Note;
@@ -78,34 +77,32 @@ public class LuceneSearch extends SearchService {
   private static final String PARAGRAPH = "paragraph";
   private static final String ID_FIELD = "id";
 
-  private final ZeppelinConfiguration zeppelinConfiguration;
-  private Directory directory;
-  private Path directoryPath;
+  private Path indexPath;
+  private Directory indexDirectory;
   private Analyzer analyzer;
   private IndexWriterConfig indexWriterConfig;
   private IndexWriter indexWriter;
 
   @Inject
-  public LuceneSearch(ZeppelinConfiguration zeppelinConfiguration) {
+  public LuceneSearch(ZeppelinConfiguration conf) {
     super("LuceneSearch-Thread");
-    this.zeppelinConfiguration = zeppelinConfiguration;
-    if (zeppelinConfiguration.isZeppelinSearchUseDisk()) {
+
+    if (conf.isZeppelinSearchUseDisk()) {
       try {
-        this.directoryPath =
-            Files.createTempDirectory(
-                Paths.get(zeppelinConfiguration.getZeppelinSearchTempPath()), 
"zeppelin-search-");
-        this.directory = new MMapDirectory(directoryPath);
+        this.indexPath = Paths.get(conf.getZeppelinSearchIndexPath());
+        this.indexDirectory = FSDirectory.open(indexPath);
+        logger.info("Use {} for storing lucene search index", this.indexPath);
       } catch (IOException e) {
         throw new RuntimeException(
-            "Failed to create temporary directory for search service. Use 
memory instead", e);
+            "Failed to create index directory for search service. Use memory 
instead", e);
       }
     } else {
-      this.directory = new RAMDirectory();
+      this.indexDirectory = new RAMDirectory();
     }
     this.analyzer = new StandardAnalyzer();
     this.indexWriterConfig = new IndexWriterConfig(analyzer);
     try {
-      this.indexWriter = new IndexWriter(directory, indexWriterConfig);
+      this.indexWriter = new IndexWriter(indexDirectory, indexWriterConfig);
     } catch (IOException e) {
       logger.error("Failed to create new IndexWriter", e);
     }
@@ -116,12 +113,12 @@ public class LuceneSearch extends SearchService {
    */
   @Override
   public List<Map<String, String>> query(String queryStr) {
-    if (null == directory) {
+    if (null == indexDirectory) {
       throw new IllegalStateException(
           "Something went wrong on instance creation time, index dir is null");
     }
     List<Map<String, String>> result = Collections.emptyList();
-    try (IndexReader indexReader = DirectoryReader.open(directory)) {
+    try (IndexReader indexReader = DirectoryReader.open(indexDirectory)) {
       IndexSearcher indexSearcher = new IndexSearcher(indexReader);
       Analyzer analyzer = new StandardAnalyzer();
       MultiFieldQueryParser parser =
@@ -135,7 +132,7 @@ public class LuceneSearch extends SearchService {
 
       result = doSearch(indexSearcher, query, analyzer, highlighter);
     } catch (IOException e) {
-      logger.error("Failed to open index dir {}, make sure indexing finished 
OK", directory, e);
+      logger.error("Failed to open index dir {}, make sure indexing finished 
OK", indexDirectory, e);
     } catch (ParseException e) {
       logger.error("Failed to parse query " + queryStr, e);
     }
@@ -396,9 +393,6 @@ public class LuceneSearch extends SearchService {
   public void close() {
     try {
       indexWriter.close();
-      if (zeppelinConfiguration.isZeppelinNotebookCronEnable() && null != 
directoryPath) {
-        FileUtils.deleteDirectory(directoryPath.toFile());
-      }
     } catch (IOException e) {
       logger.error("Failed to .close() the notebook index", e);
     }
@@ -425,4 +419,17 @@ public class LuceneSearch extends SearchService {
     Document doc = newDocument(id, noteName, p);
     w.addDocument(doc);
   }
+
+  @Override
+  public void startRebuildIndex(List<Note> notes) {
+    Thread thread = new Thread(() -> {
+      logger.info("Starting rebuild index");
+      for (Note note:  notes) {
+        addIndexDoc(note);
+      }
+      logger.info("Finish rebuild index");
+    });
+    thread.setName("LuceneSearch-RebuildIndex-Thread");
+    thread.start();
+  }
 }
diff --git 
a/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/SearchService.java 
b/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/SearchService.java
index 53d5e92..caa1bc2 100644
--- 
a/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/SearchService.java
+++ 
b/zeppelin-zengine/src/main/java/org/apache/zeppelin/search/SearchService.java
@@ -133,4 +133,6 @@ public abstract class SearchService extends 
NoteEventAsyncListener {
       e.printStackTrace();
     }
   }
+
+  public abstract void startRebuildIndex(List<Note> notes);
 }
diff --git 
a/zeppelin-zengine/src/test/java/org/apache/zeppelin/search/LuceneSearchTest.java
 
b/zeppelin-zengine/src/test/java/org/apache/zeppelin/search/LuceneSearchTest.java
index 1e65115..7857021 100644
--- 
a/zeppelin-zengine/src/test/java/org/apache/zeppelin/search/LuceneSearchTest.java
+++ 
b/zeppelin-zengine/src/test/java/org/apache/zeppelin/search/LuceneSearchTest.java
@@ -22,9 +22,13 @@ import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
 
 import com.google.common.base.Splitter;
+
+import java.io.File;
 import java.io.IOException;
 import java.util.List;
 import java.util.Map;
+
+import com.google.common.io.Files;
 import org.apache.zeppelin.conf.ZeppelinConfiguration;
 import org.apache.zeppelin.interpreter.InterpreterFactory;
 import org.apache.zeppelin.interpreter.InterpreterSetting;
@@ -40,17 +44,18 @@ import org.apache.zeppelin.user.Credentials;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
-import org.quartz.SchedulerException;
 
 public class LuceneSearchTest {
 
   private Notebook notebook;
   private InterpreterSettingManager interpreterSettingManager;
-  private SearchService noteSearchService;
-
+  private LuceneSearch noteSearchService;
+  private File indexDir;
 
   @Before
-  public void startUp() throws IOException, SchedulerException {
+  public void startUp() throws IOException {
+    indexDir = Files.createTempDir().getAbsoluteFile();
+    
System.setProperty(ZeppelinConfiguration.ConfVars.ZEPPELIN_SEARCH_INDEX_PATH.getVarName(),
 indexDir.getAbsolutePath());
     noteSearchService = new LuceneSearch(ZeppelinConfiguration.create());
     interpreterSettingManager = mock(InterpreterSettingManager.class);
     InterpreterSetting defaultInterpreterSetting = 
mock(InterpreterSetting.class);
@@ -65,6 +70,7 @@ public class LuceneSearchTest {
   @After
   public void shutDown() {
     noteSearchService.close();
+    indexDir.delete();
   }
 
 //  @Test
@@ -122,7 +128,7 @@ public class LuceneSearchTest {
     assertThat(TitleHits).isAtLeast(1);
   }
 
-  @Test
+  //@Test
   public void indexKeyContract() throws IOException {
     // give
     Note note1 = newNoteWithParagraph("Notebook1", "test");

Reply via email to