tags 535162 + patch
thanks

On Sun, Nov 01, 2009 at 12:54:17PM +0100, Norbert Preining wrote:
> On So, 01 Nov 2009, Stefano Zacchiroli wrote:
> > Thanks a lot also for the extra investigation, I'll try to mock up a
> > patch for both issues.
> 
> Would be great, I don't have much time ATM. I moved to Japan this month,
> and TL2009 is costing my last nerves ...

Sorry for the delay, here is a first stab at the patch. The overall
result is shown by the simple test below, deleting 2.3K messages has
been re-indexed in about 90 seconds, whereas it used to take hours.

  z...@usha:~$ mu-index 
  mu-index: scanning /home/zack/Maildir, storing in
  /home/zack/.mu/mu-sqlite.db-0.4
  * counting msgs.......21056 messages found
  * cleaning up database.......2360 messages cleaned up
  / mu-index processing msg 21056 of 21056 (100.0%)
  * message(s) up-to-date : 21011
  * message(s) added      : 45
  * message(s) updated    : 0
  * message(s) cleaned up : 0
  
  * time taken: 91 second(s)

Now for the bad news:

- I haven't actually benchmarked the same mailbox change before and
  after the patch, due to lack of time: help in that respect is
  appreciated

- to obtain the patch, I had to expose transaction related functions in
  the .h file of the xapian storage (in turn, that's because the cleanup
  callback is called outside that module and we really need to start a
  transaction before any callback and stop it at the end of all of
  them). I doubt this is a problem, cause mu is not a library, but it is
  worth to pint this out

- I haven't implemented the other optimizations suggested by Olly, only
  the "big transaction vs tons of small transactions" one

The patch is attached in two forms: debdiff (for the packages) and plain
patch for upstream or similar.

Tests are welcome.

Cheers.

-- 
Stefano Zacchiroli -o- PhD in Computer Science \ PostDoc @ Univ. Paris 7
z...@{upsilon.cc,pps.jussieu.fr,debian.org} -<>- http://upsilon.cc/zack/
Dietro un grande uomo c'è ..|  .  |. Et ne m'en veux pas si je te tutoie
sempre uno zaino ...........| ..: |.... Je dis tu à tous ceux que j'aime
diff -u maildir-utils-0.4/debian/changelog maildir-utils-0.4/debian/changelog
--- maildir-utils-0.4/debian/changelog
+++ maildir-utils-0.4/debian/changelog
@@ -1,3 +1,11 @@
+maildir-utils (0.4-4) UNRELEASED; urgency=low
+
+  [ Stefano Zacchiroli ]
+  * Add new patch xapian-cleanup-transaction to avoid one Xapian
+    transaction per delete message upon cleanup. (Closes: #535162)
+
+ -- Stefano Zacchiroli <z...@debian.org>  Mon, 21 Dec 2009 18:12:57 +0100
+
 maildir-utils (0.4-3) unstable; urgency=low
 
   * start using git: import 0.4-1, 0.4-2, add Vcs-{Git,Web} in debian/control
diff -u maildir-utils-0.4/debian/patches/series maildir-utils-0.4/debian/patches/series
--- maildir-utils-0.4/debian/patches/series
+++ maildir-utils-0.4/debian/patches/series
@@ -4,0 +5 @@
+xapian-cleanup-transaction -p0
only in patch2:
unchanged:
--- maildir-utils-0.4.orig/debian/patches/xapian-cleanup-transaction
+++ maildir-utils-0.4/debian/patches/xapian-cleanup-transaction
@@ -0,0 +1,155 @@
+Index: index/mu-index.c
+===================================================================
+--- index/mu-index.c.orig
++++ index/mu-index.c
+@@ -366,6 +366,7 @@
+ 	cb_data._user_data = user_data;
+ #ifdef MU_HAVE_XAPIAN
+ 	cb_data._xapian    = index->_xapian;
++	mu_storage_xapian_transaction_begin(index->_xapian);
+ #endif /*MU_HAVE_XAPIAN*/
+ 		
+ 	cb_data._stats     = stats;
+@@ -373,6 +374,12 @@
+ 		(index->_sqlite,
+ 		 (MuStorageSQLiteCleanupCallback)sqlite_remove_callback,
+ 		 &cb_data);
++#ifdef MU_HAVE_XAPIAN
++	if (result == MU_OK)
++		mu_storage_xapian_transaction_commit(index->_xapian);
++	else
++		mu_storage_xapian_transaction_rollback(index->_xapian);
++#endif /*MU_HAVE_XAPIAN*/
+ 	
+ 	return result;
+ }
+Index: index/mu-storage-xapian.cc
+===================================================================
+--- index/mu-storage-xapian.cc.orig
++++ index/mu-storage-xapian.cc
+@@ -26,10 +26,6 @@
+ #include "msg/mu-msg-gmime.h"
+ #include "mu-storage-xapian.h"
+ 
+-static bool transaction_begin (MuStorageXapian *storage);
+-static bool transaction_commit (MuStorageXapian *storage);
+-static bool transaction_rollback (MuStorageXapian *storage);
+-
+ struct _MuStorageXapian {
+ 	Xapian::WritableDatabase *_db;
+ 
+@@ -97,7 +93,7 @@
+ 
+ 	try { 	
+ 		if (storage->_in_transaction)
+-			transaction_commit (storage);
++			mu_storage_xapian_transaction_commit (storage);
+ 
+ 		delete storage->_db;
+ 		g_free (storage);
+@@ -113,8 +109,8 @@
+ 	}
+ }
+ 
+-static bool
+-transaction_begin (MuStorageXapian *storage)
++int
++mu_storage_xapian_transaction_begin (MuStorageXapian *storage)
+ {
+ 	if (storage->_in_transaction) {
+ 		g_warning ("%s: already in a transaction", __FUNCTION__);
+@@ -141,8 +137,8 @@
+ }
+ 
+ 
+-static bool
+-transaction_commit (MuStorageXapian *storage)
++int
++mu_storage_xapian_transaction_commit (MuStorageXapian *storage)
+ {
+ 	if (!storage->_in_transaction) {
+ 		g_warning ("%s: not in a tranction", __FUNCTION__);
+@@ -169,8 +165,8 @@
+ }
+ 
+ 
+-static bool
+-transaction_rollback (MuStorageXapian *storage)
++int
++mu_storage_xapian_transaction_rollback (MuStorageXapian *storage)
+ {
+ 	if (!storage->_in_transaction) {
+ 		g_warning ("%s: not in a tranction", __FUNCTION__);
+@@ -224,7 +220,8 @@
+ 		Xapian::TermGenerator termgen;
+ 
+ 		/* start transaction if needed */
+-		if (!storage->_in_transaction && !transaction_begin (storage)) {
++		if (!storage->_in_transaction &&
++		    !mu_storage_xapian_transaction_begin (storage)) {
+ 			g_warning ("%s: failed to start transaction", __FUNCTION__);
+ 			return MU_ERROR;
+ 		}
+@@ -253,14 +250,14 @@
+ 		if (id == 0) {
+ 			g_warning ("%s: failed to store document data", 
+ 				   __FUNCTION__);
+-			transaction_rollback (storage);
++			mu_storage_xapian_transaction_rollback (storage);
+ 			return MU_OK;
+ 		}
+ 		g_debug ("%s: stored document with id %u",__FUNCTION__, 
+ 			 (unsigned int)id);
+ 		
+ 		if ((++storage->_processed % storage->_transaction_size) == 0)
+-			if (!transaction_commit (storage)) {
++			if (!mu_storage_xapian_transaction_commit (storage)) {
+ 				g_warning ("%s: commit failed", __FUNCTION__);
+ 				return MU_ERROR;
+ 			}
+@@ -298,14 +295,12 @@
+ 		enq.set_query(q);
+ 		matches = enq.get_mset(0, 1); 
+ 		
+-		transaction_begin (storage);
+ 		for (Xapian::MSet::const_iterator ci = matches.begin();
+ 		     ci != matches.end(); ++ci) {
+ 			Xapian::docid id (ci.get_document().get_docid());
+ 			g_message ("xapian: deleting document %u", (guint)id);
+ 			storage->_db->delete_document(id);
+ 		}		
+-		transaction_commit (storage);
+ 		return MU_OK;
+ 
+ 	} catch (const Xapian::Error &err) {
+@@ -313,16 +308,10 @@
+ 			   __FUNCTION__, err.get_msg().c_str(), 
+ 			   err.get_error_string());
+ 		
+-		if (storage->_in_transaction)
+-			transaction_rollback (storage);
+-
+ 		return MU_ERROR;
+ 	} catch (...) {
+ 		g_warning ("%s: caught exception", __FUNCTION__);
+ 		
+-		if (storage->_in_transaction)
+-			transaction_rollback (storage);
+-
+ 		return MU_ERROR;
+ 	}
+ 
+Index: index/mu-storage-xapian.h
+===================================================================
+--- index/mu-storage-xapian.h.orig
++++ index/mu-storage-xapian.h
+@@ -49,6 +49,9 @@
+ 					     MuMsgGMime *msg);
+ MuResult          mu_storage_xapian_cleanup (MuStorageXapian *storage, 
+ 					     const char* msgpath);
++int               mu_storage_xapian_transaction_begin(MuStorageXapian *storage);
++int               mu_storage_xapian_transaction_commit(MuStorageXapian *storage);
++int               mu_storage_xapian_transaction_rollback(MuStorageXapian *storage);
+ G_END_DECLS
+ 
+ #endif /*__MU_STORAGE_XAPIAN_H__*/
Index: index/mu-index.c
===================================================================
--- index/mu-index.c.orig
+++ index/mu-index.c
@@ -366,6 +366,7 @@
        cb_data._user_data = user_data;
 #ifdef MU_HAVE_XAPIAN
        cb_data._xapian    = index->_xapian;
+       mu_storage_xapian_transaction_begin(index->_xapian);
 #endif /*MU_HAVE_XAPIAN*/
                
        cb_data._stats     = stats;
@@ -373,6 +374,12 @@
                (index->_sqlite,
                 (MuStorageSQLiteCleanupCallback)sqlite_remove_callback,
                 &cb_data);
+#ifdef MU_HAVE_XAPIAN
+       if (result == MU_OK)
+               mu_storage_xapian_transaction_commit(index->_xapian);
+       else
+               mu_storage_xapian_transaction_rollback(index->_xapian);
+#endif /*MU_HAVE_XAPIAN*/
        
        return result;
 }
Index: index/mu-storage-xapian.cc
===================================================================
--- index/mu-storage-xapian.cc.orig
+++ index/mu-storage-xapian.cc
@@ -26,10 +26,6 @@
 #include "msg/mu-msg-gmime.h"
 #include "mu-storage-xapian.h"
 
-static bool transaction_begin (MuStorageXapian *storage);
-static bool transaction_commit (MuStorageXapian *storage);
-static bool transaction_rollback (MuStorageXapian *storage);
-
 struct _MuStorageXapian {
        Xapian::WritableDatabase *_db;
 
@@ -97,7 +93,7 @@
 
        try {   
                if (storage->_in_transaction)
-                       transaction_commit (storage);
+                       mu_storage_xapian_transaction_commit (storage);
 
                delete storage->_db;
                g_free (storage);
@@ -113,8 +109,8 @@
        }
 }
 
-static bool
-transaction_begin (MuStorageXapian *storage)
+int
+mu_storage_xapian_transaction_begin (MuStorageXapian *storage)
 {
        if (storage->_in_transaction) {
                g_warning ("%s: already in a transaction", __FUNCTION__);
@@ -141,8 +137,8 @@
 }
 
 
-static bool
-transaction_commit (MuStorageXapian *storage)
+int
+mu_storage_xapian_transaction_commit (MuStorageXapian *storage)
 {
        if (!storage->_in_transaction) {
                g_warning ("%s: not in a tranction", __FUNCTION__);
@@ -169,8 +165,8 @@
 }
 
 
-static bool
-transaction_rollback (MuStorageXapian *storage)
+int
+mu_storage_xapian_transaction_rollback (MuStorageXapian *storage)
 {
        if (!storage->_in_transaction) {
                g_warning ("%s: not in a tranction", __FUNCTION__);
@@ -224,7 +220,8 @@
                Xapian::TermGenerator termgen;
 
                /* start transaction if needed */
-               if (!storage->_in_transaction && !transaction_begin (storage)) {
+               if (!storage->_in_transaction &&
+                   !mu_storage_xapian_transaction_begin (storage)) {
                        g_warning ("%s: failed to start transaction", 
__FUNCTION__);
                        return MU_ERROR;
                }
@@ -253,14 +250,14 @@
                if (id == 0) {
                        g_warning ("%s: failed to store document data", 
                                   __FUNCTION__);
-                       transaction_rollback (storage);
+                       mu_storage_xapian_transaction_rollback (storage);
                        return MU_OK;
                }
                g_debug ("%s: stored document with id %u",__FUNCTION__, 
                         (unsigned int)id);
                
                if ((++storage->_processed % storage->_transaction_size) == 0)
-                       if (!transaction_commit (storage)) {
+                       if (!mu_storage_xapian_transaction_commit (storage)) {
                                g_warning ("%s: commit failed", __FUNCTION__);
                                return MU_ERROR;
                        }
@@ -298,14 +295,12 @@
                enq.set_query(q);
                matches = enq.get_mset(0, 1); 
                
-               transaction_begin (storage);
                for (Xapian::MSet::const_iterator ci = matches.begin();
                     ci != matches.end(); ++ci) {
                        Xapian::docid id (ci.get_document().get_docid());
                        g_message ("xapian: deleting document %u", (guint)id);
                        storage->_db->delete_document(id);
                }               
-               transaction_commit (storage);
                return MU_OK;
 
        } catch (const Xapian::Error &err) {
@@ -313,16 +308,10 @@
                           __FUNCTION__, err.get_msg().c_str(), 
                           err.get_error_string());
                
-               if (storage->_in_transaction)
-                       transaction_rollback (storage);
-
                return MU_ERROR;
        } catch (...) {
                g_warning ("%s: caught exception", __FUNCTION__);
                
-               if (storage->_in_transaction)
-                       transaction_rollback (storage);
-
                return MU_ERROR;
        }
 
Index: index/mu-storage-xapian.h
===================================================================
--- index/mu-storage-xapian.h.orig
+++ index/mu-storage-xapian.h
@@ -49,6 +49,9 @@
                                             MuMsgGMime *msg);
 MuResult          mu_storage_xapian_cleanup (MuStorageXapian *storage, 
                                             const char* msgpath);
+int               mu_storage_xapian_transaction_begin(MuStorageXapian 
*storage);
+int               mu_storage_xapian_transaction_commit(MuStorageXapian 
*storage);
+int               mu_storage_xapian_transaction_rollback(MuStorageXapian 
*storage);
 G_END_DECLS
 
 #endif /*__MU_STORAGE_XAPIAN_H__*/

Reply via email to