Could you try building trunk? I believe these issues have been resolved or at least improved. It would be good to see how much.
Sent from my iPhone On Jun 25, 2012, at 8:36 AM, Attila Nagy <b...@fsn.hu> wrote: > Hi, > > (my humble attempts to make some improvement can be read at the end) > > I suffer from the slowness of svn rm since the upgrade to 1.7 from 1.6, but I > couldn't find the time to profile it until now. > My setup is: FreeBSD 9-STABLE/amd64 with zfs, eight fast cores, SAN, 32 GiB > of RAM. > Versions: > svn, version 1.7.5 (r1336830) > sqlite: 3.7.12.1 2012-05-22 02:45:53 6d326d44fd1d626aae0e8456e5fa2049f1ce0789 > > The working copy currently consists of 1992210 entries (files and > directories), the wc.db database is 1.5 GiB (which can easily fit into > memory, and pre-heated, so sqlite won't have to read a bit from the disks). > > Deleting a single file and committing the change take 25.43 seconds combined: > # time svn rm test > /tmp/svn-rm > 4.825u 6.640s 0:11.47 99.9% 223+2544k 4+95io 0pf+0w > # time svn commit -m 't' test > /tmp/svn-commit > 5.380u 6.167s 0:13.96 82.6% 223+2544k 6+143io 0pf+0w > > I've modified subversion/libsvn_subr/sqlite.c in svn_sqlite__step() to > measure time for each SQL operations and print it along with the original > statement: > svn_error_t * > svn_sqlite__step(svn_boolean_t *got_row, svn_sqlite__stmt_t *stmt) > { > clock_t start = clock(); > long i; > int sqlite_result = sqlite3_step(stmt->s3stmt); > printf ( "%f, %s\n", ( (double)clock() - start ) / > CLOCKS_PER_SEC,sqlite3_sql(stmt->s3stmt) ); > > Here are the numerically sorted top lines from svn-rm and svn-commit (I've > included the last zero-time operations, so every more than zero lines can be > observed): > rm: > 0.000000, SELECT root, uuid FROM repository WHERE id = ?1 > 0.007812, DELETE FROM actual_node WHERE wc_id = ?1 AND (?2 = '' OR > local_relpath = ?2 OR ((local_relpath) > (?2) || '/' AND > (local_relpath) < (?2) || '0') ) AND (changelist IS NULL OR NOT > EXISTS (SELECT 1 FROM nodes_current c WHERE c.wc_id = > ?1 AND c.local_relpath = actual_node.local_relpath > AND c.kind = 'file')) > 0.007812, UPDATE actual_node SET properties = NULL, text_mod = NULL, > tree_conflict_data = NULL, conflict_old = NULL, conflict_new = NULL, > conflict_working = NULL, prop_reject = NULL, older_checksum = NULL, > left_checksum = NULL, right_checksum = NULL WHERE wc_id = ?1 AND (?2 = '' > OR local_relpath = ?2 OR ((local_relpath) > (?2) || '/' AND > (local_relpath) < (?2) || '0') ) > 1.882812, DELETE FROM nodes WHERE wc_id = ?1 AND (?2 = '' OR local_relpath > = ?2 OR ((local_relpath) > (?2) || '/' AND (local_relpath) < (?2) || > '0') ) AND op_depth >= ?3 > 9.539062, SELECT local_relpath FROM nodes WHERE wc_id = ?1 AND (?2 = '' > OR local_relpath = ?2 OR ((local_relpath) > (?2) || '/' AND > (local_relpath) < (?2) || '0') ) AND op_depth = 0 AND presence = 'absent' > LIMIT 1 > commit: > 0.000000, SELECT root, uuid FROM repository WHERE id = ?1 > 1.171875, SELECT nodes.repos_id, nodes.repos_path, lock_token FROM nodes LEFT > JOIN lock ON nodes.repos_id = lock.repos_id AND nodes.repos_path = > lock.repos_relpath WHERE wc_id = ?1 AND op_depth = 0 AND (?2 = '' OR > local_relpath = ?2 OR ((local_relpath) > (?2) || '/' AND > (local_relpath) < (?2) || '0') ) > 2.007812, DELETE FROM nodes WHERE wc_id = ?1 AND (?2 = '' OR local_relpath > = ?2 OR ((local_relpath) > (?2) || '/' AND (local_relpath) < (?2) || > '0') ) AND op_depth >= ?3 > 8.320312, SELECT nodes.repos_id, nodes.repos_path, lock_token FROM nodes LEFT > JOIN lock ON nodes.repos_id = lock.repos_id AND nodes.repos_path = > lock.repos_relpath WHERE wc_id = ?1 AND op_depth = 0 AND (?2 = '' OR > local_relpath = ?2 OR ((local_relpath) > (?2) || '/' AND > (local_relpath) < (?2) || '0') ) > > It can be seen that in case of delete, two statements, in case of commit, > three statements take most of the time. > > For the curious, I've repeated the above test with 100 files to see how > multiple files affect this (in my application I delete 1000s of files > regularly in one svn rm "transaction", this often takes days, literally). > > So deleting 100 files takes 1353.219 seconds (so it has some benefits > compared to deleting the files one by one): > # time svn rm test* > /tmp/svn-rm > 508.826u 692.233s 20:01.47 99.9% 223+2544k 7+8720io 0pf+0w > # time svn commit -m 't' > /tmp/svn-commit > 523.866u 660.986s 19:47.65 99.7% 223+2544k 6+6574io 0pf+0w > > and still printing statements, which has a bigger aggregated execution time > than 0: > rm: > 0.000000, SELECT root, uuid FROM repository WHERE id = ?1 > 0.007812, DELETE FROM wc_lock WHERE wc_id = ?1 AND (?2 = '' OR > local_dir_relpath = ?2 OR ((local_dir_relpath) > (?2) || '/' AND > (local_dir_relpath) < (?2) || '0') ) AND NOT EXISTS (SELECT 1 FROM nodes > WHERE nodes.wc_id = ?1 AND > nodes.local_relpath = wc_lock.local_dir_relpath) > 0.007812, SELECT 1 FROM nodes WHERE wc_id = ?1 AND local_relpath = ?2 LIMIT 1 > 0.007812, SELECT id, work FROM work_queue ORDER BY id LIMIT 1 > 0.007812, SELECT local_dir_relpath FROM wc_lock WHERE wc_id = ?1 AND > local_dir_relpath LIKE ?2 ESCAPE '#' > 0.007812, SELECT prop_reject, changelist, conflict_old, conflict_new, > conflict_working, tree_conflict_data, properties FROM actual_node WHERE wc_id > = ?1 AND local_relpath = ?2 > 0.015624, SELECT local_relpath, def_local_relpath FROM externals WHERE wc_id > = ?1 AND (?2 = '' OR def_local_relpath = ?2 OR > ((def_local_relpath) > (?2) || '/' AND (def_local_relpath) < (?2) || '0') ) > 0.015624, SELECT op_depth, nodes.repos_id, nodes.repos_path, presence, kind, > revision, checksum, translated_size, changed_revision, changed_date, > changed_author, depth, symlink_target, last_mod_time, properties, > lock_token, lock_owner, lock_comment, lock_date FROM nodes LEFT OUTER JOIN > lock ON nodes.repos_id = lock.repos_id AND nodes.repos_path = > lock.repos_relpath WHERE wc_id = ?1 AND local_relpath = ?2 ORDER BY op_depth > DESC > 0.015624, SELECT presence, kind, def_local_relpath, repos_id, > def_repos_relpath, def_operational_revision, def_revision, presence FROM > externals WHERE wc_id = ?1 AND local_relpath = ?2 LIMIT 1 > 0.023436, INSERT INTO delete_list(local_relpath) SELECT local_relpath FROM > nodes n WHERE wc_id = ?1 AND (local_relpath = ?2 OR ((local_relpath) > > (?2) || '/' AND (local_relpath) < (?2) || '0') ) AND op_depth >= ?3 AND > presence NOT IN ('base-deleted', 'not-present', 'excluded', 'absent') AND > op_depth = (SELECT MAX(op_depth) FROM nodes s WHERE s.wc_id = n.wc_id > AND s.local_relpath = n.local_relpath) > 0.031248, SELECT local_dir_relpath, locked_levels FROM wc_lock WHERE wc_id = > ?1 AND ((local_dir_relpath <= ?2 AND local_dir_relpath >= ?3) OR > local_dir_relpath = '') ORDER BY local_dir_relpath DESC > 0.03906, INSERT INTO nodes ( wc_id, local_relpath, op_depth, > parent_relpath, presence, kind) SELECT wc_id, local_relpath, ?4 , > parent_relpath, 'base-deleted', kind FROM nodes WHERE wc_id = ?1 AND > (local_relpath = ?2 OR ((local_relpath) > (?2) || '/' AND > (local_relpath) < (?2) || '0') ) AND op_depth = ?3 AND presence NOT IN > ('base-deleted', 'not-present', 'excluded', 'absent') > 0.03906, INSERT INTO wc_lock (wc_id, local_dir_relpath, locked_levels) VALUES > (?1, ?2, ?3) > 0.046872, SELECT op_depth, repos_id, repos_path, presence, kind, revision, > checksum, translated_size, changed_revision, changed_date, changed_author, > depth, symlink_target, last_mod_time, properties FROM nodes WHERE wc_id = > ?1 AND local_relpath = ?2 ORDER BY op_depth DESC > 0.062496, DELETE FROM wc_lock WHERE wc_id = ?1 AND local_dir_relpath = ?2 > 0.398413, UPDATE actual_node SET properties = NULL, text_mod = NULL, > tree_conflict_data = NULL, conflict_old = NULL, conflict_new = NULL, > conflict_working = NULL, prop_reject = NULL, older_checksum = NULL, > left_checksum = NULL, right_checksum = NULL WHERE wc_id = ?1 AND (?2 = '' > OR local_relpath = ?2 OR ((local_relpath) > (?2) || '/' AND > (local_relpath) < (?2) || '0') ) > 1.031218, DELETE FROM actual_node WHERE wc_id = ?1 AND (?2 = '' OR > local_relpath = ?2 OR ((local_relpath) > (?2) || '/' AND > (local_relpath) < (?2) || '0') ) AND (changelist IS NULL OR NOT > EXISTS (SELECT 1 FROM nodes_current c WHERE c.wc_id = > ?1 AND c.local_relpath = actual_node.local_relpath > AND c.kind = 'file')) > 205.515629, DELETE FROM nodes WHERE wc_id = ?1 AND (?2 = '' OR > local_relpath = ?2 OR ((local_relpath) > (?2) || '/' AND > (local_relpath) < (?2) || '0') ) AND op_depth >= ?3 > 993.296875, SELECT local_relpath FROM nodes WHERE wc_id = ?1 AND (?2 = '' > OR local_relpath = ?2 OR ((local_relpath) > (?2) || '/' AND > (local_relpath) < (?2) || '0') ) AND op_depth = 0 AND presence = 'absent' > LIMIT 1 > commit: > 0.000000, SELECT root, uuid FROM repository WHERE id = ?1 > 0.007812, DELETE FROM nodes WHERE wc_id = ?1 AND local_relpath = ?2 AND > op_depth = (SELECT MIN(op_depth) FROM nodes WHERE wc_id = > ?1 AND local_relpath = ?2 AND op_depth > 0) AND presence = 'base-deleted' > 0.007812, DELETE FROM wc_lock WHERE wc_id = ?1 AND local_dir_relpath = ?2 > 0.015624, INSERT OR REPLACE INTO nodes ( wc_id, local_relpath, op_depth, > parent_relpath, repos_id, repos_path, revision, presence, depth, kind, > changed_revision, changed_date, changed_author, checksum, properties, > translated_size, last_mod_time, dav_cache, symlink_target, file_external) > VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, > ?15, ?16, ?17, ?18, ?19, ?20) > 0.015624, SELECT prop_reject, changelist, conflict_old, conflict_new, > conflict_working, tree_conflict_data, properties FROM actual_node WHERE wc_id > = ?1 AND local_relpath = ?2 > 0.015624, SELECT repos_id, repos_path, presence, kind, revision, checksum, > translated_size, changed_revision, changed_date, changed_author, depth, > symlink_target, last_mod_time, properties, file_external IS NOT NULL FROM > nodes WHERE wc_id = ?1 AND local_relpath = ?2 AND op_depth = 0 > 0.023436, SELECT op_depth, repos_id, repos_path, presence, kind, revision, > checksum, translated_size, changed_revision, changed_date, changed_author, > depth, symlink_target, last_mod_time, properties FROM nodes WHERE wc_id = > ?1 AND local_relpath = ?2 ORDER BY op_depth DESC > 0.031248, SELECT op_depth, nodes.repos_id, nodes.repos_path, presence, kind, > revision, checksum, translated_size, changed_revision, changed_date, > changed_author, depth, symlink_target, last_mod_time, properties, > lock_token, lock_owner, lock_comment, lock_date FROM nodes LEFT OUTER JOIN > lock ON nodes.repos_id = lock.repos_id AND nodes.repos_path = > lock.repos_relpath WHERE wc_id = ?1 AND local_relpath = ?2 ORDER BY op_depth > DESC > 0.539034, DELETE FROM actual_node WHERE wc_id = ?1 AND (?2 = '' OR > local_relpath = ?2 OR ((local_relpath) > (?2) || '/' AND > (local_relpath) < (?2) || '0') ) > 205.703121, DELETE FROM nodes WHERE wc_id = ?1 AND (?2 = '' OR > local_relpath = ?2 OR ((local_relpath) > (?2) || '/' AND > (local_relpath) < (?2) || '0') ) AND op_depth >= ?3 > 978.265612, SELECT nodes.repos_id, nodes.repos_path, lock_token FROM nodes > LEFT JOIN lock ON nodes.repos_id = lock.repos_id AND nodes.repos_path = > lock.repos_relpath WHERE wc_id = ?1 AND op_depth = 0 AND (?2 = '' OR > local_relpath = ?2 OR ((local_relpath) > (?2) || '/' AND > (local_relpath) < (?2) || '0') ) > > Looking at these, it seems some indexes are missing. > Particularly: > nodes has an index of: CREATE INDEX I_NODES_PARENT ON NODES (wc_id, > parent_relpath, op_depth); > but the long running statement query the local_relpath instead of the parent > one. > Placing the following index on it makes svn rm run in 3.43 seconds instead of > 11.47 and commit in 4.29 seconds instead of 13.96 (deleting only one file): > sqlite> CREATE INDEX I_NODES_LOCAL ON NODES (wc_id, local_relpath, op_depth); > wc.db's size was increased to 1.6GiB (not much for the speedup). > There is another similar query, where presence is used, so I've placed > another one: > sqlite> CREATE INDEX I_NODES_LOCAL_PRESENCE ON NODES (wc_id, local_relpath, > op_depth, presence); > wc.db increased to 1.7 GiB > > With these, removing 100 files took 4:46.21 seconds (was 20:01.47) and > committing 6:00.25 (was 19:47.65). Still sucks, but much better than before. > Any ideas about how could it be made even faster without ditching sqlite? > Deleting 100 files and committing the change shouldn't take 40 (10, with the > above indexes) minutes, even with 2 million files... I guess.