Author: julianfoad
Date: Thu Mar 4 12:05:28 2021
New Revision: 1887170
URL: http://svn.apache.org/viewvc?rev=1887170&view=rev
Log:
Update 'fsfsfixer' to work with packed repositories.
* contrib/server-side/fsfsfixer/fixer/find_good_id.py
(usage): Update to match long-ago changes.
(Repository): New class, based on one found in
'tools/dev/benchmarks/RepoPerf/copy_repo.py'.
(rev_file_path): Return the path of the rev pack file instead of a single
rev file if the rev is packed. Adjust to be a method of Repository class.
(rev_file_indexes): Adjust to be a method of Repository class.
(find_good_id,
find_good_rep_header,
main script): Adjust to use Repository class.
* contrib/server-side/fsfsfixer/fixer/fixer_config.py
(REVS_PER_SHARD): Remove, as it is now determined automatically.
* contrib/server-side/fsfsfixer/fixer/fix-rev.py
Adjust everywhere to use Repository class.
* contrib/server-side/fsfsfixer/README
Remove the suggested (but poor) work-around of using another script to
unpack the repo. (That script does not support packed repositories either
and was never declared ready for use.)
Modified:
subversion/trunk/contrib/server-side/fsfsfixer/README
subversion/trunk/contrib/server-side/fsfsfixer/fixer/find_good_id.py
subversion/trunk/contrib/server-side/fsfsfixer/fixer/fix-rev.py
subversion/trunk/contrib/server-side/fsfsfixer/fixer/fixer_config.py
Modified: subversion/trunk/contrib/server-side/fsfsfixer/README
URL:
http://svn.apache.org/viewvc/subversion/trunk/contrib/server-side/fsfsfixer/README?rev=1887170&r1=1887169&r2=1887170&view=diff
==============================================================================
--- subversion/trunk/contrib/server-side/fsfsfixer/README (original)
+++ subversion/trunk/contrib/server-side/fsfsfixer/README Thu Mar 4 12:05:28
2021
@@ -25,6 +25,3 @@ Backup your repository before running th
For more details, see the email from Julian Foad on 2010-10-06, subject
"Fixing FSFS 'Corrupt node-revision' and 'Corrupt representation' errors",
<http://svn.haxx.se/dev/archive-2010-10/0095.shtml>.
-
-This script does not support fixing revisions that had been packed. Consider
-using ../../../tools/server-side/fsfs-reshard.py first.
Modified: subversion/trunk/contrib/server-side/fsfsfixer/fixer/find_good_id.py
URL:
http://svn.apache.org/viewvc/subversion/trunk/contrib/server-side/fsfsfixer/fixer/find_good_id.py?rev=1887170&r1=1887169&r2=1887170&view=diff
==============================================================================
--- subversion/trunk/contrib/server-side/fsfsfixer/fixer/find_good_id.py
(original)
+++ subversion/trunk/contrib/server-side/fsfsfixer/fixer/find_good_id.py Thu
Mar 4 12:05:28 2021
@@ -1,9 +1,12 @@
#!/usr/bin/env python
usage = """
-Print the correct FSFS node-rev id, given one that is correct except for
-its byte-offset part.
-Usage: $0 REPO-DIR FSFS-ID-WITH-BAD-OFFSET
+Usage:
+ $0 REPO-DIR FSFS-ID-WITH-BAD-OFFSET
+ -- Find the correct FSFS node-rev id, given one that is correct except for
+ its byte-offset part.
+ $0 REPO-DIR REV SIZE
+ -- Find a rep header that matches REV and SIZE.
Example:
Result of running 'svnadmin verify':
svnadmin: Corrupt node-revision '5-12302.1-12953.r12953/29475'
@@ -29,42 +32,107 @@ def parse_id(id):
noderev = node_id + '.' + copy_id + '.r' + rev
return noderev, rev, offset
-def rev_file_path(repo_dir, rev):
- """Return the path to the revision file in the repository at REPO_DIR
- (a path string) for revision number REV (int or string).
+class Repository:
+ """Encapsulates key information of a repository:
+ its NAME, PATH, SHARD_SIZE, HEAD revision and MIN_UNPACKED_REV.
"""
- if REVS_PER_SHARD:
- shard = int(rev) / REVS_PER_SHARD
- path = os.path.join(repo_dir, 'db', 'revs', str(shard), str(rev))
- else:
- path = os.path.join(repo_dir, 'db', 'revs', str(rev))
- return path
-
-def rev_file_indexes(repo_dir, rev):
- """Return (ids, texts), where IDS is a dictionary of all node-rev ids
- defined in revision REV of the repo at REPO_DIR, in the form
- {noderev: full_id}, and TEXTS is an array of
- (offset, size, expanded-size, csum [,sha1-csum, uniquifier]) tuples
- taken from all the "text: REV ..." representation lines
- in revision REV.
- Here, NODEREV is the node-revision id minus the /offset part, and
- FULL_ID is the full node-revision id (including the /offset part).
- """
- ids = {}
- texts = []
- for line in open(rev_file_path(repo_dir, rev)):
- if line.startswith('id: '):
- id = line.replace('id: ', '').rstrip()
- id_noderev, id_rev, _ = parse_id(id)
- assert id_rev == rev
- ids[id_noderev] = id
- if line.startswith('text: ' + rev + ' '): # also 'props:' lines?
- fields = line.split()
- texts.append(tuple(fields[2:]))
- return ids, texts
+ def _read_repo_file(self, filename):
+ """Read and return all lines from FILENAME in REPO.
+ """
+
+ f = open(os.path.join(self.path, filename), "rb")
+ lines = f.readlines()
+ f.close()
+ return lines
+
+ def _read_config(self, filename):
+ """ Read and return all lines from FILENAME.
+ This will be used to read 'format', 'current' etc. . """
+
+ if filename not in self.db_config:
+ f = open(os.path.join(self.path, 'db', filename), "rb")
+ self.db_config[filename] = f.readlines()
+ f.close()
+
+ return self.db_config[filename]
+
+ def __init__(self, path_or_parent, name=None):
+ """Constructor collecting everything we need to know about
+ the repository at path PATH_OR_PARENT or NAME within PARENT folder.
+ """
+
+ if name is None:
+ self.name = os.path.basename(path_or_parent)
+ self.path = path_or_parent
+ else:
+ self.name = name
+ self.path = os.path.join(path_or_parent, name)
+
+ self.db_config = {}
+ self.repo_format = int(self._read_repo_file('format')[0])
+ self.fs_type = self._read_config('fs-type')[0].rstrip()
+ self.db_format = int(self._read_config('format')[0])
+ try:
+ self.shard_size = int(self._read_config('format')[1].split(' ')[2])
+ except IndexError:
+ self.shard_size = 0
+ if self.db_format >= 4:
+ self.min_unpacked_rev = int(self._read_config('min-unpacked-rev')[0])
+ else:
+ self.min_unpacked_rev = 0
+ self.head = int(self._read_config('current')[0])
+
+ def rev_file_path(self, rev):
+ """Return the path to the revision file in the repository at REPO_DIR
+ (a path string) for revision number REV (int or string).
+ """
+ if isinstance(rev, str):
+ rev = int(rev)
+ if self.shard_size > 0:
+ shard = int(rev) / self.shard_size
+ if rev < self.min_unpacked_rev:
+ path = os.path.join(self.path, 'db', 'revs', str(shard) + '.pack',
'pack')
+ else:
+ path = os.path.join(self.path, 'db', 'revs', str(shard), str(rev))
+ else:
+ path = os.path.join(self.path, 'db', 'revs', str(rev))
+ return path
+
+ def rev_file_indexes(self, rev):
+ """Return (ids, texts), where IDS is a dictionary of all node-rev ids
+ defined in revision REV of the repo at REPO_DIR, in the form
+ {noderev: full_id}, and TEXTS is an array of
+ (offset, size, expanded-size, csum [,sha1-csum, uniquifier]) tuples
+ taken from all the "text: REV ..." representation lines
+ in revision REV.
+
+ Here, NODEREV is the node-revision id minus the /offset part, and
+ FULL_ID is the full node-revision id (including the /offset part).
+ """
+ if isinstance(rev, str):
+ rev = int(rev)
+ ids = {}
+ texts = []
+ for line in open(self.rev_file_path(rev)):
+ if line.startswith('id: '):
+ id = line.replace('id: ', '').rstrip()
+ id_noderev, id_rev, _ = parse_id(id)
+ id_rev = int(id_rev)
+ # all ids in an unpacked rev file should match its rev number
+ if rev >= self.min_unpacked_rev:
+ assert id_rev == rev
+ # in a pre-f7 pack, revs are ordered so after REV we can stop looking
+ if id_rev > rev:
+ break
+ if id_rev == rev:
+ ids[id_noderev] = id
+ elif line.startswith('text: ' + str(rev) + ' '): # also 'props:' lines?
+ fields = line.split()
+ texts.append(tuple(fields[2:]))
+ return ids, texts
-def find_good_id(repo_dir, bad_id):
+def find_good_id(repo, bad_id):
"""Return the node-rev id that is like BAD_ID but has the byte-offset
part corrected, by looking in the revision file in the repository
at REPO_DIR.
@@ -74,17 +142,21 @@ def find_good_id(repo_dir, bad_id):
possibility of a false match.
"""
+ if isinstance(repo, str):
+ repo = Repository(repo)
noderev, rev, bad_offset = parse_id(bad_id)
- ids, _ = rev_file_indexes(repo_dir, rev)
+ ids, _ = repo.rev_file_indexes(rev)
if noderev not in ids:
raise FixError("NodeRev Id '" + noderev + "' not found in r" + rev)
return ids[noderev]
-def find_good_rep_header(repo_dir, rev, size):
+def find_good_rep_header(repo, rev, size):
"""Find a rep header that matches REV and SIZE.
Return the correct offset."""
- _, texts = rev_file_indexes(repo_dir, rev)
+ if isinstance(repo, str):
+ repo = Repository(repo)
+ _, texts = repo.rev_file_indexes(rev)
n_matches = 0
for fields in texts:
if fields[1] == size:
@@ -101,7 +173,8 @@ if __name__ == '__main__':
repo_dir = sys.argv[1]
rev = sys.argv[2]
size = sys.argv[3]
- print("Good offset:", find_good_rep_header(repo_dir, rev, size))
+ repo = Repository(repo_dir)
+ print("Good rep header offset:", find_good_rep_header(repo, rev, size))
sys.exit(0)
if len(sys.argv) != 3:
@@ -111,7 +184,8 @@ if __name__ == '__main__':
repo_dir = sys.argv[1]
bad_id = sys.argv[2]
- good_id = find_good_id(repo_dir, bad_id)
+ repo = Repository(repo_dir)
+ good_id = find_good_id(repo, bad_id)
# Replacement ID must be the same length, otherwise I don't know how to
# reconstruct the file so as to preserve all offsets.
Modified: subversion/trunk/contrib/server-side/fsfsfixer/fixer/fix-rev.py
URL:
http://svn.apache.org/viewvc/subversion/trunk/contrib/server-side/fsfsfixer/fixer/fix-rev.py?rev=1887170&r1=1887169&r2=1887170&view=diff
==============================================================================
--- subversion/trunk/contrib/server-side/fsfsfixer/fixer/fix-rev.py (original)
+++ subversion/trunk/contrib/server-side/fsfsfixer/fixer/fix-rev.py Thu Mar 4
12:05:28 2021
@@ -12,7 +12,7 @@ $LastChangedRevision$
import os, sys, re, subprocess
from subprocess import Popen, PIPE
-from find_good_id import FixError, rev_file_path, find_good_id,
find_good_rep_header
+from find_good_id import FixError, Repository, find_good_id,
find_good_rep_header
from fixer_config import *
@@ -70,11 +70,11 @@ def replace_in_file(filename, old, new):
" with '" + new + "'")
os.remove(filename + '.bak')
-def replace_in_rev_file(repo_dir, rev, old, new):
+def replace_in_rev_file(repo, rev, old, new):
"""Replace all occurrences of the string OLD with the string NEW in the
revision file for revision REV in the repository at REPO_DIR. Raise an
error if nothing changes."""
- rev_file = rev_file_path(repo_dir, rev)
+ rev_file = repo.rev_file_path(rev)
replace_in_file(rev_file, old, new)
# Fix a node-rev ID that has a bad byte-offset part. Look up the correct
@@ -85,10 +85,10 @@ def replace_in_rev_file(repo_dir, rev, o
# since the error reported for <REV> might actually exist in an older
# revision that is referenced by <REV>.
#
-def fix_id(repo_dir, rev, bad_id):
+def fix_id(repo, rev, bad_id):
# Find the GOOD_ID to replace BAD_ID.
- good_id = find_good_id(repo_dir, bad_id)
+ good_id = find_good_id(repo, bad_id)
# Replacement ID must be the same length, otherwise I don't know how to
# reconstruct the file so as to preserve all offsets.
@@ -100,37 +100,37 @@ def fix_id(repo_dir, rev, bad_id):
raise FixError("The ID supplied is already correct: " +
"good id '" + good_id + "'")
- replace_in_rev_file(repo_dir, rev, bad_id, good_id)
+ replace_in_rev_file(repo, rev, bad_id, good_id)
print("Fixed id: " + bad_id + " -> " + good_id)
fixed_ids[bad_id] = good_id
-def fix_checksum(repo_dir, rev, old_checksum, new_checksum):
+def fix_checksum(repo, rev, old_checksum, new_checksum):
"""Change all occurrences of OLD_CHECKSUM to NEW_CHECKSUM in the revision
file for REV in REPO_DIR."""
assert len(old_checksum) and len(new_checksum)
assert old_checksum != new_checksum
- replace_in_rev_file(repo_dir, rev, old_checksum, new_checksum)
+ replace_in_rev_file(repo, rev, old_checksum, new_checksum)
print("Fixed checksum: " + old_checksum + " -> " + new_checksum)
fixed_checksums[old_checksum] = new_checksum
-def fix_rep_ref(repo_dir, rev, prefix, rep_rev, bad_offset, rep_size):
+def fix_rep_ref(repo, rev, prefix, rep_rev, bad_offset, rep_size):
"""Fix a "DELTA <REP_REV> <BAD_OFFSET> <REP_SIZE>"
or "text: <REP_REV> <BAD_OFFSET> <REP_SIZE> ..."
line in the revision file for REV in REPO_DIR, where <BAD_OFFSET> is
wrong. PREFIX is 'DELTA' or 'text:'.
"""
- good_offset = find_good_rep_header(repo_dir, rep_rev, rep_size)
+ good_offset = find_good_rep_header(repo, rep_rev, rep_size)
old_line = ' '.join([prefix, rep_rev, bad_offset, rep_size])
new_line = ' '.join([prefix, rep_rev, good_offset, rep_size])
if good_offset == bad_offset:
raise FixError("Attempting to fix a rep ref that appears to be correct: "
+ old_line)
- replace_in_rev_file(repo_dir, rev, old_line, new_line)
+ replace_in_rev_file(repo, rev, old_line, new_line)
print("Fixed rep ref:", old_line, "->", new_line)
-def handle_one_error(repo_dir, rev, error_lines):
+def handle_one_error(repo, rev, error_lines):
"""If ERROR_LINES describes an error we know how to fix, then fix it.
Return True if fixed, False if not fixed."""
@@ -151,7 +151,7 @@ def handle_one_error(repo_dir, rev, erro
# Fix it.
bad_id = match.group(1)
verbose_print(error_lines[0])
- fix_id(repo_dir, rev, bad_id)
+ fix_id(repo, rev, bad_id)
# Verify again, and expect to discover a checksum mismatch.
# verbose_print("Fixed an ID; now verifying to discover the checksum we
need to update")
@@ -163,7 +163,7 @@ def handle_one_error(repo_dir, rev, erro
#
# expected = ...
# actual = ...
- # fix_checksum(repo_dir, rev, expected, actual)
+ # fix_checksum(repo, rev, expected, actual)
return True
@@ -171,7 +171,7 @@ def handle_one_error(repo_dir, rev, erro
if match:
expected = re.match(r' *expected: *([^ ]*)', error_lines[1]).group(1)
actual = re.match(r' *actual: *([^ ]*)', error_lines[2]).group(1)
- fix_checksum(repo_dir, rev, expected, actual)
+ fix_checksum(repo, rev, expected, actual)
return True
match = re.match(r"svn.*: Corrupt representation '([0-9]*) ([0-9]*) ([0-9]*)
.*'", line1)
@@ -196,11 +196,11 @@ def handle_one_error(repo_dir, rev, erro
# a knock-on effect, invalidating the checksum of the rep so that all
# references to this rep will then need their checksums correcting.
try:
- fix_rep_ref(repo_dir, rev, 'DELTA', bad_rev, bad_offset, bad_size)
+ fix_rep_ref(repo, rev, 'DELTA', bad_rev, bad_offset, bad_size)
except FixError:
# In at least one case of corruption, every bad reference has been in a
# 'text:' line. Fixing this has no knock-on effect.
- fix_rep_ref(repo_dir, rev, 'text:', bad_rev, bad_offset, bad_size)
+ fix_rep_ref(repo, rev, 'text:', bad_rev, bad_offset, bad_size)
return True
@@ -213,18 +213,18 @@ def grab_stderr(child_argv):
child_err = [line for line in stderr.splitlines() if '(apr_err=' not in line]
return child_err
-def fix_one_error(repo_dir, rev):
+def fix_one_error(repo, rev):
"""Verify, and if there is an error we know how to fix, then fix it.
Return False if no error, True if fixed, exception if can't fix."""
# Capture the output of 'svnadmin verify' (ignoring any debug-build output)
- svnadmin_err = grab_stderr([SVNADMIN, 'verify', '-q', '-r'+rev, repo_dir])
+ svnadmin_err = grab_stderr([SVNADMIN, 'verify', '-q', '-r'+rev, repo.path])
if svnadmin_err == []:
return False
try:
- if handle_one_error(repo_dir, rev, svnadmin_err):
+ if handle_one_error(repo, rev, svnadmin_err):
return True
else:
verbose_print("Unrecognized error message; trying 'svnlook' instead.")
@@ -237,51 +237,48 @@ def fix_one_error(repo_dir, rev):
# one that we *can* handle.
# Capture the output of 'svnlook tree' (ignoring any debug-build output)
- svnlook_err = grab_stderr([SVNLOOK, 'tree', '-r'+rev, repo_dir])
+ svnlook_err = grab_stderr([SVNLOOK, 'tree', '-r'+rev, repo.path])
if svnlook_err == []:
print('warning: svnlook did not find an error')
else:
- if handle_one_error(repo_dir, rev, svnlook_err):
+ if handle_one_error(repo, rev, svnlook_err):
return True
else:
verbose_print("Unrecognized error message.")
raise FixError("unable to fix r" + str(rev))
-def check_formats(repo_dir):
+def check_formats(repo):
"""Check that REPO_DIR isn't newer than we know how to handle."""
- repos_format = int(open(os.path.join(repo_dir, 'format')).readline())
- if repos_format not in [3,5]:
+ if repo.repo_format not in [3,5]:
raise FixError("Repository '%s' too new (format %d); try the version at %s"
- % (repo_dir, repos_format, URL))
+ % (repo.path, repo.repo_format, URL))
- fs_type = open(os.path.join(repo_dir, 'db', 'fs-type')).read().rstrip()
- if fs_type != 'fsfs':
+ if repo.fs_type != 'fsfs':
raise FixError("Repository '%s' has wrong FS backend: "
- "found '%s', expected '%s'" % (repo_dir, fs_type, 'fsfs'))
+ "found '%s', expected '%s'" % (repo.path, repo.fs_type,
'fsfs'))
- fsfs_format = int(open(os.path.join(repo_dir, 'db', 'format')).readline())
- if fsfs_format > MAX_FSFS_FORMAT:
+ if repo.db_format > MAX_FSFS_FORMAT:
raise FixError("Filesystem '%s' is too new (format %d); try the version at
%s"
- % (os.path.join(repo_dir, 'db'), fsfs_format, URL))
+ % (os.path.join(repo.path, 'db'), repo.db_format, URL))
# ----------------------------------------------------------------------
# Main program
-def fix_rev(repo_dir, rev):
+def fix_rev(repo, rev):
""""""
- check_formats(repo_dir)
+ check_formats(repo)
# Back up the file
- if not os.path.exists(rev_file_path(repo_dir, rev) + '.orig'):
+ if not os.path.exists(repo.rev_file_path(rev) + '.orig'):
pass
# cp -a "$FILE" "$FILE.orig"
# Keep looking for verification errors in r$REV and fixing them while we can.
- while fix_one_error(repo_dir, rev):
+ while fix_one_error(repo, rev):
pass
print("Revision " + rev + " verifies OK.")
@@ -295,8 +292,9 @@ if __name__ == '__main__':
repo_dir = sys.argv[1]
rev = sys.argv[2]
+ repo = Repository(repo_dir)
try:
- fix_rev(repo_dir, rev)
+ fix_rev(repo, rev)
except FixError as e:
print('error:', e)
sys.exit(1)
Modified: subversion/trunk/contrib/server-side/fsfsfixer/fixer/fixer_config.py
URL:
http://svn.apache.org/viewvc/subversion/trunk/contrib/server-side/fsfsfixer/fixer/fixer_config.py?rev=1887170&r1=1887169&r2=1887170&view=diff
==============================================================================
--- subversion/trunk/contrib/server-side/fsfsfixer/fixer/fixer_config.py
(original)
+++ subversion/trunk/contrib/server-side/fsfsfixer/fixer/fixer_config.py Thu
Mar 4 12:05:28 2021
@@ -9,16 +9,3 @@ SVNLOOK = 'svnlook'
# Verbosity: True for verbose, or False for quiet
VERBOSE = True
-# PER-REPOSITORY CONFIGURATION
-
-# The number of revs per shard of the repository being accessed, or 'None'
-# for a linear (that is, non-sharded) layout. This is 1000 for almost all
-# repositories in practice.
-#
-# The correct value can be found in the 'db/format' file in the repository.
-# The second line of that file will say something like 'layout sharded 1000'
-# or 'layout linear'.
-#
-# TODO: Read this value automatically from the db/format file.
-REVS_PER_SHARD=1000
-