On Wed, Feb 08, 2017 at 04:18:25PM -0800, Junio C Hamano wrote:

> > We wrote something similar at GitHub, too, but we never ended up using
> > it in production. We found that with a sane scheduler, it's not too big
> > a deal to just do maintenance once in a while.
> 
> Thanks again for this.  I've also been wondering about how effective
> a "concatenate packs without paying reachability penalty" would be.

For the sake of posterity, I'll include our patch at the end (sorry, not
chunked into nice readable commits; that never existed in the first
place).

> > I'm still not sure if it's worth making the fatal/non-fatal distinction.
> > Doing so is perhaps safer, but it does mean that somebody has to decide
> > which errors are important enough to block a retry totally, and which
> > are not. In theory, it would be safe to always _try_ and then the gc
> > process can decide when something is broken and abort. And all you've
> > wasted is some processing power each day.
> 
> Yup, and somebody or something need to monitor so that repeated
> failures can be dealt with.

Yes. I think that part is probably outside the scope of Git. But if
auto-gc leaves gc.log lying around, it would be easy to visit each repo
and collect the various failures.

-- >8 --
This is the "pack-fast" patch, for reference. It applies on v2.6.5,
though I had to do some wiggling due to a few of our other custom
patches, so it's possible I introduced new bugs. It compiles, but I
didn't actually re-test the result.  I _think_ the original at least
generated valid packs in all cases.

So I would certainly not recommend anybody run this. It's just a
possible base to work off of if anybody's interested in the topic. I
haven't looked at David's combine-packs at all to see if it is any less
gross. :)

---
 Makefile            |   1 +
 builtin.h           |   1 +
 builtin/pack-fast.c | 618 +++++++++++++++++++++++++++++++++++
 cache.h             |   5 +
 git.c               |   1 +
 pack-bitmap-write.c | 167 +++++++++-
 pack-bitmap.c       |   2 +-
 pack-bitmap.h       |   8 +
 sha1_file.c         |   4 +-
 9 files changed, 792 insertions(+), 15 deletions(-)

diff --git a/Makefile b/Makefile
index 37e2d9e18..524b185ec 100644
--- a/Makefile
+++ b/Makefile
@@ -887,6 +887,7 @@ BUILTIN_OBJS += builtin/mv.o
 BUILTIN_OBJS += builtin/name-rev.o
 BUILTIN_OBJS += builtin/notes.o
 BUILTIN_OBJS += builtin/pack-objects.o
+BUILTIN_OBJS += builtin/pack-fast.o
 BUILTIN_OBJS += builtin/pack-redundant.o
 BUILTIN_OBJS += builtin/pack-refs.o
 BUILTIN_OBJS += builtin/patch-id.o
diff --git a/builtin.h b/builtin.h
index 79aaf0afe..df4e4d668 100644
--- a/builtin.h
+++ b/builtin.h
@@ -95,6 +95,7 @@ extern int cmd_mv(int argc, const char **argv, const char *prefix);
 extern int cmd_name_rev(int argc, const char **argv, const char *prefix);
 extern int cmd_notes(int argc, const char **argv, const char *prefix);
 extern int cmd_pack_objects(int argc, const char **argv, const char *prefix);
+extern int cmd_pack_fast(int argc, const char **argv, const char *prefix);
 extern int cmd_pack_redundant(int argc, const char **argv, const char *prefix);
 extern int cmd_patch_id(int argc, const char **argv, const char *prefix);
 extern int cmd_prune(int argc, const char **argv, const char *prefix);
diff --git a/builtin/pack-fast.c b/builtin/pack-fast.c
new file mode 100644
index 000000000..ad9f5e5f1
--- /dev/null
+++ b/builtin/pack-fast.c
@@ -0,0 +1,618 @@
+#include "builtin.h"
+#include "cache.h"
+#include "pack.h"
+#include "progress.h"
+#include "csum-file.h"
+#include "sha1-lookup.h"
+#include "parse-options.h"
+#include "tempfile.h"
+#include "pack-bitmap.h"
+#include "pack-revindex.h"
+
+/* Usage text handed to parse_options() by cmd_pack_fast(). */
+static const char *pack_usage[] = {
+       N_("git pack-fast --quiet [options...] [base-name]"),
+       NULL
+};
+
+/*
+ * State of the single output pack being assembled. Every byte goes
+ * through packwriter_write(), which keeps "total" and, when enabled,
+ * the running CRC-32 up to date.
+ */
+struct packwriter {
+       struct tempfile *tmp; /* registered in packwriter_init() under the temporary name */
+       off_t total; /* number of bytes written so far */
+       int fd; /* descriptor returned by odb_mkstemp() */
+       uint32_t crc32; /* running checksum, only updated while do_crc is set */
+       unsigned do_crc; /* non-zero between packwriter_crc32_start() and _end() */
+};
+
+/*
+ * Reset the running CRC-32 and start folding every subsequent
+ * packwriter_write() into it.
+ */
+static void packwriter_crc32_start(struct packwriter *w)
+{
+       w->do_crc = 1;
+       w->crc32 = crc32(0, NULL, 0);
+}
+
+/*
+ * Stop checksumming and hand back the CRC-32 accumulated since the
+ * matching packwriter_crc32_start().
+ */
+static uint32_t packwriter_crc32_end(struct packwriter *w)
+{
+       const uint32_t result = w->crc32;
+
+       w->do_crc = 0;
+       return result;
+}
+
+/*
+ * Append "count" bytes from "buf" to the output pack (dying on write
+ * failure), advance the byte counter, and feed the bytes into the
+ * running CRC-32 when checksumming is switched on.
+ */
+static void packwriter_write(struct packwriter *w, const void *buf, unsigned int count)
+{
+       const int checksumming = w->do_crc != 0;
+
+       if (checksumming)
+               w->crc32 = crc32(w->crc32, buf, count);
+
+       write_or_die(w->fd, buf, count);
+       w->total = w->total + count;
+}
+
+/* Number of bytes written to the output pack so far. */
+static off_t packwriter_total(struct packwriter *w)
+{
+       const off_t written = w->total;
+
+       return written;
+}
+
+/*
+ * Create the temporary output pack under the object directory and
+ * reset all writer state. The temporary name is registered as a
+ * tempfile. Dies if the temporary file cannot be created, matching
+ * what bitmap_file_new() does for the bitmap temporary.
+ */
+static void packwriter_init(struct packwriter *w)
+{
+       char tmpname[PATH_MAX];
+
+       w->fd = odb_mkstemp(tmpname, sizeof(tmpname), "pack/tmp_pack_XXXXXX");
+       if (w->fd < 0)
+               die_errno("unable to create '%s'", tmpname);
+
+       w->total = 0;
+       /* Give the checksum a defined value even before the first crc32_start(). */
+       w->crc32 = 0;
+       w->do_crc = 0;
+       w->tmp = xcalloc(1, sizeof(*w->tmp));
+
+       register_tempfile(w->tmp, tmpname);
+}
+
+
+/* Whether to report progress; set from isatty(2) and the -q/--progress options. */
+static int progress = 1;
+static struct progress *progress_state;
+static struct pack_idx_option pack_idx_opts;
+/* Prefix of the final pack name; replaced by the first positional argument. */
+static const char *base_name = "pack-fast";
+/* Set by --skip-largest. NOTE(review): nothing visible in this file reads it. */
+static int skip_largest;
+static int write_bitmap_index = 1;
+
+/* Local packs to combine; slot 0 is the one copied wholesale (see find_packfiles()). */
+static struct packed_git **all_packfiles;
+static unsigned int all_packfiles_nr;
+
+/* Index entries of every object written so far, searched by SHA-1 via sha1_pos(). */
+static struct pack_idx_entry **written_list;
+static unsigned int written_nr;
+
+/*
+ * One slab holds the index entries for the objects contributed by one
+ * source pack. Slabs are chained in the order they are allocated by
+ * preallocate_write_slab(); "nr" counts the entries filled in so far.
+ */
+struct write_slab {
+       struct write_slab *next;
+       unsigned int nr;
+
+       struct write_slab_entry {
+               struct pack_idx_entry idx; /* first member: entries are also used as pack_idx_entry */
+               enum object_type real_type; /* consumed by pack_fast_grow_typemaps() */
+       } entries[];
+};
+
+/* Head and tail of the slab chain. */
+static struct write_slab *written_slab_root;
+static struct write_slab *written_slab_current;
+
+/*
+ * Fill in the next free entry of the current slab with the given
+ * object name, output offset, CRC-32 and resolved type. The slab
+ * carries no capacity field, so the caller must have preallocated
+ * enough room.
+ */
+static void add_to_write_list(
+       const unsigned char *sha1, off_t offset, uint32_t crc32,
+       enum object_type real_type)
+{
+       struct write_slab *tail = written_slab_current;
+       struct write_slab_entry *slot = &tail->entries[tail->nr];
+
+       tail->nr++;
+
+       hashcpy(slot->idx.sha1, sha1);
+       slot->idx.offset = offset;
+       slot->idx.crc32 = crc32;
+       slot->real_type = real_type;
+}
+
+/*
+ * Allocate an empty slab with room for "num_entries" entries and
+ * append it to the slab chain, making it the current slab.
+ */
+static void preallocate_write_slab(unsigned int num_entries)
+{
+       struct write_slab *fresh;
+
+       fresh = xmalloc(sizeof(*fresh) + num_entries * sizeof(fresh->entries[0]));
+       fresh->nr = 0;
+       fresh->next = NULL;
+
+       if (written_slab_current)
+               written_slab_current->next = fresh;
+       else
+               written_slab_root = fresh;
+
+       written_slab_current = fresh;
+}
+
+/*
+ * Objects of the pack currently being appended that were left out
+ * because the output already contains them. "skipped_offset" is the
+ * object's offset in the source pack, "real_offset" its offset in the
+ * output. Entries are appended in the order append_packfile() walks
+ * the source pack and looked up by binary search.
+ */
+static struct skipped_object {
+       off_t skipped_offset;
+       off_t real_offset;
+} *skipped_list;
+static unsigned int skipped_nr;
+static unsigned int skipped_alloc;
+
+/*
+ * Remember that the object at "skipped_offset" in the source pack was
+ * not copied, and that its existing copy lives at "real_offset" in
+ * the output.
+ */
+static void add_to_skipped_list(off_t skipped_offset, off_t real_offset)
+{
+       struct skipped_object *slot;
+
+       if (skipped_nr >= skipped_alloc) {
+               skipped_alloc = (skipped_alloc + 32) * 2;
+               REALLOC_ARRAY(skipped_list, skipped_alloc);
+       }
+
+       slot = &skipped_list[skipped_nr++];
+       slot->skipped_offset = skipped_offset;
+       slot->real_offset = real_offset;
+}
+
+/*
+ * Binary-search the skipped list for an object that sat at
+ * "skipped_offset" in the source pack. Returns its offset in the
+ * output, or 0 when that object was not skipped.
+ */
+static off_t find_real_offset_for_base(off_t skipped_offset)
+{
+       int first = 0, last = skipped_nr;
+
+       while (first < last) {
+               const int mid = first + ((last - first) / 2);
+               const off_t candidate = skipped_list[mid].skipped_offset;
+
+               if (candidate == skipped_offset)
+                       return skipped_list[mid].real_offset;
+
+               if (candidate > skipped_offset)
+                       last = mid;
+               else
+                       first = mid + 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Record the offsets needed in our reused packfile chunks due to
+ * "gaps" where we omitted some objects.
+ *
+ * "start" is where a chunk begins in the source pack; "offset" is the
+ * difference between a source position inside the chunk and the
+ * corresponding position in the output (see the call in
+ * append_object_1()).
+ */
+static struct reused_chunk {
+       off_t start;
+       off_t offset;
+} *reused_chunks;
+static int reused_chunks_nr;
+static int reused_chunks_alloc;
+
+/*
+ * Note that the object starting at "where" in the source pack is
+ * shifted by "offset" in the output. A new chunk is only opened when
+ * the shift differs from that of the most recent chunk.
+ */
+static void record_reused_object(off_t where, off_t offset)
+{
+       struct reused_chunk *chunk;
+
+       if (reused_chunks_nr != 0) {
+               const struct reused_chunk *last = &reused_chunks[reused_chunks_nr - 1];
+
+               if (last->offset == offset)
+                       return;
+       }
+
+       ALLOC_GROW(reused_chunks, reused_chunks_nr + 1,
+                  reused_chunks_alloc);
+       chunk = &reused_chunks[reused_chunks_nr++];
+       chunk->start = where;
+       chunk->offset = offset;
+}
+
+/*
+ * Binary search for the chunk that "where" falls in. An exact match
+ * on a chunk start is not required: each chunk implicitly ends at the
+ * start of the next one, so any position inside that range yields
+ * the chunk's offset.
+ */
+static off_t find_reused_offset(off_t where)
+{
+       int lo = 0, upper = reused_chunks_nr;
+
+       while (lo < upper) {
+               const int mid = lo + ((upper - lo) / 2);
+               const off_t chunk_start = reused_chunks[mid].start;
+
+               if (chunk_start == where)
+                       return reused_chunks[mid].offset;
+
+               if (chunk_start > where)
+                       upper = mid;
+               else
+                       lo = mid + 1;
+       }
+
+       /*
+        * "lo" now indexes the first chunk that starts after "where",
+        * so the containing chunk is the one before it. Callers are
+        * expected never to ask about a position ahead of the first
+        * recorded chunk.
+        */
+       assert(lo);
+       return reused_chunks[lo - 1].offset;
+}
+
+/*
+ * Return the CRC-32 stored in the pack index for the object at index
+ * position "nr". The index data is treated as 32-bit words: 2 + 256
+ * words and then num_objects * (20/4) words are skipped before the
+ * CRC table (assumes the v2 .idx layout, which load_index_or_die()
+ * insists on).
+ */
+static uint32_t nth_packed_object_crc32(const struct packed_git *p, uint32_t nr)
+{
+       const uint32_t *words = p->index_data;
+
+       return ntohl(words[2 + 256 + p->num_objects * (20/4) + nr]);
+}
+
+/*
+ * Open the index of "p", dying if it cannot be opened or if it is not
+ * a version 2 index (the only version this command reads CRCs from).
+ */
+static void load_index_or_die(struct packed_git *p)
+{
+       if (open_pack_index(p) < 0)
+               die("failed to open index for '%s'", p->pack_name);
+
+       /* die() terminates the message itself, so no trailing newline here. */
+       if (p->index_version != 2)
+               die("unsupported index version %d (pack-fast requires index v2)",
+                       p->index_version);
+}
+
+/* qsort() comparator: orders packed_git pointers by ascending mtime. */
+static int sort_pack(const void *a_, const void *b_)
+{
+       struct packed_git *const *lhs = a_;
+       struct packed_git *const *rhs = b_;
+
+       if ((*lhs)->mtime < (*rhs)->mtime)
+               return -1;
+       return (*lhs)->mtime > (*rhs)->mtime;
+}
+
+/*
+ * Collect every local pack into all_packfiles. The pack with the
+ * largest pack_size is moved to slot 0; the remaining packs are
+ * sorted by ascending mtime. With no local packs the array is left
+ * empty and all_packfiles_nr is 0.
+ */
+static void find_packfiles(void)
+{
+       struct packed_git *p;
+       unsigned int n;
+
+       prepare_packed_git();
+
+       for (n = 0, p = packed_git; p; p = p->next) {
+               if (p->pack_local)
+                       n++;
+       }
+
+       all_packfiles = xcalloc(n, sizeof(*all_packfiles));
+       all_packfiles_nr = n;
+
+       for (n = 0, p = packed_git; p; p = p->next) {
+               if (p->pack_local)
+                       all_packfiles[n++] = p;
+       }
+
+       /* Bubble the largest pack into slot 0. */
+       for (n = 1; n < all_packfiles_nr; ++n) {
+               if (all_packfiles[n]->pack_size > all_packfiles[0]->pack_size) {
+                       struct packed_git *tmp = all_packfiles[0];
+                       all_packfiles[0] = all_packfiles[n];
+                       all_packfiles[n] = tmp;
+               }
+       }
+
+       /*
+        * Only sort when there is a tail to sort: with zero packs the
+        * unsigned "all_packfiles_nr - 1" would wrap around.
+        */
+       if (all_packfiles_nr > 1)
+               qsort(all_packfiles + 1, all_packfiles_nr - 1,
+                     sizeof(*all_packfiles), sort_pack);
+}
+
+/* qsort() comparator: orders pack_idx_entry pointers by object name. */
+static int sha1_index__cmp(const void *a_, const void *b_)
+{
+       struct pack_idx_entry *const *lhs = a_;
+       struct pack_idx_entry *const *rhs = b_;
+
+       return hashcmp((*lhs)->sha1, (*rhs)->sha1);
+}
+
+/* sha1_pos() accessor: object name of the entry at "pos" in a pack_idx_entry pointer table. */
+static const unsigned char *sha1_index__access(size_t pos, void *table)
+{
+       struct pack_idx_entry *entry = ((struct pack_idx_entry **)table)[pos];
+
+       return entry->sha1;
+}
+
+/*
+ * Fold the entries of the current slab into written_list. The slab's
+ * entries are sorted by object name and then merged with the existing
+ * list (existing entries win ties), producing a new combined array
+ * that replaces written_list.
+ */
+static void sha1_index_update(void)
+{
+       struct write_slab *slab = written_slab_current;
+       const unsigned int old_nr = written_nr;
+       const unsigned int add_nr = slab->nr;
+
+       struct pack_idx_entry **added = xmalloc(add_nr * sizeof(struct pack_idx_entry *));
+       struct pack_idx_entry **merged = xmalloc((old_nr + add_nr) * sizeof(struct pack_idx_entry *));
+
+       unsigned int a = 0, b, out = 0;
+
+       /* "idx" is the first member of each slab entry. */
+       for (b = 0; b < add_nr; b++)
+               added[b] = &slab->entries[b].idx;
+
+       qsort(added, add_nr, sizeof(struct pack_idx_entry *), sha1_index__cmp);
+
+       b = 0;
+       while (a < old_nr && b < add_nr) {
+               if (hashcmp(written_list[a]->sha1, added[b]->sha1) <= 0)
+                       merged[out++] = written_list[a++];
+               else
+                       merged[out++] = added[b++];
+       }
+
+       while (a < old_nr)
+               merged[out++] = written_list[a++];
+
+       while (b < add_nr)
+               merged[out++] = added[b++];
+
+       free(written_list);
+       free(added);
+
+       written_list = merged;
+       written_nr = old_nr + add_nr;
+}
+
+/*
+ * Look "sha1" up in written_list. Returns the object's offset in the
+ * output pack, or 0 when it has not been written.
+ */
+static off_t sha1_index_find_offset(const unsigned char *sha1)
+{
+       const int pos = sha1_pos(sha1, written_list, written_nr, sha1_index__access);
+
+       if (pos < 0)
+               return 0;
+       return written_list[pos]->offset;
+}
+
+/*
+ * Copy "len" bytes starting at "offset" of pack "p" into the output,
+ * one pack window at a time.
+ */
+static void copy_pack_data(
+               struct packwriter *w,
+               struct packed_git *p,
+               struct pack_window **w_curs,
+               off_t offset,
+               off_t len)
+{
+       while (len) {
+               unsigned long chunk;
+               unsigned char *src = use_pack(p, w_curs, offset, &chunk);
+
+               /* Never write past the requested range, however large the window is. */
+               if (chunk > len)
+                       chunk = (unsigned long)len;
+
+               packwriter_write(w, src, chunk);
+               len -= chunk;
+               offset += chunk;
+       }
+}
+
+/* Defined in sha1_file.c, where this patch drops its "static" qualifier. */
+extern enum object_type packed_to_object_type(
+       struct packed_git *p, off_t obj_offset, enum object_type type,
+       struct pack_window **w_curs, off_t curpos);
+
+/*
+ * Append the object described by reventry[0] from "pack" to the output.
+ * The object's length is taken as the distance to reventry[1].offset,
+ * so the caller must pass an entry that has a successor.
+ *
+ * When bitmaps are being written, *real_type receives the result of
+ * packed_to_object_type() for the object.
+ *
+ * Returns 1 if an OBJ_OFS_DELTA header had to be re-encoded because the
+ * distance to its base changed, or 0 if the bytes were copied verbatim.
+ */
+static int append_object_1(
+       struct revindex_entry *reventry,
+       struct packwriter *w,
+       struct packed_git *pack,
+       struct pack_window **w_curs,
+       enum object_type *real_type)
+{
+       const off_t offset = reventry[0].offset;
+       const off_t next = reventry[1].offset;
+
+       off_t cur;
+       enum object_type type;
+       unsigned long size;
+
+       /* Remember how far this object moves between source and output. */
+       record_reused_object(offset, offset - packwriter_total(w));
+
+       cur = offset;
+       type = unpack_object_header(pack, w_curs, &cur, &size);
+       assert(type >= 0);
+
+       if (write_bitmap_index)
+               *real_type = packed_to_object_type(pack, offset, type, w_curs, 
cur);
+
+       if (type == OBJ_OFS_DELTA) {
+               const off_t base_offset = get_delta_base(pack, w_curs, &cur, 
type, offset);
+               const off_t real_base_offset = 
find_real_offset_for_base(base_offset);
+               off_t fixed_offset = 0;
+
+               assert(base_offset != 0);
+
+               /*
+                * A non-zero real_base_offset means the base was skipped
+                * and already lives elsewhere in the output; otherwise
+                * compare how far the delta and its base were each shifted.
+                */
+               if (real_base_offset) {
+                       fixed_offset = packwriter_total(w) - real_base_offset;
+               } else {
+                       off_t fixup = find_reused_offset(offset) - 
find_reused_offset(base_offset);
+                       if (fixup)
+                               fixed_offset = offset - base_offset - fixup;
+               }
+
+               if (fixed_offset) {
+                       unsigned char header[10], ofs_header[10];
+                       unsigned i, len, ofs_len;
+
+                       assert(fixed_offset > 0);
+                       len = encode_in_pack_object_header(OBJ_OFS_DELTA, size, 
header);
+
+                       /*
+                        * NOTE(review): this looks like the pack format's
+                        * variable-length base-offset encoding, filled in
+                        * from the end of ofs_header - confirm against the
+                        * pack format documentation.
+                        */
+                       i = sizeof(ofs_header) - 1;
+                       ofs_header[i] = fixed_offset & 127;
+                       while (fixed_offset >>= 7)
+                               ofs_header[--i] = 128 | (--fixed_offset & 127);
+
+                       ofs_len = sizeof(ofs_header) - i;
+
+                       /* New headers, then the rest of the object from "cur" onwards. */
+                       packwriter_write(w, header, len);
+                       packwriter_write(w, ofs_header + sizeof(ofs_header) - 
ofs_len, ofs_len);
+                       copy_pack_data(w, pack, w_curs, cur, next - cur);
+                       return 1;
+               }
+
+               /* ...otherwise we have no fixup, and can write it verbatim */
+       }
+
+       copy_pack_data(w, pack, w_curs, offset, next - offset);
+       return 0;
+}
+
+/*
+ * Stream the packfile open on descriptor "from" into "w", leaving out
+ * its final 20 bytes (presumably the trailing pack checksum, which is
+ * recomputed once the combined pack is finalized).
+ *
+ * Returns 0 on success and -1 on failure: bad descriptor, fstat() or
+ * read error, a file too short to contain those 20 bytes, or end of
+ * file before the expected number of bytes was read. errno is set so
+ * the caller can report it.
+ */
+static int copy_packfile(int from, struct packwriter *w)
+{
+       unsigned char buffer[8192];
+       struct stat st;
+       off_t to_read;
+
+       if (from < 0 || fstat(from, &st))
+               return -1;
+
+       /* Too short to hold even the bytes we are about to drop. */
+       if (st.st_size < 20) {
+               errno = EINVAL;
+               return -1;
+       }
+
+       posix_fadvise(from, 0, st.st_size, POSIX_FADV_SEQUENTIAL);
+       to_read = st.st_size - 20;
+
+       if (progress)
+               fprintf(stderr, "Copying main packfile...");
+
+       while (to_read > 0) {
+               size_t cap = sizeof(buffer);
+               ssize_t r;
+
+               if ((off_t)cap > to_read)
+                       cap = (size_t)to_read;
+
+               r = xread(from, buffer, cap);
+               if (r < 0)
+                       return -1;
+               if (!r) {
+                       /* The file shrank under us; do not spin on EOF. */
+                       errno = EIO;
+                       return -1;
+               }
+
+               packwriter_write(w, buffer, r);
+               to_read -= r;
+       }
+
+       if (progress)
+               fprintf(stderr, " done.\n");
+       assert(to_read == 0);
+       return 0;
+}
+
+/*
+ * Seed the output with pack "p": copy its bytes via copy_packfile(),
+ * then record one write-list entry per object using the offsets and
+ * CRCs from p's index, and initialize written_list to point at those
+ * entries in index order. Object types are recorded as OBJ_BAD.
+ */
+static void write_initial_packfile(struct packed_git *p, struct packwriter *w)
+{
+       unsigned int n;
+       int source_fd = git_open_noatime(p->pack_name);
+
+       if (copy_packfile(source_fd, w) < 0)
+               die_errno("failed to copy '%s'", p->pack_name);
+       close(source_fd);
+
+       load_index_or_die(p);
+       preallocate_write_slab(p->num_objects);
+
+       if (progress)
+               progress_state = start_progress("Indexing main packfile", p->num_objects);
+
+       for (n = 0; n < p->num_objects; ++n) {
+               const unsigned char *sha1 = nth_packed_object_sha1(p, n);
+               const off_t offset = nth_packed_object_offset(p, n);
+               const uint32_t crc32 = nth_packed_object_crc32(p, n);
+               add_to_write_list(sha1, offset, crc32, OBJ_BAD);
+               display_progress(progress_state, n + 1);
+       }
+
+       stop_progress(&progress_state);
+       close_pack_index(p);
+
+       /* Size the array from its own element type (struct pack_idx_entry *). */
+       written_list = xmalloc(p->num_objects * sizeof(*written_list));
+       written_nr = p->num_objects;
+       for (n = 0; n < written_nr; ++n)
+               written_list[n] = &written_slab_current->entries[n].idx;
+}
+
+/*
+ * Append to the output every object of pack "p" that is not already in
+ * written_list, walking p through its reverse index. Objects that are
+ * already present are recorded in the skipped list instead, so that
+ * deltas against them can be re-pointed by append_object_1().
+ *
+ * Afterwards the new entries are merged into written_list and the
+ * per-pack skipped/reused bookkeeping is reset.
+ */
+static void append_packfile(struct packed_git *p, struct packwriter *w)
+{
+       struct pack_window *w_curs = NULL;
+       struct pack_revindex *revidx;
+
+       unsigned int n;
+
+       load_index_or_die(p);
+       preallocate_write_slab(p->num_objects);
+       revidx = revindex_for_pack(p);
+
+       if (progress)
+               progress_state = start_progress("Appending packfile", 
p->num_objects);
+
+       for (n = 0; n < p->num_objects; ++n) {
+               struct revindex_entry *reventry = &revidx->revindex[n];
+               const unsigned char *sha1 = nth_packed_object_sha1(p, 
reventry[0].nr);
+               const off_t offset_in_pack = sha1_index_find_offset(sha1);
+
+               /* Zero means "not written yet" (see sha1_index_find_offset()). */
+               if (!offset_in_pack) {
+                       const off_t offset = packwriter_total(w);
+
+                       enum object_type real_type = OBJ_BAD;
+                       uint32_t crc32;
+                       int rewrite_header;
+
+                       packwriter_crc32_start(w);
+                       rewrite_header = append_object_1(reventry, w, p, 
&w_curs, &real_type);
+                       crc32 = packwriter_crc32_end(w);
+
+                       /* Verbatim copies must still match the CRC stored in the source index. */
+                       if (!rewrite_header && crc32 != 
nth_packed_object_crc32(p, reventry[0].nr))
+                               die("crc32 check failed for %s", 
sha1_to_hex(sha1));
+
+                       add_to_write_list(sha1, offset, crc32, real_type);
+               } else {
+                       add_to_skipped_list(reventry[0].offset, offset_in_pack);
+               }
+
+               display_progress(progress_state, n + 1);
+       }
+
+       stop_progress(&progress_state);
+       unuse_pack(&w_curs);
+       close_pack_windows(p);
+       close_pack_index(p);
+
+       sha1_index_update();
+       skipped_nr = 0;
+       reused_chunks_nr = 0;
+}
+
+/*
+ * Build the combined pack: copy all_packfiles[0] wholesale, append the
+ * missing objects of every other pack, then fix up the pack header and
+ * footer, write the index, optionally rewrite the bitmap of
+ * all_packfiles[0] for the new pack, and print the resulting pack name
+ * hash on stdout. Dies if there is no local pack to start from.
+ */
+static void write_packs(void)
+{
+       struct packwriter w;
+       unsigned int i;
+
+       /* all_packfiles[0] is read unconditionally below. */
+       if (!all_packfiles_nr)
+               die("no local packfiles to combine");
+
+       packwriter_init(&w);
+       write_initial_packfile(all_packfiles[0], &w);
+
+       for (i = 1; i < all_packfiles_nr; ++i)
+               append_packfile(all_packfiles[i], &w);
+
+       /* finalize pack */
+       {
+               unsigned char sha1[20];
+               struct strbuf tmpname = STRBUF_INIT;
+
+               fixup_pack_header_footer(w.fd, sha1, w.tmp->filename.buf, written_nr, NULL, 0);
+               close(w.fd);
+
+               strbuf_addf(&tmpname, "%s-", base_name);
+
+               finish_tmp_packfile(&tmpname, w.tmp->filename.buf,
+                               written_list, written_nr,
+                               &pack_idx_opts, sha1);
+
+               if (write_bitmap_index) {
+                       strbuf_addf(&tmpname, "%s.bitmap", sha1_to_hex(sha1));
+                       bitmap_rewrite_existing(
+                               all_packfiles[0],
+                               written_list, written_nr,
+                               packwriter_total(&w),
+                               sha1, tmpname.buf);
+               }
+
+               strbuf_release(&tmpname);
+               puts(sha1_to_hex(sha1));
+       }
+}
+
+/*
+ * Extend the four type bitmaps of pack "p" to cover the objects that
+ * were appended after it. The first slab must describe p itself; every
+ * entry of the following slabs gets the next bit position (starting
+ * at p->num_objects) set in the bitmap selected by its real_type.
+ *
+ * When nothing was appended there is no second slab and the bitmaps
+ * are left untouched.
+ */
+void pack_fast_grow_typemaps(struct packed_git *p, struct ewah_bitmap **typemaps)
+{
+       uint32_t n;
+       size_t pos = p->num_objects;
+       struct write_slab *slab = written_slab_root;
+
+       assert(slab->nr == p->num_objects);
+
+       for (slab = slab->next; slab; slab = slab->next) {
+               for (n = 0; n < slab->nr; ++n) {
+                       const enum object_type real_type = slab->entries[n].real_type;
+                       assert(real_type >= OBJ_COMMIT && real_type <= OBJ_TAG);
+                       ewah_set(typemaps[real_type - 1], pos++);
+               }
+       }
+}
+
+/*
+ * Entry point of "git pack-fast". Parses the options, takes an optional
+ * first positional argument as the base name of the output pack, then
+ * gathers the local packs and writes the combined pack.
+ *
+ * NOTE(review): --skip-largest is parsed into skip_largest, but nothing
+ * visible in this file reads that variable.
+ */
+int cmd_pack_fast(int argc, const char **argv, const char *prefix)
+{
+       struct option pack_fast_options[] = {
+               OPT_SET_INT('q', "quiet", &progress,
+                           N_("do not show progress meter"), 0),
+               OPT_SET_INT(0, "progress", &progress,
+                           N_("show progress meter"), 1),
+               OPT_BOOL(0, "skip-largest", &skip_largest,
+                        N_("do not pack the largest packfile in the 
repository")),
+               OPT_END(),
+       };
+
+       reset_pack_idx_option(&pack_idx_opts);
+       /* Default to showing progress only when stderr is a terminal; options may override. */
+       progress = isatty(2);
+       argc = parse_options(argc, argv, prefix, pack_fast_options,
+                            pack_usage, 0);
+
+       if (argc) {
+               base_name = argv[0];
+               argc--;
+       }
+
+       find_packfiles();
+       write_packs();
+       return 0;
+}
diff --git a/cache.h b/cache.h
index 6f53962bf..1a13961bd 100644
--- a/cache.h
+++ b/cache.h
@@ -1336,6 +1336,11 @@ extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsign
 extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
 extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
 extern int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *);
+extern off_t get_delta_base(struct packed_git *p,
+                           struct pack_window **w_curs,
+                           off_t *curpos,
+                           enum object_type type,
+                           off_t delta_obj_offset);
 
 /*
  * Iterate over the files in the loose-object parts of the object
diff --git a/git.c b/git.c
index 40f9df089..d81bd4469 100644
--- a/git.c
+++ b/git.c
@@ -440,6 +440,7 @@ static struct cmd_struct commands[] = {
        { "name-rev", cmd_name_rev, RUN_SETUP },
        { "notes", cmd_notes, RUN_SETUP },
        { "pack-objects", cmd_pack_objects, RUN_SETUP },
+       { "pack-fast", cmd_pack_fast, RUN_SETUP },
        { "pack-redundant", cmd_pack_redundant, RUN_SETUP },
        { "pack-refs", cmd_pack_refs, RUN_SETUP },
        { "patch-id", cmd_patch_id },
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c
index c05d1386a..449715f02 100644
--- a/pack-bitmap-write.c
+++ b/pack-bitmap-write.c
@@ -505,23 +505,39 @@ void bitmap_writer_set_checksum(unsigned char *sha1)
        hashcpy(writer.pack_checksum, sha1);
 }
 
+/*
+ * Create a temporary bitmap file under the object directory, storing
+ * its name in "tmp_file" (capacity "len"), and wrap it in a sha1file.
+ * Dies if the file cannot be created.
+ */
+static struct sha1file *bitmap_file_new(char *tmp_file, size_t len)
+{
+       const int tmp_fd = odb_mkstemp(tmp_file, len, "pack/tmp_bitmap_XXXXXX");
+
+       if (tmp_fd < 0)
+               die_errno("unable to create '%s'", tmp_file);
+
+       return sha1fd(tmp_fd, tmp_file);
+}
+
+/*
+ * Finish the temporary bitmap file "f" (with fsync), adjust its
+ * permissions, and move it into place as "dest". Dies if either the
+ * permission change or the rename fails.
+ */
+static void bitmap_file_close(struct sha1file *f, const char *tmp_file, const char *dest)
+{
+       sha1close(f, NULL, CSUM_FSYNC);
+
+       if (adjust_shared_perm(tmp_file) != 0)
+               die_errno("unable to make temporary bitmap file readable");
+
+       if (rename(tmp_file, dest) != 0)
+               die_errno("unable to rename temporary bitmap file to '%s'", dest);
+}
+
 void bitmap_writer_finish(struct pack_idx_entry **index,
                          uint32_t index_nr,
                          const char *filename,
                          uint16_t options)
 {
-       static char tmp_file[PATH_MAX];
        static uint16_t default_version = 1;
        static uint16_t flags = BITMAP_OPT_FULL_DAG;
+       char tmp_file[PATH_MAX];
        struct sha1file *f;
-
        struct bitmap_disk_header header;
 
-       int fd = odb_mkstemp(tmp_file, sizeof(tmp_file), 
"pack/tmp_bitmap_XXXXXX");
-
-       if (fd < 0)
-               die_errno("unable to create '%s'", tmp_file);
-       f = sha1fd(fd, tmp_file);
+       f = bitmap_file_new(tmp_file, sizeof(tmp_file));
 
        memcpy(header.magic, BITMAP_IDX_SIGNATURE, 
sizeof(BITMAP_IDX_SIGNATURE));
        header.version = htons(default_version);
@@ -539,11 +555,138 @@ void bitmap_writer_finish(struct pack_idx_entry **index,
        if (options & BITMAP_OPT_HASH_CACHE)
                write_hash_cache(f, index, index_nr);
 
-       sha1close(f, NULL, CSUM_FSYNC);
+       bitmap_file_close(f, tmp_file, filename);
+}
 
-       if (adjust_shared_perm(tmp_file))
-               die_errno("unable to make temporary bitmap file readable");
+/*
+ * Map the bitmap file that belongs to pack "p" read-only into memory.
+ * Returns the mapping and stores its size in *_size_out, or returns
+ * NULL (leaving *_size_out untouched) when the file cannot be opened
+ * or stat'ed.
+ */
+static void *try_load_bitmap(struct packed_git *p, size_t *_size_out)
+{
+       char *path = pack_bitmap_filename(p);
+       const int fd = git_open_noatime(path);
+       struct stat st;
+       size_t map_size;
+       void *map;
+
+       free(path);
+
+       if (fd < 0)
+               return NULL;
+
+       if (fstat(fd, &st)) {
+               close(fd);
+               return NULL;
+       }
+
+       map_size = xsize_t(st.st_size);
+       map = xmmap(NULL, map_size, PROT_READ, MAP_PRIVATE, fd, 0);
+       close(fd);
+
+       *_size_out = map_size;
+       return map;
+}
+
+/* Defined in builtin/pack-fast.c; sets type bits for the objects appended after pack "p". */
+extern void pack_fast_grow_typemaps(struct packed_git *p, struct ewah_bitmap 
**typemaps);
+
+/*
+ * Read the four type bitmaps stored at "pos" in the existing bitmap
+ * file, let pack_fast_grow_typemaps() extend them for the appended
+ * objects, and write the results to "f". Returns the position just
+ * past the four original bitmaps. Dies if any of them cannot be read.
+ */
+static size_t rewrite_type_maps(struct sha1file *f,
+       struct packed_git *p, unsigned char *original_map, size_t original_size, size_t pos)
+{
+       struct ewah_bitmap *typemaps[4];
+       int k;
+
+       for (k = 0; k < 4; k++) {
+               int consumed;
+
+               typemaps[k] = ewah_pool_new();
+               consumed = ewah_read_mmap(typemaps[k], original_map + pos, original_size - pos);
+               if (consumed < 0)
+                       die("failed to read bitmap index");
+               pos += consumed;
+       }
+
+       pack_fast_grow_typemaps(p, typemaps);
+
+       for (k = 0; k < 4; k++) {
+               dump_bitmap(f, typemaps[k]);
+               ewah_pool_free(typemaps[k]);
+       }
+
+       return pos;
+}
+
+/*
+ * Copy "entry_count" bitmap entries from the existing bitmap file to
+ * "f". Each entry starts with a 32-bit index position in pack "p";
+ * that position is translated to the object's position in the new
+ * "index" (of "index_nr" entries). The remainder of the entry is
+ * copied unchanged: 2 bytes, three 32-bit fields, and 8 bytes per
+ * word as counted by the second of those fields (see the bitmap
+ * on-disk format documentation for the meaning of each field).
+ *
+ * Returns the position just past the last entry. Dies if an entry
+ * runs past the end of the mapped file, names an object that pack
+ * "p" does not have, or names one missing from the new index.
+ */
+static size_t rewrite_bitmaps(struct sha1file *f,
+       struct packed_git *p, unsigned char *original_map, size_t original_size, size_t pos,
+       uint32_t entry_count, struct pack_idx_entry **index, uint32_t index_nr)
+{
+       /* Fixed part of an entry: position (4) + 2 bytes + three 32-bit fields. */
+       const size_t fixed_len = 4 + 2 + (3 * 4);
+       uint32_t i;
+
+       for (i = 0; i < entry_count; ++i) {
+               const unsigned char *sha1;
+               uint32_t src_idx;
+               size_t src_buffer_len, total_len;
+               int new_idx;
+
+               /* Validate before touching the mapping, not after. */
+               if (pos > original_size || original_size - pos < fixed_len)
+                       die("unexpected end of file");
+
+               src_idx = get_be32(original_map + pos);
+               pos += 4;
+
+               sha1 = nth_packed_object_sha1(p, src_idx);
+               if (!sha1)
+                       die("existing bitmap for '%s' is corrupted", p->pack_name);
+
+               new_idx = sha1_pos(sha1, index, index_nr, sha1_access);
+               if (new_idx < 0)
+                       die("object %s is missing from the new pack index",
+                           sha1_to_hex(sha1));
+               sha1write_be32(f, (uint32_t)new_idx);
+
+               src_buffer_len = get_be32(original_map + pos + 2 + 4);
+               if (src_buffer_len > (original_size - pos - 2 - (3 * 4)) / 8)
+                       die("unexpected end of file");
+               total_len = (3 * 4) + (src_buffer_len * 8);
+
+               sha1write(f, original_map + pos, 2 + total_len);
+               pos += 2 + total_len;
+       }
+
+       return pos;
+}
+
+/*
+ * Derive a bitmap for the combined pack from the existing bitmap of
+ * pack "p" and write it to "filename". The header is copied with its
+ * checksum replaced by "pack_sha1"; the type bitmaps are extended for
+ * the appended objects; the per-commit entries are re-indexed against
+ * "index"; and, if the original has a hash cache, it is copied and
+ * padded with zeros up to "index_nr" entries.
+ *
+ * Returns silently when "p" has no usable bitmap. Dies if the existing
+ * bitmap has a bad signature, an unsupported version, or is truncated.
+ * "pack_offset" is not used by the visible code.
+ */
+void bitmap_rewrite_existing(
+       struct packed_git *p,
+       struct pack_idx_entry **index,
+       uint32_t index_nr,
+       off_t pack_offset,
+       const unsigned char *pack_sha1,
+       const char *filename)
+{
+       char tmp_file[PATH_MAX];
+       struct sha1file *f;
+
+       unsigned char *original_map;
+       size_t original_size, pos = 0;
+       struct bitmap_disk_header header;
+
+       original_map = try_load_bitmap(p, &original_size);
+       if (!original_map)
+               return;
+       if (original_size < sizeof(header) + 20) {
+               munmap(original_map, original_size);
+               return;
+       }
+
+       memcpy(&header, original_map, sizeof(header));
+       hashcpy(header.checksum, pack_sha1);
+
+       if (memcmp(header.magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE)) != 0)
+               die("existing bitmap for '%s' is corrupted", p->pack_name);
+
+       if (ntohs(header.version) != 1)
+               die("existing bitmap for '%s' has an unsupported version", p->pack_name);
+
+       f = bitmap_file_new(tmp_file, sizeof(tmp_file));
+
+       sha1write(f, &header, sizeof(header));
+       pos = sizeof(header);
+       pos = rewrite_type_maps(f, p, original_map, original_size, pos);
+       pos = rewrite_bitmaps(f, p, original_map, original_size, pos,
+                       ntohl(header.entry_count), index, index_nr);
+
+       if (ntohs(header.options) & BITMAP_OPT_HASH_CACHE) {
+               const size_t cache_len = (size_t)p->num_objects * 4;
+               uint32_t i, zero = 0;
+
+               /* Make sure the whole cache really is inside the mapping. */
+               if (pos > original_size || original_size - pos < cache_len)
+                       die("existing bitmap for '%s' is truncated", p->pack_name);
+
+               sha1write(f, original_map + pos, cache_len);
+               for (i = p->num_objects; i < index_nr; ++i)
+                       sha1write(f, &zero, 4);
+               pos += cache_len;
+       }
 
-       if (rename(tmp_file, filename))
-               die_errno("unable to rename temporary bitmap file to '%s'", 
filename);
+       bitmap_file_close(f, tmp_file, filename);
+       munmap(original_map, original_size);
 }
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 637770af8..ee361fa6a 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -250,7 +250,7 @@ static int load_bitmap_entries_v1(struct bitmap_index 
*index)
        return 0;
 }
 
-static char *pack_bitmap_filename(struct packed_git *p)
+char *pack_bitmap_filename(struct packed_git *p)
 {
        char *idx_name;
        int len;
diff --git a/pack-bitmap.h b/pack-bitmap.h
index 0adcef77b..398523dbb 100644
--- a/pack-bitmap.h
+++ b/pack-bitmap.h
@@ -34,6 +34,7 @@ typedef int (*show_reachable_fn)(
        struct packed_git *found_pack,
        off_t found_offset);
 
+char *pack_bitmap_filename(struct packed_git *p);
 int prepare_bitmap_git(void);
 void count_bitmap_commit_list(uint32_t *commits, uint32_t *trees, uint32_t 
*blobs, uint32_t *tags);
 void traverse_bitmap_commit_list(show_reachable_fn show_reachable);
@@ -53,5 +54,12 @@ void bitmap_writer_finish(struct pack_idx_entry **index,
                          uint32_t index_nr,
                          const char *filename,
                          uint16_t options);
+void bitmap_rewrite_existing(
+       struct packed_git *p,
+       struct pack_idx_entry **index,
+       uint32_t index_nr,
+       off_t pack_offset,
+       const unsigned char *pack_sha1,
+       const char *filename);
 
 #endif
diff --git a/sha1_file.c b/sha1_file.c
index 72289696d..bcd447f16 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1821,7 +1821,7 @@ unsigned long get_size_from_delta(struct packed_git *p,
        return get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
 }
 
-static off_t get_delta_base(struct packed_git *p,
+off_t get_delta_base(struct packed_git *p,
                                    struct pack_window **w_curs,
                                    off_t *curpos,
                                    enum object_type type,
@@ -1936,7 +1936,7 @@ static int retry_bad_packed_offset(struct packed_git *p, 
off_t obj_offset)
 
 #define POI_STACK_PREALLOC 64
 
-static enum object_type packed_to_object_type(struct packed_git *p,
+enum object_type packed_to_object_type(struct packed_git *p,
                                              off_t obj_offset,
                                              enum object_type type,
                                              struct pack_window **w_curs,

Reply via email to