Sun Chao (my former colleague at Huawei) found a bug of
git-pack-redundant.  If there are too many packs and many of them
overlap each other, running `git pack-redundant --all` will
exhaust all memories and the process will be killed by kernel.

There is a script in commit log of commit 3/6, which can be used to
create a repository with lots of redundant packs. Running `git
pack-redundant --all` in it can reproduce this issue.

Derrick Stolee <sto...@gmail.com> 于2019年1月20日周日 上午9:08写道:
>
> Here is today's test coverage report.
>
> builtin/pack-redundant.c
> a338d10395 builtin/pack-redundant.c 339) static int 
> cmp_remaining_objects(const void *a, const void *b)
> e4e2c2884e builtin/pack-redundant.c 341) struct pack_list *pl_a = *((struct 
> pack_list **)a);
> e4e2c2884e builtin/pack-redundant.c 342) struct pack_list *pl_b = *((struct 
> pack_list **)b);
> ...

Add new test cases in t5323 for better test coverage.

## Changes since reroll v6

* Add new test cases in t5323.

* Add new patch 2/6 (pack-redundant: delay creation of unique_objects), 
  which will fix a bug which fail to find redundant packs if turn on alt-odb
  searching with `--alt-odb` option.  This is because alt-odb objects are
  only remove in unique_objects fields but not in all_objects fields of
  pack_list.

## Range diff

1:  be6555ae60 ! 1:  799e804d5e t5323: test cases for git-pack-redundant
    @@ -43,7 +43,7 @@
     +}
     +
     +create_pack_1 () {
    -+  P1=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++  P1=$(git -C objects/pack pack-objects -q pack <<-EOF
     +          $T
     +          $A
     +          $B
    @@ -58,7 +58,7 @@
     +}
     +
     +create_pack_2 () {
    -+  P2=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++  P2=$(git -C objects/pack pack-objects -q pack <<-EOF
     +          $B
     +          $C
     +          $D
    @@ -72,7 +72,7 @@
     +}
     +
     +create_pack_3 () {
    -+  P3=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++  P3=$(git -C objects/pack pack-objects -q pack <<-EOF
     +          $F
     +          $I
     +          $J
    @@ -85,7 +85,7 @@
     +}
     +
     +create_pack_4 () {
    -+  P4=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++  P4=$(git -C objects/pack pack-objects -q pack <<-EOF
     +          $J
     +          $K
     +          $L
    @@ -97,7 +97,7 @@
     +}
     +
     +create_pack_5 () {
    -+  P5=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++  P5=$(git -C objects/pack pack-objects -q pack <<-EOF
     +          $G
     +          $H
     +          $N
    @@ -108,7 +108,7 @@
     +}
     +
     +create_pack_6 () {
    -+  P6=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++  P6=$(git -C objects/pack pack-objects -q pack <<-EOF
     +          $N
     +          $O
     +          $Q
    @@ -118,7 +118,7 @@
     +}
     +
     +create_pack_7 () {
    -+  P7=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++  P7=$(git -C objects/pack pack-objects -q pack <<-EOF
     +          $P
     +          $Q
     +          EOF
    @@ -127,18 +127,37 @@
     +}
     +
     +create_pack_8 () {
    -+  P8=$(git -C .git/objects/pack pack-objects -q pack <<-EOF
    ++  P8=$(git -C objects/pack pack-objects -q pack <<-EOF
     +          $A
     +          EOF
     +  ) &&
     +  eval P$P8=P8:$P8
     +}
     +
    -+test_expect_success 'setup' '
    ++format_packfiles () {
    ++  sed \
    ++          -e "s#.*/pack-\(.*\)\.idx#\1#" \
    ++          -e "s#.*/pack-\(.*\)\.pack#\1#" |
    ++  sort -u |
    ++  while read p
    ++  do
    ++          if test -z "$(eval echo \${P$p})"
    ++          then
    ++                  echo $p
    ++          else
    ++                  eval echo "\${P$p}"
    ++          fi
    ++  done |
    ++  sort
    ++}
    ++
    ++test_expect_success 'setup master.git' '
    ++  git init --bare master.git &&
    ++  cd master.git &&
     +  create_commits
     +'
     +
    -+test_expect_success 'no redundant packs' '
    ++test_expect_success 'no redundant for pack 1, 2, 3' '
     +  create_pack_1 && create_pack_2 && create_pack_3 &&
     +  git pack-redundant --all >out &&
     +  test_must_be_empty out
    @@ -154,12 +173,7 @@
     +
     +test_expect_success 'one of pack-2/pack-3 is redundant' '
     +  git pack-redundant --all >out &&
    -+  sed \
    -+          -e "s#.*/pack-\(.*\)\.idx#\1#" \
    -+          -e "s#.*/pack-\(.*\)\.pack#\1#" out |
    -+          sort -u |
    -+          while read p; do eval echo "\${P$p}"; done |
    -+          sort >actual &&
    ++  format_packfiles <out >actual &&
     +  test_cmp expected actual
     +'
     +
    @@ -176,12 +190,7 @@
     +
     +test_expect_success 'pack 2, 4, and 6 are redundant' '
     +  git pack-redundant --all >out &&
    -+  sed \
    -+          -e "s#.*/pack-\(.*\)\.idx#\1#" \
    -+          -e "s#.*/pack-\(.*\)\.pack#\1#" out |
    -+          sort -u |
    -+          while read p; do eval echo "\${P$p}"; done |
    -+          sort >actual &&
    ++  format_packfiles <out >actual &&
     +  test_cmp expected actual
     +'
     +
    @@ -198,18 +207,13 @@
     +
     +test_expect_success 'pack-8 (subset of pack-1) is also redundant' '
     +  git pack-redundant --all >out &&
    -+  sed \
    -+          -e "s#.*/pack-\(.*\)\.idx#\1#" \
    -+          -e "s#.*/pack-\(.*\)\.pack#\1#" out |
    -+          sort -u |
    -+          while read p; do eval echo "\${P$p}"; done |
    -+          sort >actual &&
    ++  format_packfiles <out >actual &&
     +  test_cmp expected actual
     +'
     +
     +test_expect_success 'clean loose objects' '
     +  git prune-packed &&
    -+  find .git/objects -type f | sed -e "/objects\/pack\//d" >out &&
    ++  find objects -type f | sed -e "/objects\/pack\//d" >out &&
     +  test_must_be_empty out
     +'
     +
    @@ -220,4 +224,115 @@
     +  test_must_be_empty out
     +'
     +
    ++test_expect_success 'setup shared.git' '
    ++  cd "$TRASH_DIRECTORY" &&
    ++  git clone -q --mirror master.git shared.git &&
    ++  cd shared.git &&
    ++  printf "../../master.git/objects" >objects/info/alternates
    ++'
    ++
    ++test_expect_success 'no redundant packs without --alt-odb' '
    ++  git pack-redundant --all >out &&
    ++  test_must_be_empty out
    ++'
    ++
    ++cat >expected <<EOF
    ++P1:$P1
    ++P3:$P3
    ++P5:$P5
    ++P7:$P7
    ++EOF
    ++
    ++test_expect_success 'pack-redundant --verbose: show duplicate packs in 
stderr' '
    ++  git pack-redundant --all --verbose >out 2>out.err &&
    ++  test_must_be_empty out &&
    ++  grep "pack$" out.err | format_packfiles >actual &&
    ++  test_cmp expected actual
    ++'
    ++
    ++cat >expected <<EOF
    ++fatal: Zero packs found!
    ++EOF
    ++
    ++test_expect_success 'remove redundant packs by alt-odb, no packs left' '
    ++  git pack-redundant --all --alt-odb | xargs rm &&
    ++  git fsck --no-progress &&
    ++  test_must_fail git pack-redundant --all --alt-odb >actual 2>&1 &&
    ++  test_cmp expected actual
    ++'
    ++
    ++create_commits_others () {
    ++  parent=$(git rev-parse HEAD)
    ++  for name in X Y Z
    ++  do
    ++          test_tick &&
    ++          T=$(git write-tree) &&
    ++          if test -z "$parent"
    ++          then
    ++                  oid=$(echo $name | git commit-tree $T)
    ++          else
    ++                  oid=$(echo $name | git commit-tree -p $parent $T)
    ++          fi &&
    ++          eval $name=$oid &&
    ++          parent=$oid ||
    ++          return 1
    ++  done
    ++  git update-ref refs/heads/master $Z
    ++}
    ++
    ++create_pack_x1 () {
    ++  Px1=$(git -C objects/pack pack-objects -q pack <<-EOF
    ++          $X
    ++          $Y
    ++          $Z
    ++          $A
    ++          $B
    ++          $C
    ++          EOF
    ++  ) &&
    ++  eval P${Px1}=Px1:${Px1}
    ++}
    ++
    ++create_pack_x2 () {
    ++  Px2=$(git -C objects/pack pack-objects -q pack <<-EOF
    ++          $X
    ++          $Y
    ++          $Z
    ++          $D
    ++          $E
    ++          $F
    ++          EOF
    ++  ) &&
    ++  eval P${Px2}=Px2:${Px2}
    ++}
    ++
    ++test_expect_success 'new objects and packs in shared.git' '
    ++  create_commits_others &&
    ++  create_pack_x1 &&
    ++  create_pack_x2 &&
    ++  git pack-redundant --all >out &&
    ++  test_must_be_empty out
    ++'
    ++
    ++test_expect_success 'one pack is redundant' '
    ++  git pack-redundant --all --alt-odb >out &&
    ++  format_packfiles <out >actual &&
    ++  test_line_count = 1 actual
    ++'
    ++
    ++cat >expected <<EOF
    ++Px1:$Px1
    ++Px2:$Px2
    ++EOF
    ++
    ++test_expect_success 'set ignore objects and all two packs are redundant' '
    ++  git pack-redundant --all --alt-odb >out <<-EOF &&
    ++          $X
    ++          $Y
    ++          $Z
    ++          EOF
    ++  format_packfiles <out >actual &&
    ++  test_cmp expected actual
    ++'
    ++
     +test_done
-:  ---------- > 2:  520f6277fb pack-redundant: delay creation of unique_objects
2:  e4e2c2884e ! 3:  ab1c2c4950 pack-redundant: new algorithm to find min packs
    @@ -64,6 +64,9 @@
         4. Drop the duplicated objects from other packs in the ordered
            non_unique pack list, and repeat step 3.
     
    +    Some test cases will fail on Mac OS X. Mark them and will resolve in
    +    later commit.
    +
         Original PR and discussions: https://github.com/jiangxin/git/pull/25
     
         Signed-off-by: Sun Chao <sunch...@huawei.com>
    @@ -213,11 +216,61 @@
        struct llist *ignore;
        struct object_id *oid;
        char buf[GIT_MAX_HEXSZ + 2]; /* hex hash + \n + \0 */
    +
    + diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
    + --- a/t/t5323-pack-redundant.sh
    + +++ b/t/t5323-pack-redundant.sh
     @@
    -   pl = local_packs;
    -   while (pl) {
    -           llist_sorted_difference_inplace(pl->unique_objects, ignore);
    -+          llist_sorted_difference_inplace(pl->all_objects, ignore);
    -           pl = pl->next;
    -   }
    + P2:$P2
    + EOF
    + 
    +-test_expect_success 'one of pack-2/pack-3 is redundant' '
    ++test_expect_failure 'one of pack-2/pack-3 is redundant' '
    +   git pack-redundant --all >out &&
    +   format_packfiles <out >actual &&
    +   test_cmp expected actual
    +@@
    + P6:$P6
    + EOF
    + 
    +-test_expect_success 'pack 2, 4, and 6 are redundant' '
    ++test_expect_failure 'pack 2, 4, and 6 are redundant' '
    +   git pack-redundant --all >out &&
    +   format_packfiles <out >actual &&
    +   test_cmp expected actual
    +@@
    + P8:$P8
    + EOF
    + 
    +-test_expect_success 'pack-8 (subset of pack-1) is also redundant' '
    ++test_expect_failure 'pack-8 (subset of pack-1) is also redundant' '
    +   git pack-redundant --all >out &&
    +   format_packfiles <out >actual &&
    +   test_cmp expected actual
    +@@
    +   test_must_be_empty out
    + '
    + 
    +-test_expect_success 'remove redundant packs and pass fsck' '
    ++test_expect_failure 'remove redundant packs and pass fsck' '
    +   git pack-redundant --all | xargs rm &&
    +   git fsck --no-progress &&
    +   git pack-redundant --all >out &&
    +@@
    +   printf "../../master.git/objects" >objects/info/alternates
    + '
    + 
    +-test_expect_success 'no redundant packs without --alt-odb' '
    ++test_expect_failure 'no redundant packs without --alt-odb' '
    +   git pack-redundant --all >out &&
    +   test_must_be_empty out
    + '
    +@@
    + P7:$P7
    + EOF
      
    +-test_expect_success 'pack-redundant --verbose: show duplicate packs in 
stderr' '
    ++test_expect_failure 'pack-redundant --verbose: show duplicate packs in 
stderr' '
    +   git pack-redundant --all --verbose >out 2>out.err &&
    +   test_must_be_empty out &&
    +   grep "pack$" out.err | format_packfiles >actual &&
3:  e60b134e66 = 4:  3c3a7ea40f pack-redundant: remove unused functions
4:  cb7e0336fc ! 5:  bc4b681f40 pack-redundant: rename pack_list.all_objects
    @@ -20,6 +20,18 @@
      } *local_packs = NULL, *altodb_packs = NULL;
      
      static struct llist_item *free_nodes;
    +@@
    +   const unsigned int hashsz = the_hash_algo->rawsz;
    + 
    +   if (!p1->unique_objects)
    +-          p1->unique_objects = llist_copy(p1->all_objects);
    ++          p1->unique_objects = llist_copy(p1->remaining_objects);
    +   if (!p2->unique_objects)
    +-          p2->unique_objects = llist_copy(p2->all_objects);
    ++          p2->unique_objects = llist_copy(p2->remaining_objects);
    + 
    +   p1_base = p1->pack->index_data;
    +   p2_base = p2->pack->index_data;
     @@
      {
        struct pack_list *pl_a = *((struct pack_list **)a);
    @@ -94,10 +106,12 @@
        }
      }
     @@
    +   while (alt) {
                local = local_packs;
                while (local) {
    -                   llist_sorted_difference_inplace(local->unique_objects,
    +-                  llist_sorted_difference_inplace(local->all_objects,
     -                                                  alt->all_objects);
    ++                  
llist_sorted_difference_inplace(local->remaining_objects,
     +                                                  alt->remaining_objects);
                        local = local->next;
                }
    @@ -123,16 +137,11 @@
     +          llist_insert_back(l.remaining_objects, (const struct object_id 
*)(base + off));
                off += step;
        }
    -   /* this list will be pruned in cmp_two_packs later */
    --  l.unique_objects = llist_copy(l.all_objects);
    -+  l.unique_objects = llist_copy(l.remaining_objects);
    -   if (p->pack_local)
    -           return pack_list_insert(&local_packs, &l);
    -   else
    +   l.unique_objects = NULL;
     @@
    +   llist_sorted_difference_inplace(all_objects, ignore);
        pl = local_packs;
        while (pl) {
    -           llist_sorted_difference_inplace(pl->unique_objects, ignore);
     -          llist_sorted_difference_inplace(pl->all_objects, ignore);
     +          llist_sorted_difference_inplace(pl->remaining_objects, ignore);
                pl = pl->next;
5:  a338d10395 ! 6:  6cfba5b4b2 pack-redundant: consistent sort method
    @@ -75,9 +75,9 @@
                off += step;
        }
     +  l.all_objects_size = l.remaining_objects->size;
    -   /* this list will be pruned in cmp_two_packs later */
    -   l.unique_objects = llist_copy(l.remaining_objects);
    +   l.unique_objects = NULL;
        if (p->pack_local)
    +           return pack_list_insert(&local_packs, &l);
     
      diff --git a/t/t5323-pack-redundant.sh b/t/t5323-pack-redundant.sh
      --- a/t/t5323-pack-redundant.sh
    @@ -90,4 +90,53 @@
     +P3:$P3
      EOF
      
    - test_expect_success 'one of pack-2/pack-3 is redundant' '
    +-test_expect_failure 'one of pack-2/pack-3 is redundant' '
    ++test_expect_success 'one of pack-2/pack-3 is redundant' '
    +   git pack-redundant --all >out &&
    +   format_packfiles <out >actual &&
    +   test_cmp expected actual
    +@@
    + P6:$P6
    + EOF
    + 
    +-test_expect_failure 'pack 2, 4, and 6 are redundant' '
    ++test_expect_success 'pack 2, 4, and 6 are redundant' '
    +   git pack-redundant --all >out &&
    +   format_packfiles <out >actual &&
    +   test_cmp expected actual
    +@@
    + P8:$P8
    + EOF
    + 
    +-test_expect_failure 'pack-8 (subset of pack-1) is also redundant' '
    ++test_expect_success 'pack-8 (subset of pack-1) is also redundant' '
    +   git pack-redundant --all >out &&
    +   format_packfiles <out >actual &&
    +   test_cmp expected actual
    +@@
    +   test_must_be_empty out
    + '
    + 
    +-test_expect_failure 'remove redundant packs and pass fsck' '
    ++test_expect_success 'remove redundant packs and pass fsck' '
    +   git pack-redundant --all | xargs rm &&
    +   git fsck --no-progress &&
    +   git pack-redundant --all >out &&
    +@@
    +   printf "../../master.git/objects" >objects/info/alternates
    + '
    + 
    +-test_expect_failure 'no redundant packs without --alt-odb' '
    ++test_expect_success 'no redundant packs without --alt-odb' '
    +   git pack-redundant --all >out &&
    +   test_must_be_empty out
    + '
    +@@
    + P7:$P7
    + EOF
    + 
    +-test_expect_failure 'pack-redundant --verbose: show duplicate packs in 
stderr' '
    ++test_expect_success 'pack-redundant --verbose: show duplicate packs in 
stderr' '
    +   git pack-redundant --all --verbose >out 2>out.err &&
    +   test_must_be_empty out &&
    +   grep "pack$" out.err | format_packfiles >actual &&

--
Jiang Xin (4):
  t5323: test cases for git-pack-redundant
  pack-redundant: delay creation of unique_objects
  pack-redundant: rename pack_list.all_objects
  pack-redundant: consistent sort method

Sun Chao (2):
  pack-redundant: new algorithm to find min packs
  pack-redundant: remove unused functions

 builtin/pack-redundant.c  | 233 +++++++++++----------------
 t/t5323-pack-redundant.sh | 322 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 415 insertions(+), 140 deletions(-)
 create mode 100755 t/t5323-pack-redundant.sh

-- 
2.20.1.103.ged0fc2ca7b

Reply via email to