Initially (when written in 2009), git-merge-changelog did a perfect job
for me, putting the "downstream" ChangeLog entries at the top.

Things got imperfect when, instead of running 'git pull --rebase', I added
'pull.rebase = true' to my configuration and ran 'git pull'. The heuristic
for determining what is "downstream" and what is "upstream" for this case
is not sufficient.

This patch improves that determination, using two heuristics:
  - The modified ChangeLog with my entries is more likely "downstream",
    whereas the modified ChangeLog with other people's entries is more
    likely "upstream".
  - Starting with git 2.44, git provides a placeholder "%Y" that can be
    used to disambiguate the remaining cases.


2025-07-29  Bruno Haible  <[email protected]>

        git-merge-changelog: Fix upstream/downstream heuristic for "git pull".
        * lib/git-merge-changelog.c: Suggest to pass %Y as 4th parameter.
        Include <errno.h>, spawn-pipe.h, wait-process.h, xvasprintf.h,
        c-ctype.h.
        (_): New macro.
        (execute_and_read_line): New function, from lib/javacomp.c.
        (is_all_hex_digits): New function.
        (long_options): Moved into 'main'.
        (usage): Document the --debug option.
        (main): Accept a --debug option and turn on debugging at runtime instead
        of compile-time. Accept an optional other_conflict_label parameter.
        Improve 'downstream' determination using two heuristics.
        * modules/git-merge-changelog (Depends-on): Add spawn-pipe,
        wait-process, xvasprintf, c-ctype.

diff --git a/lib/git-merge-changelog.c b/lib/git-merge-changelog.c
index cc496bb4cb..d7703bc6d9 100644
--- a/lib/git-merge-changelog.c
+++ b/lib/git-merge-changelog.c
@@ -53,7 +53,7 @@
 
           [merge "merge-changelog"]
                   name = GNU-style ChangeLog merge driver
-                  driver = /usr/local/bin/git-merge-changelog %O %A %B
+                  driver = /usr/local/bin/git-merge-changelog %O %A %B "%Y"
 
      - Add to the top-level directory of the checkout a file '.gitattributes'
        with this line:
@@ -155,6 +155,7 @@
 
 #include <config.h>
 
+#include <errno.h>
 #include <getopt.h>
 #include <limits.h>
 #include <stdint.h>
@@ -167,6 +168,8 @@
 #include <error.h>
 #include "idx.h"
 #include "read-file.h"
+#include "spawn-pipe.h"
+#include "wait-process.h"
 #include "gl_xlist.h"
 #include "gl_array_list.h"
 #include "gl_linkedhash_list.h"
@@ -174,11 +177,16 @@
 #include "gl_linked_list.h"
 #include "xalloc.h"
 #include "xmalloca.h"
+#include "xvasprintf.h"
 #include "fstrcmp.h"
 #include "minmax.h"
+#include "c-ctype.h"
 #include "c-strstr.h"
 #include "fwriteerror.h"
 
+/* No internationalization here.  */
+#define _(msgid) msgid
+
 #define ASSERT(expr) \
   do                                                                         \
     {                                                                        \
@@ -976,14 +984,72 @@ conflict_write (FILE *fp, struct conflict *c)
   fputs (">>>>>>> \n", fp);
 }
 
-/* Long options.  */
-static const struct option long_options[] =
+/* Executes a program.
+   Returns the first line of its output, as a freshly allocated string, or
+   NULL.  */
+static char *
+execute_and_read_line (const char *progname,
+                       const char *prog_path, const char * const *prog_argv)
 {
-  { "help", no_argument, NULL, 'h' },
-  { "split-merged-entry", no_argument, NULL, CHAR_MAX + 1 },
-  { "version", no_argument, NULL, 'V' },
-  { NULL, 0, NULL, 0 }
-};
+  pid_t child;
+  int fd[1];
+  FILE *fp;
+  char *line;
+  size_t linesize;
+  size_t linelen;
+
+  /* Open a pipe to the program.  */
+  child = create_pipe_in (progname, prog_path, prog_argv, NULL, NULL,
+                          DEV_NULL, false, true, false, fd);
+
+  if (child == -1)
+    return NULL;
+
+  /* Retrieve its result.  */
+  fp = fdopen (fd[0], "r");
+  if (fp == NULL)
+    error (EXIT_FAILURE, errno, _("fdopen() failed"));
+
+  line = NULL; linesize = 0;
+  linelen = getline (&line, &linesize, fp);
+  if (linelen == (size_t)(-1))
+    {
+      error (0, 0, _("%s subprocess I/O error"), progname);
+      fclose (fp);
+      wait_subprocess (child, progname, true, false, true, false, NULL);
+    }
+  else
+    {
+      int exitstatus;
+
+      if (linelen > 0 && line[linelen - 1] == '\n')
+        line[linelen - 1] = '\0';
+
+      /* Read until EOF (otherwise the child process may get a SIGPIPE 
signal).  */
+      while (getc (fp) != EOF)
+        ;
+
+      fclose (fp);
+
+      /* Remove zombie process from process list, and retrieve exit status.  */
+      exitstatus =
+        wait_subprocess (child, progname, true, false, true, false, NULL);
+      if (exitstatus == 0)
+        return line;
+    }
+  free (line);
+  return NULL;
+}
+
+/* Returns true if the given string consists only of hexadecimal digits.  */
+static bool
+is_all_hex_digits (const char *s)
+{
+  for (; *s != '\0'; s++)
+    if (!c_isxdigit (*s))
+      return false;
+  return true;
+}
 
 /* Print a usage message and exit.  */
 static void
@@ -1014,6 +1080,7 @@ usage (int status)
       printf ("\n");
       #endif
       printf ("Informative output:\n");
+      printf ("      --debug                 display debugging output\n");
       printf ("  -h, --help                  display this help and exit\n");
       printf ("  -V, --version               output version information and 
exit\n");
       printf ("\n");
@@ -1030,18 +1097,28 @@ main (int argc, char *argv[])
   int optchar;
   bool do_help;
   bool do_version;
+  bool debug;
   bool split_merged_entry;
 
   /* Set default values for variables.  */
   do_help = false;
   do_version = false;
+  debug = false;
   split_merged_entry = true;
 
   /* Parse command line options.  */
+  static const struct option long_options[] =
+  {
+    { "debug", no_argument, NULL, CHAR_MAX + 2 },
+    { "help", no_argument, NULL, 'h' },
+    { "split-merged-entry", no_argument, NULL, CHAR_MAX + 1 },
+    { "version", no_argument, NULL, 'V' },
+    { NULL, 0, NULL, 0 }
+  };
   while ((optchar = getopt_long (argc, argv, "hV", long_options, NULL)) != EOF)
     switch (optchar)
     {
-    case '\0':          /* Long option.  */
+    case '\0':          /* Long option with flag != NULL.  */
       break;
     case 'h':
       do_help = true;
@@ -1051,6 +1128,9 @@ main (int argc, char *argv[])
       break;
     case CHAR_MAX + 1:  /* --split-merged-entry */
       break;
+    case CHAR_MAX + 2:  /* --debug */
+      debug = true;
+      break;
     default:
       usage (EXIT_FAILURE);
     }
@@ -1076,14 +1156,14 @@ There is NO WARRANTY, to the extent permitted by law.\n\
     }
 
   /* Test argument count.  */
-  if (optind + 3 != argc)
+  if (!(argc >= optind + 3 && argc <= optind + 4))
     error (EXIT_FAILURE, 0, "expected three arguments");
 
   {
     const char *ancestor_file_name; /* O-FILE-NAME */
     const char *destination_file_name; /* A-FILE-NAME */
-    bool downstream;
     const char *other_file_name; /* B-FILE-NAME */
+    const char *other_conflict_label; /* %Y */
     const char *mainstream_file_name;
     const char *modified_file_name;
     struct changelog_file ancestor_file;
@@ -1099,6 +1179,12 @@ There is NO WARRANTY, to the extent permitted by law.\n\
     ancestor_file_name = argv[optind];
     destination_file_name = argv[optind + 1];
     other_file_name = argv[optind + 2];
+    other_conflict_label = argv[optind + 3];
+    if (debug)
+      {
+        printf ("\n");
+        printf ("%%Y = |%s|\n", other_conflict_label);
+      }
 
     /* Heuristic to determine whether it's a pull in downstream direction
        (e.g. pull from a centralized server) or a pull in upstream direction
@@ -1138,13 +1224,25 @@ There is NO WARRANTY, to the extent permitted by law.\n\
        usage pattern: He manages local changes through stashes and uses
        "git pull" only to pull downstream.
 
-       How to distinguish these situation? There are several hints:
-         - During a "git stash apply", GIT_REFLOG_ACTION is not set.  During
-           a "git pull", it is set to 'pull '. During a "git pull --rebase",
-           it is set to 'pull --rebase'.  During a "git cherry-pick", it is
-           set to 'cherry-pick'.
+       How to distinguish these situations? There are several hints:
+         - During a "git stash apply", GIT_REFLOG_ACTION is not set.
+         - During a "git pull --rebase", it is set to 'pull --rebase'.
+         - During a "git pull", it is set to 'pull' or 'pull '.
+           But this pull may include a rebase, depending on the
+           configuration values of 'pull.rebase' and 'branch.<name>.rebase'.
+           To disambiguate this case, we look at the first line of %A and
+           of %B.
+         - During a "git cherry-pick", it is set to 'cherry-pick'.
          - During a "git stash apply", there is an environment variable of
-           the form GITHEAD_<40_hex_digits>='Stashed changes'.  */
+           the form GITHEAD_<40_hex_digits>='Stashed changes'.
+         - Looking at the value of %L is not useful: it's usually '7'.
+         - Looking at the value of %P is not useful: it's usually 'ChangeLog'.
+         - Looking at the value of %S is not useful: it's usually
+           "parent of ..." or (during 'git stash apply') "Stash base".
+         - Looking at the value of %X is not useful: it's usually "HEAD"
+           or (during 'git stash apply') "Updated upstream".
+     */
+    int downstream; /* true or false or -1 for ambiguous */
     {
       const char *var;
 
@@ -1159,37 +1257,115 @@ There is NO WARRANTY, to the extent permitted by 
law.\n\
           else
             {
               var = getenv ("GIT_REFLOG_ACTION");
-              #if 0 /* Debugging code */
-              printf ("GIT_REFLOG_ACTION=|%s|\n", var);
-              #endif
-              if (var != NULL
-                  && ((str_startswith (var, "pull")
-                       && c_strstr (var, " --rebase") == NULL)
-                      || str_startswith (var, "merge origin")))
+              if (debug)
+                printf ("GIT_REFLOG_ACTION=|%s|\n", var);
+              if (var == NULL)
+                /* "git am -3", "git stash apply", "git rebase",
+                   "git cherry-pick" and similar.  */
+                downstream = false;
+              else if (str_startswith (var, "merge origin"))
                 downstream = true;
-              else
+              else if (str_startswith (var, "pull"))
                 {
-                  /* "git stash apply", "git rebase", "git cherry-pick" and
-                     similar.  */
-                  downstream = false;
+                  if (c_strstr (var, " --rebase") != NULL)
+                    downstream = false;
+                  else
+                    downstream = -1;
                 }
+              else
+                downstream = false;
             }
         }
     }
 
-    #if 0 /* Debugging code */
-    {
-      char buf[1000];
-      printf ("First line of %%A:\n");
-      sprintf (buf, "head -n 1 %s", destination_file_name); system (buf);
-      printf ("First line of %%B:\n");
-      sprintf (buf, "head -n 1 %s", other_file_name); system (buf);
-      printf ("Guessing calling convention: %s\n",
-              downstream
-              ? "%A = modified by user, %B = upstream"
-              : "%A = upstream, %B = modified by user");
-    }
-    #endif
+    if (downstream < 0)
+      {
+        char *user_name;
+        {
+          const char *git_argv[5];
+          git_argv[0] = "git";
+          git_argv[1] = "config";
+          git_argv[2] = "get";
+          git_argv[3] = "user.name";
+          git_argv[4] = NULL;
+          user_name = execute_and_read_line ("git", "git", git_argv);
+        }
+        if (user_name != NULL && user_name[0] != '\0')
+          {
+            user_name = xasprintf ("  %s  ", user_name);
+            /* Heuristic: Look at the first line of %A and %B.  */
+            char *first_line_of_destination_file;
+            char *first_line_of_other_file;
+            {
+              const char *head_argv[5];
+              head_argv[0] = "head";
+              head_argv[1] = "-n";
+              head_argv[2] = "1";
+              head_argv[4] = NULL;
+
+              head_argv[3] = destination_file_name;
+              first_line_of_destination_file =
+                execute_and_read_line ("head", "head", head_argv);
+
+              head_argv[3] = other_file_name;
+              first_line_of_other_file =
+                execute_and_read_line ("head", "head", head_argv);
+            }
+            if (first_line_of_destination_file != NULL
+                && first_line_of_other_file != NULL)
+              {
+                if (c_strstr (first_line_of_destination_file, user_name) != 
NULL)
+                  {
+                    if (c_strstr (first_line_of_other_file, user_name) == NULL)
+                      /* The user's name is in the first ChangeLog entry of %A
+                         but not in the first ChangeLog entry of %B.  */
+                      downstream = true;
+                  }
+                else
+                  {
+                    if (c_strstr (first_line_of_other_file, user_name) != NULL)
+                      /* The user's name is in the first ChangeLog entry of %B
+                         but not in the first ChangeLog entry of %A.  */
+                      downstream = false;
+                  }
+              }
+            free (first_line_of_other_file);
+            free (first_line_of_destination_file);
+          }
+        free (user_name);
+
+        if (downstream < 0)
+          {
+            /* Disambiguate using the value of %Y:
+               - If it consists of 40 hex-digits, the result will be a merge
+                 made by the 'ort' strategy.  This happens during "git pull"
+                 without rebase, when a long sequence of commits is being
+                 pulled.  In this case, we need downstream = true.
+               - In the other cases — "git pull" with rebase, and "git pull"
+                 without rebase but with a short sequence of commits — we need
+                 downstream = false.  */
+            if (other_conflict_label != NULL
+                && strcmp (other_conflict_label, "%Y") != 0
+                && strlen (other_conflict_label) >= 40
+                && is_all_hex_digits (other_conflict_label))
+              downstream = true;
+            else
+              downstream = false;
+          }
+      }
+
+    if (debug)
+      {
+        char buf[1000];
+        printf ("First line of %%A:\n"); fflush (stdout);
+        sprintf (buf, "head -n 1 %s", destination_file_name); system (buf);
+        printf ("First line of %%B:\n"); fflush (stdout);
+        sprintf (buf, "head -n 1 %s", other_file_name); system (buf);
+        printf ("Guessing calling convention: %s\n",
+                downstream
+                ? "%A = modified by user, %B = upstream"
+                : "%A = upstream, %B = modified by user");
+      }
 
     if (downstream)
       {
diff --git a/modules/git-merge-changelog b/modules/git-merge-changelog
index 9ff56dcc86..d9c2167852 100644
--- a/modules/git-merge-changelog
+++ b/modules/git-merge-changelog
@@ -13,6 +13,8 @@ stdint-h
 stdlib-h
 error
 read-file
+spawn-pipe
+wait-process
 xlist
 array-list
 linkedhash-list
@@ -20,9 +22,11 @@ linked-list
 rbtreehash-list
 xalloc
 xmalloca
+xvasprintf
 fstrcmp
 minmax
 str_startswith
+c-ctype
 c-strstr
 fwriteerror
 memchr




Reply via email to