From f1092363ca283ba024289d53e094a97ef9cdba8b Mon Sep 17 00:00:00 2001
From: Shenhao Wang <wangsh.fnst@fujitsu.com>
Date: Mon, 24 Jan 2022 09:57:34 +0800
Subject: [PATCH 1/2] v2 make canonicalize path remove all . in path

---
 contrib/adminpack/expected/adminpack.out |   2 +-
 src/port/path.c                          | 294 +++++++++++++++++++----
 2 files changed, 244 insertions(+), 52 deletions(-)

diff --git a/contrib/adminpack/expected/adminpack.out b/contrib/adminpack/expected/adminpack.out
index edf3ebfcba..76aafe6316 100644
--- a/contrib/adminpack/expected/adminpack.out
+++ b/contrib/adminpack/expected/adminpack.out
@@ -51,7 +51,7 @@ SELECT pg_file_write(current_setting('data_directory') || '/test_file4', 'test4'
 (1 row)
 
 SELECT pg_file_write(current_setting('data_directory') || '/../test_file4', 'test4', false);
-ERROR:  reference to parent directory ("..") not allowed
+ERROR:  absolute path not allowed
 RESET ROLE;
 REVOKE EXECUTE ON FUNCTION pg_file_write(text,text,bool) FROM regress_user1;
 REVOKE pg_read_all_settings FROM regress_user1;
diff --git a/src/port/path.c b/src/port/path.c
index 69bb8fe40b..e9f1c2e2a2 100644
--- a/src/port/path.c
+++ b/src/port/path.c
@@ -46,7 +46,7 @@
 
 static void make_relative_path(char *ret_path, const char *target_path,
 							   const char *bin_path, const char *my_exec_path);
-static void trim_directory(char *path);
+static char *trim_directory(char *path);
 static void trim_trailing_separator(char *path);
 
 
@@ -240,6 +240,56 @@ join_path_components(char *ret_path,
 	}
 }
 
+/*
+ * Cut the first component of "path" in place: the next directory separator
+ * is overwritten with '\0'.  Returns the text after it, or the final '\0'.
+ */
+static char*
+canonicalize_path_get_next_dir(char *path)
+{
+	while(!IS_DIR_SEP(*path) && *path != '\0')
+		path++;
+
+	if (*path != '\0')
+	{
+		*path = '\0';
+		return path + 1;
+	}
+	else
+		return path;
+}
+
+/*
+ * Copy "folder" (without its NUL) to "path"; return the end of the copy.
+ */
+static char*
+canonicalize_path_append_path(char *path, char *folder)
+{
+	int len = strlen(folder);
+
+	/* memmove tolerates overlap; skip the copy when already in place. */
+	if (path != folder)
+		memmove(path, folder, len);
+
+	return path + len;
+}
+
+typedef enum {
+	ABSOLUTE_PATH_INIT = 0,		/* Absolute path, nothing kept yet beyond the
+								 * leading slash (and drive spec on Win32) */
+	ABSOLUTE_WITH_N_DEPTH,		/* Absolute path with at least one dirname */
+	RELATIVE_PATH_INIT,			/* Relative path, no component seen yet */
+	RELATIVE_WITH_N_DEPTH,		/* Relative path with at least one dirname, e.g.
+								 * a/b/c or ../../a/b/c */
+	RELATIVE_WITH_CURRENT_REF,	/* Relative path so far is just a dot, e.g. . */
+	RELATIVE_WITH_PARENT_REF	/* Relative path so far is only double-dots, e.g. ../.. */
+} canonicalize_state;
+
+typedef enum {
+	TYPE_CURRENT_DIR = 0, /* component is exactly "." */
+	TYPE_PARENT_DIR, /* component is exactly ".." */
+	TYPE_NORMAL_DIR /* any other non-empty component */
+} path_type;
 
 /*
  *	Clean up path by:
@@ -247,18 +297,23 @@ join_path_components(char *ret_path,
  *		o  remove trailing quote on Win32
  *		o  remove trailing slash
  *		o  remove duplicate adjacent separators
- *		o  remove trailing '.'
+ *		o  remove '.' components (a path that is only '.' is kept)
  *		o  process trailing '..' ourselves
  */
 void
 canonicalize_path(char *path)
 {
-	char	   *p,
-			   *to_p;
 	char	   *spath;
+	char	   *parsed;
+	char	   *unparse;
+	char	   *unparse_next;
+	char	   *p, *to_p;
 	bool		was_sep = false;
-	int			pending_strips;
 
+	canonicalize_state	state;
+	path_type	type;
+	int			pathdepth = 0; /* used when state is ABSOLUTE_WITH_N_DEPTH or
+								* RELATIVE_WITH_N_DEPTH */
 #ifdef WIN32
 
 	/*
@@ -308,60 +363,194 @@ canonicalize_path(char *path)
 	*to_p = '\0';
 
 	/*
-	 * Remove any trailing uses of "." and process ".." ourselves
-	 *
-	 * Note that "/../.." should reduce to just "/", while "../.." has to be
-	 * kept as-is.  In the latter case we put back mistakenly trimmed ".."
-	 * components below.  Also note that we want a Windows drive spec to be
-	 * visible to trim_directory(), but it's not part of the logic that's
-	 * looking at the name components; hence distinction between path and
-	 * spath.
+	 * Remove any uses of "." and process ".." ourselves
 	 */
 	spath = skip_drive(path);
-	pending_strips = 0;
-	for (;;)
+	if (*spath == '\0')
+		return;
+
+	if (is_absolute_path(path))
+	{
+		state = ABSOLUTE_PATH_INIT;
+		/* Skip the leading slash for absolute path */
+		parsed = unparse = (spath + 1);
+	}
+	else
 	{
-		int			len = strlen(spath);
+		state = RELATIVE_PATH_INIT;
+		parsed = unparse = spath;
+	}
 
-		if (len >= 2 && strcmp(spath + len - 2, "/.") == 0)
-			trim_directory(path);
-		else if (strcmp(spath, ".") == 0)
-		{
-			/* Want to leave "." alone, but "./.." has to become ".." */
-			if (pending_strips > 0)
-				*spath = '\0';
-			break;
-		}
-		else if ((len >= 3 && strcmp(spath + len - 3, "/..") == 0) ||
-				 strcmp(spath, "..") == 0)
+	while(*unparse != '\0')
+	{
+		unparse_next = canonicalize_path_get_next_dir(unparse);
+
+		if (strcmp(unparse, ".") == 0)
+			type = TYPE_CURRENT_DIR;
+		else if (strcmp(unparse, "..") == 0)
+			type = TYPE_PARENT_DIR;
+		else if (*unparse != '\0')
+			type = TYPE_NORMAL_DIR;
+		else
 		{
-			trim_directory(path);
-			pending_strips++;
+			/* duplicate adjacent separators have already been removed above */
+			Assert(false);
 		}
-		else if (pending_strips > 0 && *spath != '\0')
+
+		switch(state)
 		{
-			/* trim a regular directory name canceled by ".." */
-			trim_directory(path);
-			pending_strips--;
-			/* foo/.. should become ".", not empty */
-			if (*spath == '\0')
-				strcpy(spath, ".");
+			case ABSOLUTE_PATH_INIT:
+			{
+				if (type == TYPE_NORMAL_DIR)
+				{
+					/*
+					 * Append next dir name, we don't need to append a slash
+					 * when state is ABSOLUTE_PATH_INIT.
+					 */
+					parsed = canonicalize_path_append_path(parsed, unparse);
+					state = ABSOLUTE_WITH_N_DEPTH;
+					pathdepth ++;
+				}
+
+				/* Ignore dot(.) and double-dot(..) case */
+				break;
+			}
+			case RELATIVE_PATH_INIT:
+			{
+				if (type == TYPE_CURRENT_DIR)
+				{
+					/* Append next dot(.) */
+					parsed = canonicalize_path_append_path(parsed, unparse);
+					state = RELATIVE_WITH_CURRENT_REF;
+				}
+				else if (type == TYPE_PARENT_DIR)
+				{
+					/* Append next double-dot(..) */
+					parsed = canonicalize_path_append_path(parsed, unparse);
+					state = RELATIVE_WITH_PARENT_REF;
+				}
+				else if (type == TYPE_NORMAL_DIR)
+				{
+					/* Append next dir */
+					parsed = canonicalize_path_append_path(parsed, unparse);
+					state = RELATIVE_WITH_N_DEPTH;
+					pathdepth ++;
+				}
+				break;
+			}
+			case ABSOLUTE_WITH_N_DEPTH:
+			{
+				if (type == TYPE_PARENT_DIR)
+				{
+					/* trim_directory never removes the leading slash. */
+					*parsed = '\0';
+					parsed = trim_directory(path);
+					if (-- pathdepth == 0)
+					{
+						state = ABSOLUTE_PATH_INIT;
+					}
+					else
+						state = ABSOLUTE_WITH_N_DEPTH;
+				}
+				else if (type == TYPE_NORMAL_DIR)
+				{
+					/* Append next dir */
+					*parsed ++ = '/';
+					parsed = canonicalize_path_append_path(parsed, unparse);
+					state = ABSOLUTE_WITH_N_DEPTH;
+					pathdepth ++;
+				}
+
+				/* Ignore dot(.) case */
+				break;
+			}
+			case RELATIVE_WITH_N_DEPTH:
+			{
+				if (type == TYPE_PARENT_DIR)
+				{
+					/* Remove last parsed dir */
+					*parsed = '\0';
+					parsed = trim_directory(path);
+					if (--pathdepth == 0)
+					{
+						/*
+						 * There are only two cases when pathdepth becomes 0.
+						 *  - "dir/.." (any leading dot(.) has already been removed.)
+						 *  - "../dir/.."
+						 */
+						if (parsed != spath)
+							state = RELATIVE_WITH_PARENT_REF;
+						else
+						{
+							/*
+							 * spath is now an empty string; write a dot(.) so
+							 * that the result never becomes an empty string.
+							 */
+							*parsed ++ = '.';
+							state = RELATIVE_WITH_CURRENT_REF;
+						}
+					}
+					else
+						state = RELATIVE_WITH_N_DEPTH;
+				}
+				else if (type == TYPE_NORMAL_DIR)
+				{
+					/* Append next dir */
+					*parsed ++ = '/';
+					parsed = canonicalize_path_append_path(parsed, unparse);
+					state = RELATIVE_WITH_N_DEPTH;
+					pathdepth ++;
+				}
+
+				/* Ignore dot(.) case */
+				break;
+			}
+			case RELATIVE_WITH_CURRENT_REF:
+			{
+				if (type == TYPE_PARENT_DIR)
+				{
+					/* Leading "./.." will be converted to ".." */
+					parsed = canonicalize_path_append_path(parsed - 1, unparse);
+					state = RELATIVE_WITH_PARENT_REF;
+				}
+				else if (type == TYPE_NORMAL_DIR)
+				{
+					/* Leading "./dir" will be converted to "dir" */
+					parsed = canonicalize_path_append_path(parsed - 1, unparse);
+					state = RELATIVE_WITH_N_DEPTH;
+					pathdepth = 1;
+				}
+
+				/* Ignore dot(.) case */
+				break;
+			}
+			case RELATIVE_WITH_PARENT_REF:
+			{
+				if (type == TYPE_PARENT_DIR)
+				{
+					/* Append next double-dot(..)  */
+					*parsed ++ = '/';
+					parsed = canonicalize_path_append_path(parsed, unparse);
+					state = RELATIVE_WITH_PARENT_REF;
+				}
+				else if (type == TYPE_NORMAL_DIR)
+				{
+					/* Append next dir */
+					*parsed ++ = '/';
+					parsed = canonicalize_path_append_path(parsed, unparse);
+					state = RELATIVE_WITH_N_DEPTH;
+					pathdepth = 1;
+				}
+
+				/* Ignore dot(.) case */
+				break;
+			}
 		}
-		else
-			break;
-	}
 
-	if (pending_strips > 0)
-	{
-		/*
-		 * We could only get here if path is now totally empty (other than a
-		 * possible drive specifier on Windows). We have to put back one or
-		 * more ".."'s that we took off.
-		 */
-		while (--pending_strips > 0)
-			strcat(path, "../");
-		strcat(path, "..");
+		unparse = unparse_next;
 	}
+
+	*parsed = '\0';
 }
 
 /*
@@ -866,7 +1055,7 @@ get_parent_directory(char *path)
  *	the last pathname component, and the slash just ahead of it --- but never
  *	remove a leading slash.
  */
-static void
+static char *
 trim_directory(char *path)
 {
 	char	   *p;
@@ -874,7 +1063,7 @@ trim_directory(char *path)
 	path = skip_drive(path);
 
 	if (path[0] == '\0')
-		return;
+		return path;
 
 	/* back up over trailing slash(es) */
 	for (p = path + strlen(path) - 1; IS_DIR_SEP(*p) && p > path; p--)
@@ -889,6 +1078,9 @@ trim_directory(char *path)
 	if (p == path && IS_DIR_SEP(*p))
 		p++;
 	*p = '\0';
+
+	/* return the end of the path for canonicalize_path */
+	return p;
 }
 
 
-- 
2.26.2

