Hi, I have attached a set of working patches for "fossil export" and "fossil import".
Please note these are clean reverse-engineered codes, written independently of the code in git, using the included code analysis result. These codes are BSD-2-Clause licensed, and I agree to assign copyright to Hipp, Wyrick & Company, Inc. for any patches submitted to this ML. Please review and consider. Osamu PS: The previous "fossil import" code could not handle some non-standard filenames.
From: Osamu Aoki <os...@debian.org> Date: Tue, 11 Oct 2016 23:45:30 +0900 Subject: Fix export Newline and UTF-8 characters in filenames are now allowed. The git-fast-export code was analyzed as below. This analysis was used to create an independent code as this patch. == Git fast-export filename encoding spec == Filenames exported by "git fast-export" are encoded and quoted if they contain some non-plain ASCII alphanumeric characters. Filenames imported by "git fast-import" are decoded if they are recorded within double quotes. This memo documents the details of encoding and quotation to enable creating a clean reverse engineered GPL unencumbered code in BSD-2-Clause license. In Git 2.9.3, the focal points defining this feature are: * quote.c quote_c_style Escape odd characters and quote the escaped string Return TRUE if used * builtin/fast-export.c print_path_1 Use quote_c_style if it finds it needs to quote and escape Otherwise, quote entire string if ' ' is found in it If neither, use the original filename to export * quote.c unquote_c_style Unescape escaped characters The following table summarizes the end result for each character found in the filename for the combined effects of the above 2 functions: 01234567 89abcdef 0x00 OOOOOOOa btnvfrOO 0x10 OOOOOOOO OOOOOOOO 0x20 QAEAAAAA AAAAAAAA (quote space and escape double quote) 0x30 AAAAAAAA AAAAAAAA 0x40 AAAAAAAA AAAAAAAA 0x50 AAAAAAAA AAAAEAAA (escape backslash) 0x60 AAAAAAAA AAAAAAAA 0x70 AAAAAAAA AAAAAAAO (DEL is octal!) 0x80 OOOOOOOO OOOOOOOO 0x90 OOOOOOOO OOOOOOOO 0xa0 OOOOOOOO OOOOOOOO 0xb0 OOOOOOOO OOOOOOOO 0xc0 OOOOOOOO OOOOOOOO 0xd0 OOOOOOOO OOOOOOOO 0xe0 OOOOOOOO OOOOOOOO 0xf0 OOOOOOOO OOOOOOOO Here each character is represented by the hex number obtained by adding its row and column indices. The default of the quoting flag is FALSE. 
The meaning of the conversion rule indicators are the following: --- src/export.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 4 deletions(-) diff --git a/src/export.c b/src/export.c index f524cdc..e677351 100644 --- a/src/export.c +++ b/src/export.c @@ -305,6 +305,49 @@ void export_marks(FILE* f, Bag *blobs, Bag *vers){ } /* +** Quote and escape a filename to be exported if it contains some special +** characters. This implements not only the minimum requirements +** '\\', '"' and '\n' but also 3 digits octal escapes for all high bits +** characters and other standard single character escapes such as +** '\a'..'\r' on par with what the git-fast-export does. +*/ +static void quote_git_filename(const char *zName, char *name){ + int i, j; + int needQuote; + static char escs[] = "abtnvfr"; + needQuote = 0; + for(i=0; zName[i]!=0; i++){ + if( zName[i]<=' ' || zName[i]>'~' || zName[i]=='\\' || zName[i]=='"' ){ + needQuote = 1; + } + } + j = 0; + if( needQuote==1 ){ + name[j++] = '"'; + } + for(i=0; zName[i]!=0; i++){ + if( zName[i]=='\\' || zName[i]=='"' ){ + name[j++] = '\\'; + name[j++] = zName[i]; + }else if ( zName[i]>=' ' && zName[i]<='~' ){ + name[j++] = zName[i]; + }else if( zName[i]>='\a' && zName[i]<='\r' ){ + name[j++] = '\\'; + name[j++] = escs[( zName[i] - '\a')]; + }else{ + name[j++] = '\\'; + name[j++] = ((zName[i] >> 6) & '\03') + '0'; + name[j++] = ((zName[i] >> 3) & '\07') + '0'; + name[j++] = ((zName[i] >> 0) & '\07') + '0'; + } + } + if( needQuote==1 ){ + name[j++] = '"'; + } + name[j] = '\0'; +} + +/* ** COMMAND: export ** ** Usage: %fossil export --git ?OPTIONS? ?REPOSITORY? 
@@ -516,19 +559,28 @@ void export_cmd(void){ ); while( db_step(&q4)==SQLITE_ROW ){ const char *zName = db_column_text(&q4,0); + char *name; int zNew = db_column_int(&q4,1); int mPerm = db_column_int(&q4,2); - if( zNew==0) - printf("D %s\n", zName); - else if( bag_find(&blobs, zNew) ) { + if( zName==0 || zName=="" ){ + name = fossil_malloc( 1 ); + name[0] = '\0'; + }else{ + name = fossil_malloc( 4*strlen(zName) + 2 + 1 ); + } + quote_git_filename(zName, name); + if( zNew==0 ){ + printf("D %s\n", name); + }else if( bag_find(&blobs, zNew) ){ const char *zPerm; switch( mPerm ){ case PERM_LNK: zPerm = "120000"; break; case PERM_EXE: zPerm = "100755"; break; default: zPerm = "100644"; break; } - printf("M %s :%d %s\n", zPerm, BLOBMARK(zNew), zName); + printf("M %s :%d %s\n", zPerm, BLOBMARK(zNew), name); } + fossil_free(name); } db_finalize(&q4); db_finalize(&q3);
From: Osamu Aoki <os...@debian.org> Date: Thu, 13 Oct 2016 21:55:12 +0900 Subject: Fix import Single character escape sequences are properly handled. The git-fast-export code was analyzed. This analysis was used to create an independent code as this patch. The code is written to be robust enough to accept non-conforming inputs. The octal sequence always has 3 digits, as in git-fast-import. --- src/import.c | 67 ++++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 20 deletions(-) diff --git a/src/import.c b/src/import.c index b74186c..57e65f2 100644 --- a/src/import.c +++ b/src/import.c @@ -477,36 +477,63 @@ static ImportFile *import_find_file(const char *zName, int *pI, int mx){ } /* -** Dequote a fast-export filename. Filenames are normally unquoted. But -** if the contain some obscure special characters, quotes might be added. +** Dequote and unescape a git-fast-export filename. Filenames are normally +** unquoted by the git-fast-export command. But if they contain some special +*: characters, special characters are escaped and quotes are added. This +** function reverses this conversion to get the original filename. 
*/ static void dequote_git_filename(char *zName){ - int n, i, j; if( zName==0 || zName[0]!='"' ) return; - n = (int)strlen(zName); + int n = (int)strlen(zName); if( zName[n-1]!='"' ) return; - for(i=0, j=1; j<n-1; j++){ - char c = zName[j]; - int x; - if( c=='\\' ){ - if( j+3 <= n-1 - && zName[j+1]>='0' && zName[j+1]<='3' - && zName[j+2]>='0' && zName[j+2]<='7' - && zName[j+3]>='0' && zName[j+3]<='7' - && (x = 64*(zName[j+1]-'0') + 8*(zName[j+2]-'0') + zName[j+3]-'0')!=0 - ){ - c = (unsigned char)x; - j += 3; + /* zName is quoted */ + int i=0; /* destination index, always i<j */ + int j=1; /* source index: 0..n-1 */ + while( j<n-1 ){ + if( zName[j]=='\\' ){ + if( j+1<=n-1 && !( zName[j+1]<'0' || zName[j+1]>'3' ) ){ + switch( zName[j+1] ){ + case 'a': zName[i++] = '\a'; break; + case 'b': zName[i++] = '\b'; break; + case 't': zName[i++] = '\t'; break; + case 'n': zName[i++] = '\n'; break; + case 'v': zName[i++] = '\v'; break; + case 'f': zName[i++] = '\f'; break; + case 'r': zName[i++] = '\r'; break; + case '\\': zName[i++] = '\\'; break; + case '"': zName[i++] = '"'; break; + default: /* illegal sequence */ + zName[i++] = '\\'; + zName[i++] = zName[j+1]; break; + } + j += 2; + }else if( j+3<=n-1 + && zName[j+1]>='0' && zName[j+1]<='3' + && zName[j+2]>='0' && zName[j+2]<='7' + && zName[j+3]>='0' && zName[j+3]<='7' ){ + /* Only 3 digits are used by the git-fast-export octal escape */ + zName[i++] = (zName[j+1]-'0')<<6; + zName[i++] |= (zName[j+2]-'0')<<3; + zName[i++] |= (zName[j+3]-'0')<<0; + j += 4; + }else if(j+1<=n-1){ + /* illegal sequence */ + zName[i++] = zName[j]; + zName[i++] = zName[j+1]; + j += 2; }else{ - c = zName[++j]; + /* illegal sequence */ + zName[i++] = zName[j]; + j++; } + }else{ + zName[i++] = zName[j]; + j++; } - zName[i++] = c; } - zName[i] = 0; + zName[i] = '\0'; } - /* ** Read the git-fast-import format from pIn and insert the corresponding ** content into the database.
signature.asc
Description: PGP signature