Assume that the input filename is encoded as UTF-8, and convert it correctly to UTF-16. Reuse the long_file_name structure to return the generated long name to the caller. It will be used in the next commit to transform the long file name into a short file name.
Reference: http://stackoverflow.com/questions/7153935/how-to-convert-utf-8-stdstring-to-utf-16-stdwstring Signed-off-by: Hervé Poussineau <[email protected]> --- block/vvfat.c | 132 ++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 97 insertions(+), 35 deletions(-) diff --git a/block/vvfat.c b/block/vvfat.c index 7da07068b8..5f6356c834 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -357,6 +357,23 @@ typedef struct BDRVVVFATState { Error *migration_blocker; } BDRVVVFATState; +typedef struct { + /* + * Since the sequence number is at most 0x3f, and the filename + * length is at most 13 times the sequence number, the maximal + * filename length is 0x3f * 13 bytes. + */ + unsigned char name[0x3f * 13 + 1]; + int checksum, len; + int sequence_number; +} long_file_name; + +static void lfn_init(long_file_name *lfn) +{ + lfn->sequence_number = lfn->len = 0; + lfn->checksum = 0x100; +} + /* take the sector position spos and convert it to Cylinder/Head/Sector position * if the position is outside the specified geometry, fill maximum value for CHS * and return 1 to signal overflow. 
@@ -418,29 +435,90 @@ static void init_mbr(BDRVVVFATState *s, int cyls, int heads, int secs) /* direntry functions */ -/* dest is assumed to hold 258 bytes, and pads with 0xffff up to next multiple of 26 */ -static inline int short2long_name(char* dest,const char* src) -{ - int i; - int len; - for(i=0;i<129 && src[i];i++) { - dest[2*i]=src[i]; - dest[2*i+1]=0; +/* fills lfn with UTF-16 representation of src filename */ +/* return true if src is valid UTF-8 string, false otherwise */ +static bool filename2long_name(long_file_name *lfn, const char* src) +{ + uint8_t *dest = lfn->name; + int i = 0, j; + int len = 0; + while (src[i]) { + uint32_t uni = 0; + size_t todo; + uint8_t ch = src[i++]; + if (ch <= 0x7f) { + uni = ch; + todo = 0; + } else if (ch <= 0xbf) { + return false; + } else if (ch <= 0xdf) { + uni = ch & 0x1f; + todo = 1; + } else if (ch <= 0xef) { + uni = ch & 0x0f; + todo = 2; + } else if (ch <= 0xf7) { + uni = ch & 0x07; + todo = 3; + } else { + return false; + } + for (j = 0; j < todo; j++) { + uint8_t ch; + if (src[i] == '\0') { + return false; + } + ch = src[i++]; + if (ch < 0x80 || ch >= 0xbf) { + return false; + } + uni <<= 6; + uni += ch & 0x3f; + } + if (uni >= 0xd800 && uni <= 0xdfff) { + return false; + } else if (uni >= 0x10ffff) { + return false; + } + if (uni <= 0xffff) { + dest[len++] = uni & 0xff; + dest[len++] = uni >> 8; + } else { + uint16_t w; + uni -= 0x10000; + w = (uni >> 10) + 0xd800; + dest[len++] = w & 0xff; + dest[len++] = w >> 8; + w = (uni & 0x3ff) + 0xdc00; + dest[len++] = w & 0xff; + dest[len++] = w >> 8; + } + } + dest[len++] = 0; + dest[len++] = 0; + while (len % 26 != 0) { + dest[len++] = 0xff; } - len=2*i; - dest[2*i]=dest[2*i+1]=0; - for(i=2*i+2;(i%26);i++) - dest[i]=0xff; - return len; + lfn->len = len; + return true; } -static inline direntry_t* create_long_filename(BDRVVVFATState* s,const char* filename) +static direntry_t *create_long_filename(BDRVVVFATState *s, const char *filename, + long_file_name *lfn) { - 
char buffer[258]; - int length=short2long_name(buffer,filename), - number_of_entries=(length+25)/26,i; + uint8_t *buffer; + int length, number_of_entries, i; direntry_t* entry; + lfn_init(lfn); + if (!filename2long_name(lfn, filename)) { + fprintf(stderr, "vvfat: invalid UTF-8 name: %s\n", filename); + return NULL; + } + buffer = lfn->name; + length = lfn->len; + number_of_entries = (length + 25) / 26; + for(i=0;i<number_of_entries;i++) { entry=array_get_next(&(s->directory)); entry->attributes=0xf; @@ -612,6 +690,7 @@ static inline direntry_t* create_short_and_long_name(BDRVVVFATState* s, int i,j,long_index=s->directory.next; direntry_t* entry = NULL; direntry_t* entry_long = NULL; + long_file_name lfn; if(is_dot) { entry=array_get_next(&(s->directory)); @@ -620,7 +699,7 @@ static inline direntry_t* create_short_and_long_name(BDRVVVFATState* s, return entry; } - entry_long=create_long_filename(s,filename); + entry_long = create_long_filename(s, filename, &lfn); i = strlen(filename); for(j = i - 1; j>0 && filename[j]!='.';j--); @@ -1575,23 +1654,6 @@ static void schedule_mkdir(BDRVVVFATState* s, uint32_t cluster, char* path) commit->action = ACTION_MKDIR; } -typedef struct { - /* - * Since the sequence number is at most 0x3f, and the filename - * length is at most 13 times the sequence number, the maximal - * filename length is 0x3f * 13 bytes. - */ - unsigned char name[0x3f * 13 + 1]; - int checksum, len; - int sequence_number; -} long_file_name; - -static void lfn_init(long_file_name* lfn) -{ - lfn->sequence_number = lfn->len = 0; - lfn->checksum = 0x100; -} - /* return 0 if parsed successfully, > 0 if no long name, < 0 if error */ static int parse_long_name(long_file_name* lfn, const direntry_t* direntry) -- 2.11.0
