On Sun, Jun 04, 2017 at 01:46:23AM +0700, PePa wrote:
[...]
> But the fact that unicode functions are already supported does seem to
> pave the way for allowing variable names in unicode. For consistency, it
[...]
I know I said I wasn't going to reply, but this changed my mind :-)

I hadn't realized that bash already supports Unicode in function names!

FWIW:

bash-4.4$ Lēv=? Φ=0.618033988749894848 ɸ=1.61803398874989485 π=3.14159265358979324
declare -p Lēv Φ ɸ π
declare -- Lēv="?"
declare -- Φ="0.618033988749894848"
declare -- ɸ="1.61803398874989485"
declare -- π="3.14159265358979324"

With this terrible patch:

dualbus@debian:~/src/gnu/bash$ PAGER= git diff
diff --git a/general.c b/general.c
index 584e7859..40db7b1d 100644
--- a/general.c
+++ b/general.c
@@ -61,6 +61,9 @@ extern int errno;
 # include <sys/cygwin.h>
 #endif
 
+#define wlegal_variable_starter(c) (iswalpha(c) || (L'_' == c))
+#define wlegal_variable_char(c) (iswalnum(c) || (L'_' == c))
+
 static char *bash_special_tilde_expansions __P((char *));
 static int unquoted_tilde_word __P((const char *));
 static void initialize_group_array __P((void));
@@ -214,15 +217,25 @@ int
 legal_identifier (name)
      const char *name;
 {
-  register const char *s;
-  unsigned char c;
+  wchar_t *s, *wstring;
+  wchar_t c;
+  size_t n;
+
+  if (!name || *name == '\0')
+    return (0);
+
+  n = mbstowcs(NULL, name, 0);
+  if((size_t) -1 == n) return 0;
+  wstring = xmalloc(sizeof(wchar_t) * (n+1));
+  n = mbstowcs(wstring, name, n);
+  if((size_t) -1 == n) return 0;
 
-  if (!name || !(c = *name) || (legal_variable_starter (c) == 0))
+  if (wlegal_variable_starter (*wstring) == 0)
     return (0);
 
-  for (s = name + 1; (c = *s) != 0; s++)
+  for (s = wstring + 1; (c = *s) != 0; s++)
     {
-      if (legal_variable_char (c) == 0)
+      if (wlegal_variable_char (c) == 0)
         return (0);
     }
   return (1);
@@ -357,27 +370,31 @@ assignment (string, flags)
      const char *string;
      int flags;
 {
-  register unsigned char c;
+  wchar_t c;
   register int newi, indx;
+  wchar_t *wstring;
+  int n;
+  size_t len;
 
-  c = string[indx = 0];
-
+  len = strlen(string);
+  if ((n=mbtowc(&c, &string[indx = 0], len)) < 1) return (0);
+  indx += n; len -= n;
 #if defined (ARRAY_VARS)
-  if ((legal_variable_starter (c) == 0) && ((flags&1) == 0 || c != '[')) /* ] */
+  if ((wlegal_variable_starter (c) == 0) && ((flags&1) == 0 || c != L'[')) /* ] */
 #else
-  if (legal_variable_starter (c) == 0)
+  if (wlegal_variable_starter (c) == 0)
 #endif
     return (0);
 
-  while (c = string[indx])
+  while ((n=mbtowc(&c, &string[indx], len)) > 0)
     {
       /* The following is safe. Note that '=' at the start of a word
          is not an assignment statement. */
-      if (c == '=')
+      if (c == L'=')
         return (indx);
 
 #if defined (ARRAY_VARS)
-      if (c == '[')
+      if (c == L'[')
         {
           newi = skipsubscript (string, indx, (flags & 2) ? 1 : 0);
           if (string[newi++] != ']')
@@ -389,15 +406,15 @@ assignment (string, flags)
 #endif /* ARRAY_VARS */
 
       /* Check for `+=' */
-      if (c == '+' && string[indx+1] == '=')
+      if (c == L'+' && string[indx+1] == '=')
         return (indx + 1);
 
       /* Variable names in assignment statements may contain only letters,
          digits, and `_'. */
-      if (legal_variable_char (c) == 0)
+      if (wlegal_variable_char (c) == 0)
         return (0);
 
-      indx++;
+      indx += n; len -= n;
     }
   return (0);
 }

It seems to have issues with compound assignments though.

-- 
Eduardo Bustamante
https://dualbus.me/