[PATCH] libcpp: add function to check XID properties
From: Raiki Tamura libcpp/ChangeLog: * charset.cc (check_xid_property):new function to check XID_Start and XID_Continue * include/cpplib.h (check_xid_property):add enum representing XID properties Signed-off-by: Raiki Tamura --- libcpp/charset.cc | 36 libcpp/include/cpplib.h | 7 +++ 2 files changed, 43 insertions(+) diff --git a/libcpp/charset.cc b/libcpp/charset.cc index 7b625c9956a..5d6d7aed325 100644 --- a/libcpp/charset.cc +++ b/libcpp/charset.cc @@ -1256,6 +1256,42 @@ _cpp_uname2c_uax44_lm2 (const char *name, size_t len, char *canon_name) return result; } +/* Returns flags representing the XID properties of the given codepoint. */ +unsigned int +check_xid_property (cppchar_t c) +{ + // fast path for ASCII + if (c < 0x80) + { +if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) + return XID_START | XID_CONTINUE; +if (('0' <= c && c <= '9') || c == '_') + return XID_CONTINUE; + } + + if (c > UCS_LIMIT) +return 0; + + int mn, mx, md; + mn = 0; + mx = ARRAY_SIZE (ucnranges) - 1; + while (mx != mn) +{ + md = (mn + mx) / 2; + if (c <= ucnranges[md].end) + mx = md; + else + mn = md + 1; +} + + unsigned short flags = ucnranges[mn].flags; + + if (flags & CXX23) +return XID_START | XID_CONTINUE; + if (flags & NXX23) +return XID_CONTINUE; + return 0; +} /* Returns 1 if C is valid in an identifier, 2 if C is valid except at the start of an identifier, and 0 if C is not valid in an diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h index fcdaf082b09..4ad75f877d9 100644 --- a/libcpp/include/cpplib.h +++ b/libcpp/include/cpplib.h @@ -1606,4 +1606,11 @@ bool cpp_valid_utf8_p (const char *data, size_t num_bytes); bool cpp_is_combining_char (cppchar_t c); bool cpp_is_printable_char (cppchar_t c); +enum { + XID_START = 1, + XID_CONTINUE = 2 +}; + +unsigned int check_xid_property (cppchar_t c); + #endif /* ! LIBCPP_CPPLIB_H */ -- 2.40.0 -- Gcc-rust mailing list Gcc-rust@gcc.gnu.org https://gcc.gnu.org/mailman/listinfo/gcc-rust
Re: [PATCH] libcpp: add function to check XID properties
On Fri, Sep 08, 2023 at 02:58:40PM +0200, Arthur Cohen wrote: > From: Raiki Tamura > > libcpp/ChangeLog: > > * charset.cc (check_xid_property):new function to check XID_Start and > XID_Continue > * include/cpplib.h (check_xid_property):add enum representing XID > properties Just random comments, not a proper review. 1) functions exported from libcpp should IMNSHO use the cpp_ prefix 2) similarly the enumerators should be prefixed with CPP_ 3) formatting of the ChangeLog entry is incorrect. There should be a space after ): followed by uppercase rather than lowercase letter, descriptions should end with . and there should be line wrapping so that it fits into 80 columns. For a new function, one can just say New. or New function., doesn't need to describe what it is good for. And the include/cpplib.h changes don't describe what actually changed. A new anonymous enum (why not a named one?) was added, and check_xid_property declared. > --- a/libcpp/include/cpplib.h > +++ b/libcpp/include/cpplib.h > @@ -1606,4 +1606,11 @@ bool cpp_valid_utf8_p (const char *data, size_t > num_bytes); > bool cpp_is_combining_char (cppchar_t c); > bool cpp_is_printable_char (cppchar_t c); > > +enum { > + XID_START = 1, > + XID_CONTINUE = 2 Formatting. There should be indentation just by 2 columns rather than 3. Jakub -- Gcc-rust mailing list Gcc-rust@gcc.gnu.org https://gcc.gnu.org/mailman/listinfo/gcc-rust
[PATCH v2] libcpp: add function to check XID properties
From: Raiki Tamura This commit adds a new function intended for checking the XID properties of a possibly Unicode character, as well as the accompanying enum describing the possible properties. libcpp/ChangeLog: * charset.cc (cpp_check_xid_property): New. * include/cpplib.h (cpp_check_xid_property): New. (enum cpp_xid_property): New. Signed-off-by: Raiki Tamura --- libcpp/charset.cc | 36 libcpp/include/cpplib.h | 7 +++ 2 files changed, 43 insertions(+) diff --git a/libcpp/charset.cc b/libcpp/charset.cc index 7b625c9956a..a92ba75539e 100644 --- a/libcpp/charset.cc +++ b/libcpp/charset.cc @@ -1256,6 +1256,42 @@ _cpp_uname2c_uax44_lm2 (const char *name, size_t len, char *canon_name) return result; } +/* Returns flags representing the XID properties of the given codepoint. */ +unsigned int +cpp_check_xid_property (cppchar_t c) +{ + // fast path for ASCII + if (c < 0x80) + { +if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) + return CPP_XID_START | CPP_XID_CONTINUE; +if (('0' <= c && c <= '9') || c == '_') + return CPP_XID_CONTINUE; + } + + if (c > UCS_LIMIT) +return 0; + + int mn, mx, md; + mn = 0; + mx = ARRAY_SIZE (ucnranges) - 1; + while (mx != mn) +{ + md = (mn + mx) / 2; + if (c <= ucnranges[md].end) + mx = md; + else + mn = md + 1; +} + + unsigned short flags = ucnranges[mn].flags; + + if (flags & CXX23) +return CPP_XID_START | CPP_XID_CONTINUE; + if (flags & NXX23) +return CPP_XID_CONTINUE; + return 0; +} /* Returns 1 if C is valid in an identifier, 2 if C is valid except at the start of an identifier, and 0 if C is not valid in an diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h index fcdaf082b09..013d276f384 100644 --- a/libcpp/include/cpplib.h +++ b/libcpp/include/cpplib.h @@ -1606,4 +1606,11 @@ bool cpp_valid_utf8_p (const char *data, size_t num_bytes); bool cpp_is_combining_char (cppchar_t c); bool cpp_is_printable_char (cppchar_t c); +enum { + CPP_XID_START = 1, + CPP_XID_CONTINUE = 2 +}; + +unsigned int cpp_check_xid_property (cppchar_t c); + #endif /* ! LIBCPP_CPPLIB_H */ -- 2.40.0 -- Gcc-rust mailing list Gcc-rust@gcc.gnu.org https://gcc.gnu.org/mailman/listinfo/gcc-rust
[PATCH v3] libcpp: add function to check XID properties
From: Raiki Tamura Fixed to include the enum's name which I had forgotten to commit. Thanks This commit adds a new function intended for checking the XID properties of a possibly Unicode character, as well as the accompanying enum describing the possible properties. libcpp/ChangeLog: * charset.cc (cpp_check_xid_property): New. * include/cpplib.h (cpp_check_xid_property): New. (enum cpp_xid_property): New. Signed-off-by: Raiki Tamura --- libcpp/charset.cc | 36 libcpp/include/cpplib.h | 7 +++ 2 files changed, 43 insertions(+) diff --git a/libcpp/charset.cc b/libcpp/charset.cc index 7b625c9956a..a92ba75539e 100644 --- a/libcpp/charset.cc +++ b/libcpp/charset.cc @@ -1256,6 +1256,42 @@ _cpp_uname2c_uax44_lm2 (const char *name, size_t len, char *canon_name) return result; } +/* Returns flags representing the XID properties of the given codepoint. */ +unsigned int +cpp_check_xid_property (cppchar_t c) +{ + // fast path for ASCII + if (c < 0x80) + { +if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) + return CPP_XID_START | CPP_XID_CONTINUE; +if (('0' <= c && c <= '9') || c == '_') + return CPP_XID_CONTINUE; + } + + if (c > UCS_LIMIT) +return 0; + + int mn, mx, md; + mn = 0; + mx = ARRAY_SIZE (ucnranges) - 1; + while (mx != mn) +{ + md = (mn + mx) / 2; + if (c <= ucnranges[md].end) + mx = md; + else + mn = md + 1; +} + + unsigned short flags = ucnranges[mn].flags; + + if (flags & CXX23) +return CPP_XID_START | CPP_XID_CONTINUE; + if (flags & NXX23) +return CPP_XID_CONTINUE; + return 0; +} /* Returns 1 if C is valid in an identifier, 2 if C is valid except at the start of an identifier, and 0 if C is not valid in an diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h index fcdaf082b09..583e3071e90 100644 --- a/libcpp/include/cpplib.h +++ b/libcpp/include/cpplib.h @@ -1606,4 +1606,11 @@ bool cpp_valid_utf8_p (const char *data, size_t num_bytes); bool cpp_is_combining_char (cppchar_t c); bool cpp_is_printable_char (cppchar_t c); +enum cpp_xid_property { + CPP_XID_START = 1, + CPP_XID_CONTINUE = 2 +}; + +unsigned int cpp_check_xid_property (cppchar_t c); + #endif /* ! LIBCPP_CPPLIB_H */ -- 2.40.0 -- Gcc-rust mailing list Gcc-rust@gcc.gnu.org https://gcc.gnu.org/mailman/listinfo/gcc-rust