This is an automated email from the ASF dual-hosted git repository. vatamane pushed a commit to branch update-quickjs in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 6a854ff3868726bc090a4644f362f761f2f0c408 Author: Nick Vatamaniuc <[email protected]> AuthorDate: Sun Jan 4 23:56:15 2026 -0500 Update QuickJS: regexp updates + memory leak fixes * Add regexp duplicate named groups https://github.com/bellard/quickjs/commit/24379bf53c30b35151f345318d850bac49219288 * Fix fast array extension optimization when there are multiple realms https://github.com/bellard/quickjs/commit/e5fd3918c1c4a2ee39016e71b66a9eeda85ce716 * Remove memory leak in case of error in `cpool_add()` https://github.com/bellard/quickjs/commit/fcd33c1afa7b3028531f53cd1190a3877454f6b3 * Remove use after free in `js_create_module_bytecode_function()` https://github.com/bellard/quickjs/commit/1dbba8a88eaa40d15a8a9b70bb1a0b8fb5b552e6 * Don't call well-known Symbol methods for RegExp on primitive values https://github.com/bellard/quickjs/commit/c73a435f365e5250dd529cde00675528d7609edf * Slightly faster lexical variable assignment https://github.com/bellard/quickjs/commit/31ef02b90785fbc6effb82c300bf05a2a0903088 * `\x{N}` is a syntax error https://github.com/bellard/quickjs/commit/7bd1ae2c76f9053e00e405998a3ea66a995403c4 * Removed `alloca()` in `lre_exec()` https://github.com/bellard/quickjs/commit/f1139494d18a2053630c5ed3384a42bb70db3c53 --- .../patches/01-spidermonkey-185-mode.patch | 6 +- src/couch_quickjs/patches/02-test262-errors.patch | 4 +- src/couch_quickjs/quickjs/Changelog | 1 + src/couch_quickjs/quickjs/libregexp-opcode.h | 4 +- src/couch_quickjs/quickjs/libregexp.c | 437 +++++++++++++-------- src/couch_quickjs/quickjs/libregexp.h | 4 + src/couch_quickjs/quickjs/libunicode.h | 5 + src/couch_quickjs/quickjs/quickjs-opcode.h | 1 + src/couch_quickjs/quickjs/quickjs.c | 184 +++++---- src/couch_quickjs/quickjs/test262.conf | 50 +-- src/couch_quickjs/quickjs/test262_errors.txt | 6 - 11 files changed, 418 insertions(+), 284 deletions(-) diff --git a/src/couch_quickjs/patches/01-spidermonkey-185-mode.patch 
b/src/couch_quickjs/patches/01-spidermonkey-185-mode.patch index 2f642114e..db740bf2b 100644 --- a/src/couch_quickjs/patches/01-spidermonkey-185-mode.patch +++ b/src/couch_quickjs/patches/01-spidermonkey-185-mode.patch @@ -1,6 +1,6 @@ ---- quickjs-master/quickjs.c 2025-11-29 09:14:41.000000000 -0500 -+++ quickjs/quickjs.c 2025-11-29 20:38:38.829402534 -0500 -@@ -31420,10 +31420,24 @@ +--- quickjs-master/quickjs.c 2025-12-22 09:12:46 ++++ quickjs/quickjs.c 2026-01-04 23:55:00 +@@ -31443,10 +31443,24 @@ if (s->token.val == TOK_FUNCTION || (token_is_pseudo_keyword(s, JS_ATOM_async) && peek_token(s, TRUE) == TOK_FUNCTION)) { diff --git a/src/couch_quickjs/patches/02-test262-errors.patch b/src/couch_quickjs/patches/02-test262-errors.patch index 692e080ac..81646c900 100644 --- a/src/couch_quickjs/patches/02-test262-errors.patch +++ b/src/couch_quickjs/patches/02-test262-errors.patch @@ -1,5 +1,5 @@ ---- quickjs-master/test262_errors.txt 2025-11-29 09:14:41.000000000 -0500 -+++ quickjs/test262_errors.txt 2025-11-29 20:38:38.835402578 -0500 +--- quickjs-master/test262_errors.txt 2025-12-22 09:12:46 ++++ quickjs/test262_errors.txt 2026-01-04 23:55:00 @@ -23,6 +23,8 @@ test262/test/language/module-code/ambiguous-export-bindings/namespace-unambiguous-if-export-star-as-from-and-import-star-as-and-export.js:74: SyntaxError: export 'foo' in module 'test262/test/language/module-code/ambiguous-export-bindings/namespace-unambiguous-if-import-star-as-and-export.js' is ambiguous test262/test/language/module-code/ambiguous-export-bindings/namespace-unambiguous-if-export-star-as-from.js:75: SyntaxError: export 'foo' in module 'test262/test/language/module-code/ambiguous-export-bindings/namespace-unambiguous-if-export-star-as-from.js' is ambiguous diff --git a/src/couch_quickjs/quickjs/Changelog b/src/couch_quickjs/quickjs/Changelog index 070b0a77a..3c08f0c58 100644 --- a/src/couch_quickjs/quickjs/Changelog +++ b/src/couch_quickjs/quickjs/Changelog @@ -6,6 +6,7 @@ - added Atomics.pause 
- added added Map and WeakMap upsert methods - added Math.sumPrecise() +- added regexp duplicate named groups - misc bug fixes 2025-09-13: diff --git a/src/couch_quickjs/quickjs/libregexp-opcode.h b/src/couch_quickjs/quickjs/libregexp-opcode.h index 6b97b1273..b3d7b6fdf 100644 --- a/src/couch_quickjs/quickjs/libregexp-opcode.h +++ b/src/couch_quickjs/quickjs/libregexp-opcode.h @@ -31,6 +31,8 @@ DEF(char32, 5) DEF(char32_i, 5) DEF(dot, 1) DEF(any, 1) /* same as dot but match any character including line terminator */ +DEF(space, 1) +DEF(not_space, 1) /* must come after */ DEF(line_start, 1) DEF(line_start_m, 1) DEF(line_end, 1) @@ -54,7 +56,7 @@ DEF(word_boundary, 1) DEF(word_boundary_i, 1) DEF(not_word_boundary, 1) DEF(not_word_boundary_i, 1) -DEF(back_reference, 2) +DEF(back_reference, 2) /* variable length */ DEF(back_reference_i, 2) /* must come after */ DEF(backward_back_reference, 2) /* must come after */ DEF(backward_back_reference_i, 2) /* must come after */ diff --git a/src/couch_quickjs/quickjs/libregexp.c b/src/couch_quickjs/quickjs/libregexp.c index 0c989b969..c387f0043 100644 --- a/src/couch_quickjs/quickjs/libregexp.c +++ b/src/couch_quickjs/quickjs/libregexp.c @@ -34,7 +34,9 @@ /* TODO: - + - remove REOP_char_i and REOP_range_i by precomputing the case folding. + - add specific opcodes for simple unicode property tests so that the + generated bytecode is smaller. - Add a lock step execution mode (=linear time execution guaranteed) when the regular expression is "simple" i.e. no backreference nor complicated lookahead. 
The opcodes are designed for this execution @@ -77,6 +79,7 @@ typedef struct { BOOL ignore_case; BOOL multi_line; BOOL dotall; + uint8_t group_name_scope; int capture_count; int total_capture_count; /* -1 = not computed yet */ int has_named_captures; /* -1 = don't know, 0 = no, 1 = yes */ @@ -478,7 +481,7 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf, if (i != 1) printf(","); printf("<%s>", p); - p += strlen(p) + 1; + p += strlen(p) + LRE_GROUP_NAME_TRAILER_LEN; } printf("\n"); assert(p == (char *)(buf + buf_len)); @@ -547,11 +550,22 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf, break; case REOP_save_start: case REOP_save_end: + printf(" %u", buf[pos + 1]); + break; case REOP_back_reference: case REOP_back_reference_i: case REOP_backward_back_reference: case REOP_backward_back_reference_i: - printf(" %u", buf[pos + 1]); + { + int n, i; + n = buf[pos + 1]; + len += n; + for(i = 0; i < n; i++) { + if (i != 0) + printf(","); + printf(" %u", buf[pos + 2 + i]); + } + } break; case REOP_save_reset: printf(" %u %u", buf[pos + 1], buf[pos + 2]); @@ -745,9 +759,21 @@ int lre_parse_escape(const uint8_t **pp, int allow_utf16) c = '\v'; break; case 'x': + { + int h0, h1; + + h0 = from_hex(*p++); + if (h0 < 0) + return -1; + h1 = from_hex(*p++); + if (h1 < 0) + return -1; + c = (h0 << 4) | h1; + } + break; case 'u': { - int h, n, i; + int h, i; uint32_t c1; if (*p == '{' && allow_utf16) { @@ -765,14 +791,8 @@ int lre_parse_escape(const uint8_t **pp, int allow_utf16) } p++; } else { - if (c == 'x') { - n = 2; - } else { - n = 4; - } - c = 0; - for(i = 0; i < n; i++) { + for(i = 0; i < 4; i++) { h = from_hex(*p++); if (h < 0) { return -1; @@ -1060,7 +1080,7 @@ static int get_class_atom(REParseState *s, REStringList *cr, goto default_escape; if (cr_init_char_range(s, cr, c)) return -1; - c = CLASS_RANGE_BASE; + c += CLASS_RANGE_BASE; break; case 'c': c = *p; @@ -1531,17 +1551,18 @@ static int re_parse_char_class(REParseState *s, const 
uint8_t **pp) return -1; } -/* Return: - - true if the opcodes may not advance the char pointer - - false if the opcodes always advance the char pointer +/* need_check_adv: false if the opcodes always advance the char pointer + need_capture_init: true if all the captures in the atom are not set */ -static BOOL re_need_check_advance(const uint8_t *bc_buf, int bc_buf_len) +static BOOL re_need_check_adv_and_capture_init(BOOL *pneed_capture_init, + const uint8_t *bc_buf, int bc_buf_len) { int pos, opcode, len; uint32_t val; - BOOL ret; + BOOL need_check_adv, need_capture_init; - ret = TRUE; + need_check_adv = TRUE; + need_capture_init = FALSE; pos = 0; while (pos < bc_buf_len) { opcode = bc_buf[pos]; @@ -1551,20 +1572,23 @@ static BOOL re_need_check_advance(const uint8_t *bc_buf, int bc_buf_len) case REOP_range_i: val = get_u16(bc_buf + pos + 1); len += val * 4; - goto simple_char; + need_check_adv = FALSE; + break; case REOP_range32: case REOP_range32_i: val = get_u16(bc_buf + pos + 1); len += val * 8; - goto simple_char; + need_check_adv = FALSE; + break; case REOP_char: case REOP_char_i: case REOP_char32: case REOP_char32_i: case REOP_dot: case REOP_any: - simple_char: - ret = FALSE; + case REOP_space: + case REOP_not_space: + need_check_adv = FALSE; break; case REOP_line_start: case REOP_line_start_m: @@ -1582,18 +1606,25 @@ static BOOL re_need_check_advance(const uint8_t *bc_buf, int bc_buf_len) case REOP_save_start: case REOP_save_end: case REOP_save_reset: + break; case REOP_back_reference: case REOP_back_reference_i: case REOP_backward_back_reference: case REOP_backward_back_reference_i: + val = bc_buf[pos + 1]; + len += val; + need_capture_init = TRUE; break; default: /* safe behavior: we cannot predict the outcome */ - return TRUE; + need_capture_init = TRUE; + goto done; } pos += len; } - return ret; + done: + *pneed_capture_init = need_capture_init; + return need_check_adv; } /* '*pp' is the first char after '<' */ @@ -1652,16 +1683,16 @@ static int 
re_parse_group_name(char *buf, int buf_size, const uint8_t **pp) } /* if capture_name = NULL: return the number of captures + 1. - Otherwise, return the capture index corresponding to capture_name - or -1 if none */ + Otherwise, return the number of matching capture groups */ static int re_parse_captures(REParseState *s, int *phas_named_captures, - const char *capture_name) + const char *capture_name, BOOL emit_group_index) { const uint8_t *p; - int capture_index; + int capture_index, n; char name[TMP_BUF_SIZE]; capture_index = 1; + n = 0; *phas_named_captures = 0; for (p = s->buf_start; p < s->buf_end; p++) { switch (*p) { @@ -1673,8 +1704,11 @@ static int re_parse_captures(REParseState *s, int *phas_named_captures, if (capture_name) { p += 3; if (re_parse_group_name(name, sizeof(name), &p) == 0) { - if (!strcmp(name, capture_name)) - return capture_index; + if (!strcmp(name, capture_name)) { + if (emit_group_index) + dbuf_putc(&s->byte_code, capture_index); + n++; + } } } capture_index++; @@ -1699,17 +1733,18 @@ static int re_parse_captures(REParseState *s, int *phas_named_captures, } } done: - if (capture_name) - return -1; - else + if (capture_name) { + return n; + } else { return capture_index; + } } static int re_count_captures(REParseState *s) { if (s->total_capture_count < 0) { s->total_capture_count = re_parse_captures(s, &s->has_named_captures, - NULL); + NULL, FALSE); } return s->total_capture_count; } @@ -1721,25 +1756,53 @@ static BOOL re_has_named_captures(REParseState *s) return s->has_named_captures; } -static int find_group_name(REParseState *s, const char *name) +static int find_group_name(REParseState *s, const char *name, BOOL emit_group_index) { const char *p, *buf_end; size_t len, name_len; - int capture_index; + int capture_index, n; p = (char *)s->group_names.buf; - if (!p) return -1; + if (!p) + return 0; buf_end = (char *)s->group_names.buf + s->group_names.size; name_len = strlen(name); capture_index = 1; + n = 0; while (p < buf_end) { 
len = strlen(p); - if (len == name_len && memcmp(name, p, name_len) == 0) - return capture_index; - p += len + 1; + if (len == name_len && memcmp(name, p, name_len) == 0) { + if (emit_group_index) + dbuf_putc(&s->byte_code, capture_index); + n++; + } + p += len + LRE_GROUP_NAME_TRAILER_LEN; capture_index++; } - return -1; + return n; +} + +static BOOL is_duplicate_group_name(REParseState *s, const char *name, int scope) +{ + const char *p, *buf_end; + size_t len, name_len; + int scope1; + + p = (char *)s->group_names.buf; + if (!p) + return 0; + buf_end = (char *)s->group_names.buf + s->group_names.size; + name_len = strlen(name); + while (p < buf_end) { + len = strlen(p); + if (len == name_len && memcmp(name, p, name_len) == 0) { + scope1 = (uint8_t)p[len + 1]; + if (scope == scope1) + return TRUE; + } + p += len + LRE_GROUP_NAME_TRAILER_LEN; + } + return FALSE; } static int re_parse_disjunction(REParseState *s, BOOL is_backward_dir); @@ -1783,7 +1846,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) { const uint8_t *p; int c, last_atom_start, quant_min, quant_max, last_capture_count; - BOOL greedy, add_zero_advance_check, is_neg, is_backward_lookahead; + BOOL greedy, is_neg, is_backward_lookahead; REStringList cr_s, *cr = &cr_s; last_atom_start = -1; @@ -1922,12 +1985,16 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) &p)) { return re_parse_error(s, "invalid group name"); } - if (find_group_name(s, s->u.tmp_buf) > 0) { + /* poor's man method to test duplicate group + names. 
*/ + /* XXX: this method does not catch all the errors*/ + if (is_duplicate_group_name(s, s->u.tmp_buf, s->group_name_scope)) { return re_parse_error(s, "duplicate group name"); } /* group name with a trailing zero */ dbuf_put(&s->group_names, (uint8_t *)s->u.tmp_buf, strlen(s->u.tmp_buf) + 1); + dbuf_putc(&s->group_names, s->group_name_scope); s->has_named_captures = 1; goto parse_capture; } else { @@ -1938,6 +2005,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) p++; /* capture without group name */ dbuf_putc(&s->group_names, 0); + dbuf_putc(&s->group_names, 0); parse_capture: if (s->capture_count >= CAPTURE_COUNT_MAX) return re_parse_error(s, "too many captures"); @@ -1964,17 +2032,18 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) case 'b': case 'B': if (p[1] != 'b') { - re_emit_op(s, s->ignore_case ? REOP_not_word_boundary_i : REOP_not_word_boundary); + re_emit_op(s, s->ignore_case && s->is_unicode ? REOP_not_word_boundary_i : REOP_not_word_boundary); } else { - re_emit_op(s, s->ignore_case ? REOP_word_boundary_i : REOP_word_boundary); + re_emit_op(s, s->ignore_case && s->is_unicode ? 
REOP_word_boundary_i : REOP_word_boundary); } p += 2; break; case 'k': { const uint8_t *p1; - int dummy_res; - + int dummy_res, n; + BOOL is_forward; + p1 = p; if (p1[2] != '<') { /* annex B: we tolerate invalid group names in non @@ -1993,21 +2062,33 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) else goto parse_class_atom; } - c = find_group_name(s, s->u.tmp_buf); - if (c < 0) { + is_forward = FALSE; + n = find_group_name(s, s->u.tmp_buf, FALSE); + if (n == 0) { /* no capture name parsed before, try to look after (inefficient, but hopefully not common */ - c = re_parse_captures(s, &dummy_res, s->u.tmp_buf); - if (c < 0) { + n = re_parse_captures(s, &dummy_res, s->u.tmp_buf, FALSE); + if (n == 0) { if (s->is_unicode || re_has_named_captures(s)) return re_parse_error(s, "group name not defined"); else goto parse_class_atom; } + is_forward = TRUE; + } + last_atom_start = s->byte_code.size; + last_capture_count = s->capture_count; + + /* emit back references to all the captures indexes matching the group name */ + re_emit_op_u8(s, REOP_back_reference + 2 * is_backward_dir + s->ignore_case, n); + if (is_forward) { + re_parse_captures(s, &dummy_res, s->u.tmp_buf, TRUE); + } else { + find_group_name(s, s->u.tmp_buf, TRUE); } p = p1; } - goto emit_back_reference; + break; case '0': p += 2; c = 0; @@ -2053,11 +2134,11 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) } return re_parse_error(s, "back reference out of range in regular expression"); } - emit_back_reference: last_atom_start = s->byte_code.size; last_capture_count = s->capture_count; - re_emit_op_u8(s, REOP_back_reference + 2 * is_backward_dir + s->ignore_case, c); + re_emit_op_u8(s, REOP_back_reference + 2 * is_backward_dir + s->ignore_case, 1); + dbuf_putc(&s->byte_code, c); } break; default: @@ -2090,8 +2171,15 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) if (is_backward_dir) re_emit_op(s, REOP_prev); if (c >= CLASS_RANGE_BASE) { - int ret; - ret = 
re_emit_string_list(s, cr); + int ret = 0; + /* optimize the common 'space' tests */ + if (c == (CLASS_RANGE_BASE + CHAR_RANGE_s)) { + re_emit_op(s, REOP_space); + } else if (c == (CLASS_RANGE_BASE + CHAR_RANGE_S)) { + re_emit_op(s, REOP_not_space); + } else { + ret = re_emit_string_list(s, cr); + } re_string_list_free(cr); if (ret) return -1; @@ -2166,20 +2254,39 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) if (last_atom_start < 0) { return re_parse_error(s, "nothing to repeat"); } - /* the spec tells that if there is no advance when - running the atom after the first quant_min times, - then there is no match. We remove this test when we - are sure the atom always advances the position. */ - add_zero_advance_check = re_need_check_advance(s->byte_code.buf + last_atom_start, - s->byte_code.size - last_atom_start); - { + BOOL need_capture_init, add_zero_advance_check; int len, pos; + + /* the spec tells that if there is no advance when + running the atom after the first quant_min times, + then there is no match. We remove this test when we + are sure the atom always advances the position. */ + add_zero_advance_check = + re_need_check_adv_and_capture_init(&need_capture_init, + s->byte_code.buf + last_atom_start, + s->byte_code.size - last_atom_start); + + /* general case: need to reset the capture at each + iteration. We don't do it if there are no captures + in the atom or if we are sure all captures are + initialized in the atom. If quant_min = 0, we still + need to reset once the captures in case the atom + does not match. 
*/ + if (need_capture_init && last_capture_count != s->capture_count) { + if (dbuf_insert(&s->byte_code, last_atom_start, 3)) + goto out_of_memory; + int pos = last_atom_start; + s->byte_code.buf[pos++] = REOP_save_reset; + s->byte_code.buf[pos++] = last_capture_count; + s->byte_code.buf[pos++] = s->capture_count - 1; + } + len = s->byte_code.size - last_atom_start; if (quant_min == 0) { /* need to reset the capture in case the atom is not executed */ - if (last_capture_count != s->capture_count) { + if (!need_capture_init && last_capture_count != s->capture_count) { if (dbuf_insert(&s->byte_code, last_atom_start, 3)) goto out_of_memory; s->byte_code.buf[last_atom_start++] = REOP_save_reset; @@ -2320,6 +2427,8 @@ static int re_parse_disjunction(REParseState *s, BOOL is_backward_dir) pos = re_emit_op_u32(s, REOP_goto, 0); + s->group_name_scope++; + if (re_parse_alternative(s, is_backward_dir)) return -1; @@ -2382,6 +2491,13 @@ static int compute_register_count(uint8_t *bc_buf, int bc_buf_len) val = get_u16(bc_buf + pos + 1); len += val * 8; break; + case REOP_back_reference: + case REOP_back_reference_i: + case REOP_backward_back_reference: + case REOP_backward_back_reference_i: + val = bc_buf[pos + 1]; + len += val; + break; } pos += len; } @@ -2481,7 +2597,7 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size, s->byte_code.size - RE_HEADER_LEN); /* add the named groups if needed */ - if (s->group_names.size > (s->capture_count - 1)) { + if (s->group_names.size > (s->capture_count - 1) * LRE_GROUP_NAME_TRAILER_LEN) { dbuf_put(&s->byte_code, s->group_names.buf, s->group_names.size); put_u16(s->byte_code.buf + RE_HEADER_FLAGS, lre_get_flags(s->byte_code.buf) | LRE_FLAG_NAMED_GROUPS); @@ -2502,14 +2618,6 @@ static BOOL is_line_terminator(uint32_t c) return (c == '\n' || c == '\r' || c == CP_LS || c == CP_PS); } -static BOOL is_word_char(uint32_t c) -{ - return ((c >= '0' && c <= '9') || - (c >= 'a' && c <= 'z') || - (c >= 'A' && c <= 'Z') || - (c == 
'_')); -} - #define GET_CHAR(c, cptr, cbuf_end, cbuf_type) \ do { \ if (cbuf_type == 0) { \ @@ -2664,7 +2772,7 @@ static no_inline int stack_realloc(REExecContext *s, size_t n) /* return 1 if match, 0 if not match or < 0 if error. */ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, - uint8_t **regs, const uint8_t *pc, const uint8_t *cptr) + const uint8_t *pc, const uint8_t *cptr) { int opcode; int cbuf_type; @@ -2704,24 +2812,24 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, } /* avoid saving the previous value if already saved */ -#define SAVE_REG(idx, value) \ +#define SAVE_CAPTURE_CHECK(idx, value) \ { \ StackElem *sp1; \ sp1 = sp; \ for(;;) { \ if (sp1 > bp) { \ - if (sp1[-2].val == -(int)(idx + 1)) \ + if (sp1[-2].val == idx) \ break; \ sp1 -= 2; \ } else { \ CHECK_STACK_SPACE(2); \ - sp[0].val = -(int)(idx + 1); \ - sp[1].ptr = regs[idx]; \ + sp[0].val = idx; \ + sp[1].ptr = capture[idx]; \ sp += 2; \ break; \ } \ } \ - regs[idx] = (value); \ + capture[idx] = (value); \ } @@ -2746,13 +2854,9 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, REExecStateEnum type; if (bp == s->stack_buf) return 0; - /* undo the modifications to capture[] and regs[] */ + /* undo the modifications to capture[] */ while (sp > bp) { - intptr_t idx2 = sp[-2].val; - if (idx2 >= 0) - capture[idx2] = sp[-1].ptr; - else - regs[-idx2 - 1] = sp[-1].ptr; + capture[sp[-2].val] = sp[-1].ptr; sp -= 2; } @@ -2805,13 +2909,9 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, for(;;) { REExecStateEnum type; type = bp[-1].bp.type; - /* undo the modifications to capture[] and regs[] */ + /* undo the modifications to capture[] */ while (sp > bp) { - intptr_t idx2 = sp[-2].val; - if (idx2 >= 0) - capture[idx2] = sp[-1].ptr; - else - regs[-idx2 - 1] = sp[-1].ptr; + capture[sp[-2].val] = sp[-1].ptr; sp -= 2; } pc = sp[-3].ptr; @@ -2914,6 +3014,20 @@ static intptr_t lre_exec_backtrack(REExecContext *s, 
uint8_t **capture, goto no_match; GET_CHAR(c, cptr, cbuf_end, cbuf_type); break; + case REOP_space: + if (cptr == cbuf_end) + goto no_match; + GET_CHAR(c, cptr, cbuf_end, cbuf_type); + if (!lre_is_space(c)) + goto no_match; + break; + case REOP_not_space: + if (cptr == cbuf_end) + goto no_match; + GET_CHAR(c, cptr, cbuf_end, cbuf_type); + if (lre_is_space(c)) + goto no_match; + break; case REOP_save_start: case REOP_save_end: val = *pc++; @@ -2939,20 +3053,20 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, } break; case REOP_set_i32: - idx = pc[0]; + idx = 2 * s->capture_count + pc[0]; val = get_u32(pc + 1); pc += 5; - SAVE_REG(idx, (void *)(uintptr_t)val); + SAVE_CAPTURE_CHECK(idx, (void *)(uintptr_t)val); break; case REOP_loop: { uint32_t val2; - idx = pc[0]; + idx = 2 * s->capture_count + pc[0]; val = get_u32(pc + 1); pc += 5; - val2 = (uintptr_t)regs[idx] - 1; - SAVE_REG(idx, (void *)(uintptr_t)val2); + val2 = (uintptr_t)capture[idx] - 1; + SAVE_CAPTURE_CHECK(idx, (void *)(uintptr_t)val2); if (val2 != 0) { pc += (int)val; if (lre_poll_timeout(s)) @@ -2967,14 +3081,14 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, { const uint8_t *pc1; uint32_t val2, limit; - idx = pc[0]; + idx = 2 * s->capture_count + pc[0]; limit = get_u32(pc + 1); val = get_u32(pc + 5); pc += 9; /* decrement the counter */ - val2 = (uintptr_t)regs[idx] - 1; - SAVE_REG(idx, (void *)(uintptr_t)val2); + val2 = (uintptr_t)capture[idx] - 1; + SAVE_CAPTURE_CHECK(idx, (void *)(uintptr_t)val2); if (val2 > limit) { /* normal loop if counter > limit */ @@ -2985,7 +3099,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, /* check advance */ if ((opcode == REOP_loop_check_adv_split_goto_first || opcode == REOP_loop_check_adv_split_next_first) && - regs[idx + 1] == cptr && + capture[idx + 1] == cptr && val2 != limit) { goto no_match; } @@ -3011,14 +3125,14 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t 
**capture, } break; case REOP_set_char_pos: - idx = pc[0]; + idx = 2 * s->capture_count + pc[0]; pc++; - SAVE_REG(idx, (uint8_t *)cptr); + SAVE_CAPTURE_CHECK(idx, (uint8_t *)cptr); break; case REOP_check_advance: - idx = pc[0]; + idx = 2 * s->capture_count + pc[0]; pc++; - if (regs[idx] == cptr) + if (capture[idx] == cptr) goto no_match; break; case REOP_word_boundary: @@ -3034,18 +3148,22 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, v1 = FALSE; } else { PEEK_PREV_CHAR(c, cptr, s->cbuf, cbuf_type); - if (ignore_case) - c = lre_canonicalize(c, s->is_unicode); - v1 = is_word_char(c); + if (c < 256) { + v1 = (lre_is_word_byte(c) != 0); + } else { + v1 = ignore_case && (c == 0x017f || c == 0x212a); + } } /* current char */ if (cptr >= cbuf_end) { v2 = FALSE; } else { PEEK_CHAR(c, cptr, cbuf_end, cbuf_type); - if (ignore_case) - c = lre_canonicalize(c, s->is_unicode); - v2 = is_word_char(c); + if (c < 256) { + v2 = (lre_is_word_byte(c) != 0); + } else { + v2 = ignore_case && (c == 0x017f || c == 0x212a); + } } if (v1 ^ v2 ^ is_boundary) goto no_match; @@ -3057,43 +3175,53 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, case REOP_backward_back_reference_i: { const uint8_t *cptr1, *cptr1_end, *cptr1_start; + const uint8_t *pc1; uint32_t c1, c2; + int i, n; - val = *pc++; - if (val >= s->capture_count) - goto no_match; - cptr1_start = capture[2 * val]; - cptr1_end = capture[2 * val + 1]; - if (!cptr1_start || !cptr1_end) - break; - if (opcode == REOP_back_reference || - opcode == REOP_back_reference_i) { - cptr1 = cptr1_start; - while (cptr1 < cptr1_end) { - if (cptr >= cbuf_end) - goto no_match; - GET_CHAR(c1, cptr1, cptr1_end, cbuf_type); - GET_CHAR(c2, cptr, cbuf_end, cbuf_type); - if (opcode == REOP_back_reference_i) { - c1 = lre_canonicalize(c1, s->is_unicode); - c2 = lre_canonicalize(c2, s->is_unicode); - } - if (c1 != c2) - goto no_match; - } - } else { - cptr1 = cptr1_end; - while (cptr1 > cptr1_start) { - if 
(cptr == s->cbuf) - goto no_match; - GET_PREV_CHAR(c1, cptr1, cptr1_start, cbuf_type); - GET_PREV_CHAR(c2, cptr, s->cbuf, cbuf_type); - if (opcode == REOP_backward_back_reference_i) { - c1 = lre_canonicalize(c1, s->is_unicode); - c2 = lre_canonicalize(c2, s->is_unicode); + n = *pc++; + pc1 = pc; + pc += n; + + for(i = 0; i < n; i++) { + val = pc1[i]; + if (val >= s->capture_count) + goto no_match; + cptr1_start = capture[2 * val]; + cptr1_end = capture[2 * val + 1]; + /* test the first not empty capture */ + if (cptr1_start && cptr1_end) { + if (opcode == REOP_back_reference || + opcode == REOP_back_reference_i) { + cptr1 = cptr1_start; + while (cptr1 < cptr1_end) { + if (cptr >= cbuf_end) + goto no_match; + GET_CHAR(c1, cptr1, cptr1_end, cbuf_type); + GET_CHAR(c2, cptr, cbuf_end, cbuf_type); + if (opcode == REOP_back_reference_i) { + c1 = lre_canonicalize(c1, s->is_unicode); + c2 = lre_canonicalize(c2, s->is_unicode); + } + if (c1 != c2) + goto no_match; + } + } else { + cptr1 = cptr1_end; + while (cptr1 > cptr1_start) { + if (cptr == s->cbuf) + goto no_match; + GET_PREV_CHAR(c1, cptr1, cptr1_start, cbuf_type); + GET_PREV_CHAR(c2, cptr, s->cbuf, cbuf_type); + if (opcode == REOP_backward_back_reference_i) { + c1 = lre_canonicalize(c1, s->is_unicode); + c2 = lre_canonicalize(c2, s->is_unicode); + } + if (c1 != c2) + goto no_match; + } } - if (c1 != c2) - goto no_match; + break; } } } @@ -3200,8 +3328,7 @@ int lre_exec(uint8_t **capture, int cbuf_type, void *opaque) { REExecContext s_s, *s = &s_s; - int re_flags, i, ret, register_count; - uint8_t **regs; + int re_flags, i, ret; const uint8_t *cptr; re_flags = lre_get_flags(bc_buf); @@ -3220,10 +3347,6 @@ int lre_exec(uint8_t **capture, for(i = 0; i < s->capture_count * 2; i++) capture[i] = NULL; - /* XXX: modify the API so that the registers are allocated after - the captures to suppress some tests */ - register_count = bc_buf[RE_HEADER_REGISTER_COUNT]; - regs = alloca(register_count * sizeof(regs[0])); cptr = cbuf + 
(cindex << cbuf_type); if (0 < cindex && cindex < clen && s->cbuf_type == 2) { @@ -3233,13 +3356,19 @@ int lre_exec(uint8_t **capture, } } - ret = lre_exec_backtrack(s, capture, regs, bc_buf + RE_HEADER_LEN, - cptr); + ret = lre_exec_backtrack(s, capture, bc_buf + RE_HEADER_LEN, cptr); + if (s->stack_buf != s->static_stack_buf) lre_realloc(s->opaque, s->stack_buf, 0); return ret; } +int lre_get_alloc_count(const uint8_t *bc_buf) +{ + return bc_buf[RE_HEADER_CAPTURE_COUNT] * 2 + + bc_buf[RE_HEADER_REGISTER_COUNT]; +} + int lre_get_capture_count(const uint8_t *bc_buf) { return bc_buf[RE_HEADER_CAPTURE_COUNT]; @@ -3278,7 +3407,7 @@ int main(int argc, char **argv) int len, flags, ret, i; uint8_t *bc; char error_msg[64]; - uint8_t *capture[CAPTURE_COUNT_MAX * 2]; + uint8_t *capture; const char *input; int input_len, capture_count; @@ -3297,6 +3426,7 @@ int main(int argc, char **argv) input = argv[3]; input_len = strlen(input); + capture = malloc(sizeof(capture[0]) * lre_get_alloc_count(bc)); ret = lre_exec(capture, bc, (uint8_t *)input, 0, input_len, 0, NULL); printf("ret=%d\n", ret); if (ret == 1) { @@ -3312,6 +3442,7 @@ int main(int argc, char **argv) printf("\n"); } } + free(capture); return 0; } #endif diff --git a/src/couch_quickjs/quickjs/libregexp.h b/src/couch_quickjs/quickjs/libregexp.h index da76e4cef..0905bcb79 100644 --- a/src/couch_quickjs/quickjs/libregexp.h +++ b/src/couch_quickjs/quickjs/libregexp.h @@ -40,9 +40,13 @@ #define LRE_RET_MEMORY_ERROR (-1) #define LRE_RET_TIMEOUT (-2) +/* trailer length after the group name including the trailing '\0' */ +#define LRE_GROUP_NAME_TRAILER_LEN 2 + uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size, const char *buf, size_t buf_len, int re_flags, void *opaque); +int lre_get_alloc_count(const uint8_t *bc_buf); int lre_get_capture_count(const uint8_t *bc_buf); int lre_get_flags(const uint8_t *bc_buf); const char *lre_get_groupnames(const uint8_t *bc_buf); diff --git 
a/src/couch_quickjs/quickjs/libunicode.h b/src/couch_quickjs/quickjs/libunicode.h index 5d964e40f..5b02c82b4 100644 --- a/src/couch_quickjs/quickjs/libunicode.h +++ b/src/couch_quickjs/quickjs/libunicode.h @@ -147,6 +147,11 @@ static inline int lre_is_id_continue_byte(uint8_t c) { UNICODE_C_DIGIT); } +static inline int lre_is_word_byte(uint8_t c) { + return lre_ctype_bits[c] & (UNICODE_C_UPPER | UNICODE_C_LOWER | + UNICODE_C_UNDER | UNICODE_C_DIGIT); +} + int lre_is_space_non_ascii(uint32_t c); static inline int lre_is_space(uint32_t c) { diff --git a/src/couch_quickjs/quickjs/quickjs-opcode.h b/src/couch_quickjs/quickjs/quickjs-opcode.h index d93852133..7b98ddf05 100644 --- a/src/couch_quickjs/quickjs/quickjs-opcode.h +++ b/src/couch_quickjs/quickjs/quickjs-opcode.h @@ -168,6 +168,7 @@ DEF( set_var_ref, 3, 1, 1, var_ref) /* must come after put_var_ref */ DEF(set_loc_uninitialized, 3, 0, 0, loc) DEF( get_loc_check, 3, 0, 1, loc) DEF( put_loc_check, 3, 1, 0, loc) /* must come after get_loc_check */ +DEF( set_loc_check, 3, 1, 1, loc) /* must come after put_loc_check */ DEF( put_loc_check_init, 3, 1, 0, loc) DEF(get_loc_checkthis, 3, 0, 1, loc) DEF(get_var_ref_check, 3, 0, 1, var_ref) diff --git a/src/couch_quickjs/quickjs/quickjs.c b/src/couch_quickjs/quickjs/quickjs.c index b84af4a27..6cad52d76 100644 --- a/src/couch_quickjs/quickjs/quickjs.c +++ b/src/couch_quickjs/quickjs/quickjs.c @@ -451,13 +451,6 @@ struct JSContext { uint16_t binary_object_count; int binary_object_size; - /* TRUE if the array prototype is "normal": - - no small index properties which are get/set or non writable - - its prototype is Object.prototype - - Object.prototype has no small index properties which are get/set or non writable - - the prototype of Object.prototype is null (always true as it is immutable) - */ - uint8_t std_array_prototype; JSShape *array_shape; /* initial shape for Array objects */ JSShape *arguments_shape; /* shape for arguments objects */ @@ -936,7 +929,13 @@ struct 
JSObject { struct { int __gc_ref_count; /* corresponds to header.ref_count */ uint8_t __gc_mark : 7; /* corresponds to header.mark/gc_obj_type */ - uint8_t is_prototype : 1; /* object may be used as prototype */ + /* TRUE if the array prototype is "normal": + - no small index properties which are get/set or non writable + - its prototype is Object.prototype + - Object.prototype has no small index properties which are get/set or non writable + - the prototype of Object.prototype is null (always true as it is immutable) + */ + uint8_t is_std_array_prototype : 1; uint8_t extensible : 1; uint8_t free_mark : 1; /* only used when freeing objects with cycles */ @@ -5206,7 +5205,7 @@ static JSValue JS_NewObjectFromShape(JSContext *ctx, JSShape *sh, JSClassID clas if (unlikely(!p)) goto fail; p->class_id = class_id; - p->is_prototype = 0; + p->is_std_array_prototype = 0; p->extensible = TRUE; p->free_mark = 0; p->is_exotic = 0; @@ -7566,14 +7565,7 @@ static int JS_SetPrototypeInternal(JSContext *ctx, JSValueConst obj, if (sh->proto) JS_FreeValue(ctx, JS_MKPTR(JS_TAG_OBJECT, sh->proto)); sh->proto = proto; - if (proto) - proto->is_prototype = TRUE; - if (p->is_prototype) { - /* track modification of Array.prototype */ - if (unlikely(p == JS_VALUE_GET_OBJ(ctx->class_proto[JS_CLASS_ARRAY]))) { - ctx->std_array_prototype = FALSE; - } - } + p->is_std_array_prototype = FALSE; return TRUE; } @@ -8773,12 +8765,25 @@ static JSProperty *add_property(JSContext *ctx, { JSShape *sh, *new_sh; - if (unlikely(p->is_prototype)) { - /* track addition of small integer properties to Array.prototype and Object.prototype */ - if (unlikely((p == JS_VALUE_GET_OBJ(ctx->class_proto[JS_CLASS_ARRAY]) || - p == JS_VALUE_GET_OBJ(ctx->class_proto[JS_CLASS_OBJECT])) && - __JS_AtomIsTaggedInt(prop))) { - ctx->std_array_prototype = FALSE; + if (unlikely(__JS_AtomIsTaggedInt(prop))) { + /* update is_std_array_prototype */ + if (unlikely(p->is_std_array_prototype)) { + p->is_std_array_prototype = FALSE; + } 
else if (unlikely(p->has_immutable_prototype)) { + struct list_head *el; + + /* modifying Object.prototype : reset the corresponding is_std_array_prototype */ + list_for_each(el, &ctx->rt->context_list) { + JSContext *ctx1 = list_entry(el, JSContext, link); + if (JS_IsObject(ctx1->class_proto[JS_CLASS_OBJECT]) && + JS_VALUE_GET_OBJ(ctx1->class_proto[JS_CLASS_OBJECT]) == p) { + if (JS_IsObject(ctx1->class_proto[JS_CLASS_ARRAY])) { + JSObject *p1 = JS_VALUE_GET_OBJ(ctx1->class_proto[JS_CLASS_ARRAY]); + p1->is_std_array_prototype = FALSE; + } + break; + } + } } } sh = p->shape; @@ -8860,11 +8865,7 @@ static no_inline __exception int convert_fast_array_to_array(JSContext *ctx, p->u.array.u.values = NULL; /* fail safe */ p->u.array.u1.size = 0; p->fast_array = 0; - - /* track modification of Array.prototype */ - if (unlikely(p == JS_VALUE_GET_OBJ(ctx->class_proto[JS_CLASS_ARRAY]))) { - ctx->std_array_prototype = FALSE; - } + p->is_std_array_prototype = FALSE; return 0; } @@ -9509,6 +9510,18 @@ int JS_SetPropertyInternal(JSContext *ctx, JSValueConst obj, } } +/* return true if an element can be added to a fast array without further tests */ +static force_inline BOOL can_extend_fast_array(JSObject *p) +{ + JSObject *proto; + if (!p->extensible) + return FALSE; + proto = p->shape->proto; + if (!proto) + return TRUE; + return proto->is_std_array_prototype; +} + /* flags can be JS_PROP_THROW or JS_PROP_THROW_STRICT */ static int JS_SetPropertyValue(JSContext *ctx, JSValueConst this_obj, JSValue prop, JSValue val, int flags) @@ -9529,9 +9542,7 @@ static int JS_SetPropertyValue(JSContext *ctx, JSValueConst this_obj, /* fast path to add an element to the array */ if (unlikely(idx != (uint32_t)p->u.array.count || !p->fast_array || - !p->extensible || - p->shape->proto != JS_VALUE_GET_OBJ(ctx->class_proto[JS_CLASS_ARRAY]) || - !ctx->std_array_prototype)) { + !can_extend_fast_array(p))) { goto slow_path; } /* add element */ @@ -18335,6 +18346,18 @@ static JSValue 
JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj, sp--; } BREAK; + CASE(OP_set_loc_check): + { + int idx; + idx = get_u16(pc); + pc += 2; + if (unlikely(JS_IsUninitialized(var_buf[idx]))) { + JS_ThrowReferenceErrorUninitialized2(ctx, b, idx, FALSE); + goto exception; + } + set_value(ctx, &var_buf[idx], JS_DupValue(ctx, sp[-1])); + } + BREAK; CASE(OP_put_loc_check_init): { int idx; @@ -19142,9 +19165,7 @@ static JSValue JS_CallInternal(JSContext *caller_ctx, JSValueConst func_obj, uint32_t new_len, array_len; if (unlikely(idx != (uint32_t)p->u.array.count || !p->fast_array || - !p->extensible || - p->shape->proto != JS_VALUE_GET_OBJ(ctx->class_proto[JS_CLASS_ARRAY]) || - !ctx->std_array_prototype)) { + !can_extend_fast_array(p))) { goto put_array_el_slow_path; } if (likely(JS_VALUE_GET_TAG(p->prop[0].u.value) != JS_TAG_INT)) @@ -23355,8 +23376,10 @@ static int cpool_add(JSParseState *s, JSValue val) JSFunctionDef *fd = s->cur_func; if (js_resize_array(s->ctx, (void *)&fd->cpool, sizeof(fd->cpool[0]), - &fd->cpool_size, fd->cpool_count + 1)) + &fd->cpool_size, fd->cpool_count + 1)) { + JS_FreeValue(s->ctx, val); return -1; + } fd->cpool[fd->cpool_count++] = val; return fd->cpool_count - 1; } @@ -29946,6 +29969,7 @@ static int js_create_module_bytecode_function(JSContext *ctx, JSModuleDef *m) if (JS_IsException(func_obj)) return -1; + m->func_obj = func_obj; b = JS_VALUE_GET_PTR(bfunc); func_obj = js_closure2(ctx, func_obj, b, NULL, NULL, TRUE, m); if (JS_IsException(func_obj)) { @@ -29953,7 +29977,6 @@ static int js_create_module_bytecode_function(JSContext *ctx, JSModuleDef *m) JS_FreeValue(ctx, func_obj); return -1; } - m->func_obj = func_obj; return 0; } @@ -34760,7 +34783,7 @@ static __exception int resolve_labels(JSContext *ctx, JSFunctionDef *s) /* Transformation: dup put_x(n) drop -> put_x(n) */ int op1, line2 = -1; /* Transformation: dup put_x(n) -> set_x(n) */ - if (code_match(&cc, pos_next, M3(OP_put_loc, OP_put_arg, OP_put_var_ref), -1, -1)) { 
+ if (code_match(&cc, pos_next, M4(OP_put_loc, OP_put_loc_check, OP_put_arg, OP_put_var_ref), -1, -1)) { if (cc.line_num >= 0) line_num = cc.line_num; op1 = cc.op + 1; /* put_x -> set_x */ pos_next = cc.pos; @@ -34868,6 +34891,7 @@ static __exception int resolve_labels(JSContext *ctx, JSFunctionDef *s) goto no_change; #endif case OP_put_loc: + case OP_put_loc_check: case OP_put_arg: case OP_put_var_ref: if (OPTIMIZE) { @@ -42131,9 +42155,7 @@ static JSValue js_array_push(JSContext *ctx, JSValueConst this_val, if (likely(JS_VALUE_GET_TAG(this_val) == JS_TAG_OBJECT && !unshift)) { JSObject *p = JS_VALUE_GET_OBJ(this_val); if (likely(p->class_id == JS_CLASS_ARRAY && p->fast_array && - p->extensible && - p->shape->proto == JS_VALUE_GET_OBJ(ctx->class_proto[JS_CLASS_ARRAY]) && - ctx->std_array_prototype && + can_extend_fast_array(p) && JS_VALUE_GET_TAG(p->prop[0].u.value) == JS_TAG_INT && JS_VALUE_GET_INT(p->prop[0].u.value) == p->u.array.count && (get_shape_prop(p->shape)->flags & JS_PROP_WRITABLE) != 0)) { @@ -45147,7 +45169,7 @@ static JSValue js_string_match(JSContext *ctx, JSValueConst this_val, if (JS_IsUndefined(O) || JS_IsNull(O)) return JS_ThrowTypeError(ctx, "cannot convert to object"); - if (!JS_IsUndefined(regexp) && !JS_IsNull(regexp)) { + if (JS_IsObject(regexp)) { matcher = JS_GetProperty(ctx, regexp, atom); if (JS_IsException(matcher)) return JS_EXCEPTION; @@ -45327,7 +45349,7 @@ static JSValue js_string_replace(JSContext *ctx, JSValueConst this_val, replaceValue_str = JS_UNDEFINED; repl_str = JS_UNDEFINED; - if (!JS_IsUndefined(searchValue) && !JS_IsNull(searchValue)) { + if (JS_IsObject(searchValue)) { JSValue replacer; if (is_replaceAll) { if (check_regexp_g_flag(ctx, searchValue) < 0) @@ -45438,7 +45460,7 @@ static JSValue js_string_split(JSContext *ctx, JSValueConst this_val, A = JS_UNDEFINED; R = JS_UNDEFINED; - if (!JS_IsUndefined(separator) && !JS_IsNull(separator)) { + if (JS_IsObject(separator)) { JSValue splitter; splitter = 
JS_GetProperty(ctx, separator, JS_ATOM_Symbol_split); if (JS_IsException(splitter)) @@ -45479,7 +45501,6 @@ static JSValue js_string_split(JSContext *ctx, JSValueConst this_val, goto add_tail; goto done; } - q = p; for (q = p; (q += !r) <= s - r - !r; q = p = e + r) { e = string_indexof(sp, rp, q); if (e < 0) @@ -47415,11 +47436,12 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val, JSValue indices, indices_groups; uint8_t *re_bytecode; uint8_t **capture, *str_buf; - int rc, capture_count, shift, i, re_flags; + int rc, capture_count, shift, i, re_flags, alloc_count; int64_t last_index; const char *group_name_ptr; JSObject *p_obj; - + JSAtom group_name; + if (!re) return JS_EXCEPTION; @@ -47433,7 +47455,8 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val, indices = JS_UNDEFINED; indices_groups = JS_UNDEFINED; capture = NULL; - + group_name = JS_ATOM_NULL; + if (js_regexp_get_lastIndex(ctx, &last_index, this_val)) goto fail; @@ -47443,12 +47466,13 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val, last_index = 0; } str = JS_VALUE_GET_STRING(str_val); - capture_count = lre_get_capture_count(re_bytecode); - if (capture_count > 0) { - capture = js_malloc(ctx, sizeof(capture[0]) * capture_count * 2); + alloc_count = lre_get_alloc_count(re_bytecode); + if (alloc_count > 0) { + capture = js_malloc(ctx, sizeof(capture[0]) * alloc_count); if (!capture) goto fail; } + capture_count = lre_get_capture_count(re_bytecode); shift = str->is_wide_char; str_buf = str->u.str8; if (last_index > str->len) { @@ -47515,15 +47539,20 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val, goto fail; for(i = 0; i < capture_count; i++) { - const char *name = NULL; uint8_t **match = &capture[2 * i]; int start = -1; int end = -1; JSValue val; if (group_name_ptr && i > 0) { - if (*group_name_ptr) name = group_name_ptr; - group_name_ptr += strlen(group_name_ptr) + 1; + if (*group_name_ptr) { + /* XXX: slow, should 
create a shape when the regexp is + compiled */ + group_name = JS_NewAtom(ctx, group_name_ptr); + if (group_name == JS_ATOM_NULL) + goto fail; + } + group_name_ptr += strlen(group_name_ptr) + LRE_GROUP_NAME_TRAILER_LEN; } if (match[0] && match[1]) { @@ -47550,12 +47579,15 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val, goto fail; } } - if (name && !JS_IsUndefined(indices_groups)) { - val = JS_DupValue(ctx, val); - if (JS_DefinePropertyValueStr(ctx, indices_groups, - name, val, prop_flags) < 0) { - JS_FreeValue(ctx, val); - goto fail; + if (group_name != JS_ATOM_NULL) { + /* JS_HasProperty() cannot fail here */ + if (!JS_IsUndefined(val) || + !JS_HasProperty(ctx, indices_groups, group_name)) { + if (JS_DefinePropertyValue(ctx, indices_groups, + group_name, JS_DupValue(ctx, val), prop_flags) < 0) { + JS_FreeValue(ctx, val); + goto fail; + } } } if (JS_DefinePropertyValueUint32(ctx, indices, i, val, @@ -47571,13 +47603,19 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val, goto fail; } - if (name) { - if (JS_DefinePropertyValueStr(ctx, groups, name, - JS_DupValue(ctx, val), - prop_flags) < 0) { - JS_FreeValue(ctx, val); - goto fail; + if (group_name != JS_ATOM_NULL) { + /* JS_HasProperty() cannot fail here */ + if (!JS_IsUndefined(val) || + !JS_HasProperty(ctx, groups, group_name)) { + if (JS_DefinePropertyValue(ctx, groups, group_name, + JS_DupValue(ctx, val), + prop_flags) < 0) { + JS_FreeValue(ctx, val); + goto fail; + } } + JS_FreeAtom(ctx, group_name); + group_name = JS_ATOM_NULL; } p_obj->u.array.u.values[p_obj->u.array.count++] = val; } @@ -47598,6 +47636,7 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val, ret = obj; obj = JS_UNDEFINED; fail: + JS_FreeAtom(ctx, group_name); JS_FreeValue(ctx, indices_groups); JS_FreeValue(ctx, indices); JS_FreeValue(ctx, str_val); @@ -47617,7 +47656,7 @@ static JSValue js_regexp_replace(JSContext *ctx, JSValueConst this_val, JSValueC uint8_t *re_bytecode; int ret; 
uint8_t **capture, *str_buf; - int capture_count, shift, re_flags; + int capture_count, alloc_count, shift, re_flags; int next_src_pos, start, end; int64_t last_index; StringBuffer b_s, *b = &b_s; @@ -47651,12 +47690,13 @@ static JSValue js_regexp_replace(JSContext *ctx, JSValueConst this_val, JSValueC if (js_regexp_get_lastIndex(ctx, &last_index, this_val)) goto fail; } - capture_count = lre_get_capture_count(re_bytecode); - if (capture_count > 0) { - capture = js_malloc(ctx, sizeof(capture[0]) * capture_count * 2); + alloc_count = lre_get_alloc_count(re_bytecode); + if (alloc_count > 0) { + capture = js_malloc(ctx, sizeof(capture[0]) * alloc_count); if (!capture) goto fail; } + capture_count = lre_get_capture_count(re_bytecode); fullUnicode = ((re_flags & (LRE_FLAG_UNICODE | LRE_FLAG_UNICODE_SETS)) != 0); shift = str->is_wide_char; str_buf = str->u.str8; @@ -55405,6 +55445,11 @@ static int JS_AddIntrinsicBasicObjects(JSContext *ctx) return -1; ctx->array_ctor = obj; + { + JSObject *p = JS_VALUE_GET_OBJ(ctx->class_proto[JS_CLASS_ARRAY]); + p->is_std_array_prototype = TRUE; + } + ctx->array_shape = js_new_shape2(ctx, get_proto_obj(ctx->class_proto[JS_CLASS_ARRAY]), JS_PROP_INITIAL_HASH_SIZE, 1); if (!ctx->array_shape) @@ -55412,7 +55457,6 @@ static int JS_AddIntrinsicBasicObjects(JSContext *ctx) if (add_shape_property(ctx, &ctx->array_shape, NULL, JS_ATOM_length, JS_PROP_WRITABLE | JS_PROP_LENGTH)) return -1; - ctx->std_array_prototype = TRUE; ctx->arguments_shape = js_new_shape2(ctx, get_proto_obj(ctx->class_proto[JS_CLASS_OBJECT]), JS_PROP_INITIAL_HASH_SIZE, 3); diff --git a/src/couch_quickjs/quickjs/test262.conf b/src/couch_quickjs/quickjs/test262.conf index fe52a0b08..aa76e6395 100644 --- a/src/couch_quickjs/quickjs/test262.conf +++ b/src/couch_quickjs/quickjs/test262.conf @@ -176,7 +176,7 @@ Reflect.construct Reflect.set Reflect.setPrototypeOf regexp-dotall -regexp-duplicate-named-groups=skip +regexp-duplicate-named-groups regexp-lookbehind 
regexp-match-indices regexp-modifiers @@ -255,54 +255,6 @@ test262/test/built-ins/ThrowTypeError/unique-per-realm-function-proto.js #test262/test/built-ins/RegExp/CharacterClassEscapes/ #test262/test/built-ins/RegExp/property-escapes/ -# not yet in official specification -test262/test/built-ins/String/prototype/match/cstm-matcher-on-bigint-primitive.js -test262/test/built-ins/String/prototype/match/cstm-matcher-on-bigint-primitive.js -test262/test/built-ins/String/prototype/match/cstm-matcher-on-boolean-primitive.js -test262/test/built-ins/String/prototype/match/cstm-matcher-on-boolean-primitive.js -test262/test/built-ins/String/prototype/match/cstm-matcher-on-number-primitive.js -test262/test/built-ins/String/prototype/match/cstm-matcher-on-number-primitive.js -test262/test/built-ins/String/prototype/match/cstm-matcher-on-string-primitive.js -test262/test/built-ins/String/prototype/match/cstm-matcher-on-string-primitive.js -test262/test/built-ins/String/prototype/matchAll/cstm-matchall-on-bigint-primitive.js -test262/test/built-ins/String/prototype/matchAll/cstm-matchall-on-bigint-primitive.js -test262/test/built-ins/String/prototype/matchAll/cstm-matchall-on-number-primitive.js -test262/test/built-ins/String/prototype/matchAll/cstm-matchall-on-number-primitive.js -test262/test/built-ins/String/prototype/matchAll/cstm-matchall-on-string-primitive.js -test262/test/built-ins/String/prototype/matchAll/cstm-matchall-on-string-primitive.js -test262/test/built-ins/String/prototype/replace/cstm-replace-on-bigint-primitive.js -test262/test/built-ins/String/prototype/replace/cstm-replace-on-bigint-primitive.js -test262/test/built-ins/String/prototype/replace/cstm-replace-on-boolean-primitive.js -test262/test/built-ins/String/prototype/replace/cstm-replace-on-boolean-primitive.js -test262/test/built-ins/String/prototype/replace/cstm-replace-on-number-primitive.js -test262/test/built-ins/String/prototype/replace/cstm-replace-on-number-primitive.js 
-test262/test/built-ins/String/prototype/replace/cstm-replace-on-string-primitive.js -test262/test/built-ins/String/prototype/replace/cstm-replace-on-string-primitive.js -test262/test/built-ins/String/prototype/replaceAll/cstm-replaceall-on-bigint-primitive.js -test262/test/built-ins/String/prototype/replaceAll/cstm-replaceall-on-bigint-primitive.js -test262/test/built-ins/String/prototype/replaceAll/cstm-replaceall-on-boolean-primitive.js -test262/test/built-ins/String/prototype/replaceAll/cstm-replaceall-on-boolean-primitive.js -test262/test/built-ins/String/prototype/replaceAll/cstm-replaceall-on-number-primitive.js -test262/test/built-ins/String/prototype/replaceAll/cstm-replaceall-on-number-primitive.js -test262/test/built-ins/String/prototype/replaceAll/cstm-replaceall-on-string-primitive.js -test262/test/built-ins/String/prototype/replaceAll/cstm-replaceall-on-string-primitive.js -test262/test/built-ins/String/prototype/search/cstm-search-on-bigint-primitive.js -test262/test/built-ins/String/prototype/search/cstm-search-on-bigint-primitive.js -test262/test/built-ins/String/prototype/search/cstm-search-on-boolean-primitive.js -test262/test/built-ins/String/prototype/search/cstm-search-on-boolean-primitive.js -test262/test/built-ins/String/prototype/search/cstm-search-on-number-primitive.js -test262/test/built-ins/String/prototype/search/cstm-search-on-number-primitive.js -test262/test/built-ins/String/prototype/search/cstm-search-on-string-primitive.js -test262/test/built-ins/String/prototype/search/cstm-search-on-string-primitive.js -test262/test/built-ins/String/prototype/split/cstm-split-on-bigint-primitive.js -test262/test/built-ins/String/prototype/split/cstm-split-on-bigint-primitive.js -test262/test/built-ins/String/prototype/split/cstm-split-on-boolean-primitive.js -test262/test/built-ins/String/prototype/split/cstm-split-on-boolean-primitive.js -test262/test/built-ins/String/prototype/split/cstm-split-on-number-primitive.js 
-test262/test/built-ins/String/prototype/split/cstm-split-on-number-primitive.js -test262/test/built-ins/String/prototype/split/cstm-split-on-string-primitive.js -test262/test/built-ins/String/prototype/split/cstm-split-on-string-primitive.js - #################################### # staging tests diff --git a/src/couch_quickjs/quickjs/test262_errors.txt b/src/couch_quickjs/quickjs/test262_errors.txt index d60814ac4..796c296e2 100644 --- a/src/couch_quickjs/quickjs/test262_errors.txt +++ b/src/couch_quickjs/quickjs/test262_errors.txt @@ -33,12 +33,6 @@ test262/test/staging/sm/Function/function-name-for.js:13: Test262Error: Expected test262/test/staging/sm/Function/implicit-this-in-parameter-expression.js:12: Test262Error: Expected SameValue(«[object Object]», «undefined») to be true test262/test/staging/sm/Function/invalid-parameter-list.js:13: Test262Error: Expected a SyntaxError to be thrown but no exception was thrown at all test262/test/staging/sm/Function/invalid-parameter-list.js:13: strict mode: Test262Error: Expected a SyntaxError to be thrown but no exception was thrown at all -test262/test/staging/sm/RegExp/regress-613820-1.js:12: Test262Error: Actual [aaa, aa, a] and expected [aa, a, a] should have the same contents. -test262/test/staging/sm/RegExp/regress-613820-1.js:12: strict mode: Test262Error: Actual [aaa, aa, a] and expected [aa, a, a] should have the same contents. -test262/test/staging/sm/RegExp/regress-613820-2.js:12: Test262Error: Actual [foobar, f, o, o, b, a, r] and expected [foobar, undefined, undefined, undefined, b, a, r] should have the same contents. -test262/test/staging/sm/RegExp/regress-613820-2.js:12: strict mode: Test262Error: Actual [foobar, f, o, o, b, a, r] and expected [foobar, undefined, undefined, undefined, b, a, r] should have the same contents. -test262/test/staging/sm/RegExp/regress-613820-3.js:12: Test262Error: Actual [aab, a, undefined, ab] and expected [aa, undefined, a, undefined] should have the same contents. 
-test262/test/staging/sm/RegExp/regress-613820-3.js:12: strict mode: Test262Error: Actual [aab, a, undefined, ab] and expected [aa, undefined, a, undefined] should have the same contents. test262/test/staging/sm/String/string-upper-lower-mapping.js:16: Test262Error: Expected SameValue(«""», «""») to be true test262/test/staging/sm/String/string-upper-lower-mapping.js:16: strict mode: Test262Error: Expected SameValue(«""», «""») to be true test262/test/staging/sm/TypedArray/constructor-buffer-sequence.js:29: Test262Error: Expected a ExpectedError but got a Error
