This is an automated email from the ASF dual-hosted git repository. cmcfarlen pushed a commit to branch 10.0.x in repository https://gitbox.apache.org/repos/asf/trafficserver.git
commit 8ffa5f61a98e9727867e47231d8161f31d9556cd Author: Masakazu Kitajo <[email protected]> AuthorDate: Thu Jul 18 14:22:40 2024 -0600 Deprecate the support for URL param segment (#11519) * Make TSUrlHttpParamsGet include params segment * Add some unit tests * Update docs * Fix (cherry picked from commit 5eb87bc8f264f638763fff93015652417a31ae3d) --- include/ts/ts.h | 6 +++ src/proxy/hdrs/URL.cc | 76 ++++++++++---------------------- src/proxy/hdrs/unit_tests/test_URL.cc | 82 +++++++++++++++++++++++++++++++++++ src/proxy/http/HttpSM.cc | 3 +- src/traffic_cache_tool/CacheScan.cc | 8 ++-- 5 files changed, 115 insertions(+), 60 deletions(-) diff --git a/include/ts/ts.h b/include/ts/ts.h index 075fea93ee..95be90f3a6 100644 --- a/include/ts/ts.h +++ b/include/ts/ts.h @@ -666,6 +666,9 @@ TSReturnCode TSUrlFtpTypeSet(TSMBuffer bufp, TSMLoc offset, int type); argument. Note: the returned string is not guaranteed to be null-terminated. + This function is deprecated and returns empty string. + Plugins that need "params" can call TSUrlPathGet to get a whole path string to parse it. + @param bufp marshal buffer containing the URL. @param offset location of the URL. @param length of the returned string. @@ -682,6 +685,9 @@ const char *TSUrlHttpParamsGet(TSMBuffer bufp, TSMLoc offset, int *length); copies the string to within bufp, so you can modify or delete value after calling TSUrlHttpParamsSet(). + This function is deprecated. The value passed will be internally appended to the path portion. + Thus, TSUrlHttpParamsGet will not return the value set by this function. + @param bufp marshal buffer containing the URL. @param offset location of the URL. @param value HTTP params string to set in the URL. diff --git a/src/proxy/hdrs/URL.cc b/src/proxy/hdrs/URL.cc index 191e644876..e56ab796c2 100644 --- a/src/proxy/hdrs/URL.cc +++ b/src/proxy/hdrs/URL.cc @@ -551,8 +551,22 @@ URLImpl::set_path(HdrHeap *heap, const char *value, int length, bool copy_string void URLImpl::set_params(HdrHeap *heap, const char *value, int length, bool copy_string) { - url_called_set(this); - mime_str_u16_set(heap, value, length, &(this->m_ptr_params), &(this->m_len_params), copy_string); + int path_len = this->m_len_path; + + // Truncate the current param segment if it exists + if (auto p = strchr(this->m_ptr_path, ';'); p != nullptr) { + auto params_len = path_len - (p - this->m_ptr_path); + path_len -= params_len; + } + + int total_length = path_len + 1 + length; + char *path_and_params = static_cast<char *>(alloca(total_length)); + + memcpy(path_and_params, this->m_ptr_path, path_len); + memcpy(path_and_params + path_len, ";", 1); + memcpy(path_and_params + path_len + 1, value, length); + + this->set_path(heap, path_and_params, total_length, true); } /*------------------------------------------------------------------------- @@ -887,10 +901,6 @@ url_length_get(URLImpl *url, unsigned normalization_flags) length += 1; // +1 for "/" } - if (url->m_ptr_params && url->m_len_params > 0) { - length += url->m_len_params + 1; // +1 for ";" - } - if (url->m_ptr_query && url->m_len_query > 0) { length += url->m_len_query + 1; // +1 for "?" } @@ -962,12 +972,6 @@ url_to_string(URLImpl *url, Arena *arena, int *length) memcpy(&str[idx], url->m_ptr_path, url->m_len_path); idx += url->m_len_path; - if (url->m_ptr_params && url->m_len_params > 0) { - str[idx++] = ';'; - memcpy(&str[idx], url->m_ptr_params, url->m_len_params); - idx += url->m_len_params; - } - if (url->m_ptr_query && url->m_len_query > 0) { str[idx++] = '?'; memcpy(&str[idx], url->m_ptr_query, url->m_len_query); @@ -1433,8 +1437,6 @@ url_parse_http(HdrHeap *heap, URLImpl *url, const char **start, const char *end, const char *cur; const char *path_start = nullptr; const char *path_end = nullptr; - const char *params_start = nullptr; - const char *params_end = nullptr; const char *query_start = nullptr; const char *query_end = nullptr; const char *fragment_start = nullptr; @@ -1456,13 +1458,9 @@ url_parse_http(HdrHeap *heap, URLImpl *url, const char **start, const char *end, if (*cur == '/') { path_start = cur; } - mask = ';' & '?' & '#'; + mask = '?' & '#'; parse_path2: if ((*cur & mask) == mask) { - if (*cur == ';') { - path_end = cur; - goto parse_params1; - } if (*cur == '?') { path_end = cur; goto parse_query1; @@ -1472,26 +1470,11 @@ parse_path2: goto parse_fragment1; } } else { - ink_assert((*cur != ';') && (*cur != '?') && (*cur != '#')); + ink_assert((*cur != '?') && (*cur != '#')); } GETNEXT(done); goto parse_path2; -parse_params1: - params_start = cur + 1; - GETNEXT(done); -parse_params2: - if (*cur == '?') { - params_end = cur; - goto parse_query1; - } - if (*cur == '#') { - params_end = cur; - goto parse_fragment1; - } - GETNEXT(done); - goto parse_params2; - parse_query1: query_start = cur + 1; GETNEXT(done); @@ -1552,12 +1535,6 @@ done: // nothing_after_host check. url->m_path_is_empty = true; } - if (params_start) { - if (!params_end) { - params_end = cur; - } - url->set_params(heap, params_start, params_end - params_start, copy_strings); - } if (query_start) { // There was a query string marked by '?'. if (!query_end) { @@ -1715,11 +1692,6 @@ url_print(URLImpl *url, char *buf_start, int buf_length, int *buf_index_inout, i TRY(mime_mem_print(url->m_ptr_path, url->m_len_path, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout)); } - if (url->m_ptr_params && url->m_len_params > 0) { - TRY(mime_mem_print(";", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout)); - TRY(mime_mem_print(url->m_ptr_params, url->m_len_params, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout)); - } - if (url->m_ptr_query && url->m_len_query > 0) { TRY(mime_mem_print("?", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout)); TRY(mime_mem_print(url->m_ptr_query, url->m_len_query, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout)); @@ -1753,8 +1725,6 @@ url_describe(HdrHeapObjImpl *raw, bool /* recurse ATS_UNUSED */) obj->m_len_port, obj->m_port); Dbg(dbg_ctl_http, "\tPATH: \"%.*s\", PATH_LEN: %d,", obj->m_len_path, (obj->m_ptr_path ? obj->m_ptr_path : "NULL"), obj->m_len_path); - Dbg(dbg_ctl_http, "\tPARAMS: \"%.*s\", PARAMS_LEN: %d,", obj->m_len_params, (obj->m_ptr_params ? obj->m_ptr_params : "NULL"), - obj->m_len_params); Dbg(dbg_ctl_http, "\tQUERY: \"%.*s\", QUERY_LEN: %d,", obj->m_len_query, (obj->m_ptr_query ? obj->m_ptr_query : "NULL"), obj->m_len_query); Dbg(dbg_ctl_http, "\tFRAGMENT: \"%.*s\", FRAGMENT_LEN: %d]", obj->m_len_fragment, @@ -1855,8 +1825,8 @@ url_CryptoHash_get_general(const URLImpl *url, CryptoContext &ctx, CryptoHash &h ends[7] = strs[7] + 1; ends[8] = strs[8] + url->m_len_path; - strs[9] = ";"; - strs[10] = url->m_ptr_params; + strs[9] = ""; + strs[10] = nullptr; strs[11] = "?"; // Special case for the query paramters, allowing us to ignore them if requested @@ -1868,8 +1838,8 @@ url_CryptoHash_get_general(const URLImpl *url, CryptoContext &ctx, CryptoHash &h ends[12] = nullptr; } - ends[9] = strs[9] + 1; - ends[10] = strs[10] + url->m_len_params; + ends[9] = strs[9] + 0; + ends[10] = strs[10] + 0; ends[11] = strs[11] + 1; p = buffer; @@ -1927,7 +1897,7 @@ url_CryptoHash_get(const URLImpl *url, CryptoHash *hash, bool ignore_query, cach { URLHashContext ctx; if ((url_hash_method != 0) && (url->m_url_type == URL_TYPE_HTTP) && - ((url->m_len_user + url->m_len_password + url->m_len_params + (ignore_query ? 0 : url->m_len_query)) == 0) && + ((url->m_len_user + url->m_len_password + (ignore_query ? 0 : url->m_len_query)) == 0) && (3 + 1 + 1 + 1 + 1 + 1 + 2 + url->m_len_scheme + url->m_len_host + url->m_len_path < BUFSIZE) && (memchr(url->m_ptr_host, '%', url->m_len_host) == nullptr) && (memchr(url->m_ptr_path, '%', url->m_len_path) == nullptr)) { url_CryptoHash_get_fast(url, ctx, hash, generation); diff --git a/src/proxy/hdrs/unit_tests/test_URL.cc b/src/proxy/hdrs/unit_tests/test_URL.cc index 5c7cdf4f40..9c5b3c6413 100644 --- a/src/proxy/hdrs/unit_tests/test_URL.cc +++ b/src/proxy/hdrs/unit_tests/test_URL.cc @@ -697,3 +697,85 @@ TEST_CASE("UrlHashGet", "[url][hash_get]") } } } + +struct get_path_test_case { + const std::string description; + const std::string uri; + const std::string path; +}; + +// clang-format off +std::vector<get_path_test_case> get_path_test_cases = { + { + "Semicolon in paths 1", + "http://foo.test/abc/xyz;p1=1,p2=2", + "abc/xyz;p1=1,p2=2", + }, + { + "Semicolon in paths 2", + "http://foo.test/abc;p1=1,p2=2/xyz", + "abc;p1=1,p2=2/xyz", + }, + { + "Semicolon in paths 3", + "http://foo.test/abc/xyz;p1=1,p2=2?q1=1", + "abc/xyz;p1=1,p2=2", + }, + { + "Semicolon in paths 4", + "http://foo.test/abc;p1=1,p2=2/xyz?q1=1", + "abc;p1=1,p2=2/xyz", + }, +}; + +/** Return the hash related to a URI. + * + * @param[in] uri The URI to hash. + * @return The hash of the URI. + */ +TEST_CASE("UrlPathGet", "[url][path_get]") +{ + for (auto const &test_case : get_path_test_cases) { + std::string description = test_case.description + ": " + test_case.uri + " -> " + test_case.path; + SECTION(description) { + URL url; + HdrHeap *heap = new_HdrHeap(); + url.create(heap); + url.parse(test_case.uri); + const char *path; + int path_len; + path = url.path_get(&path_len); + CHECK(std::string_view(path, path_len) == test_case.path); + heap->destroy(); + } + } +} + + +/** + * Tests for deprecated params_get/set + */ +TEST_CASE("UrlParamsGet", "[url][params_get]") +{ + // Expected behavior + // - ParamsGet always return empty string + // - ParamsSet appends the value to path + // - PathGet returns params appended by ParamsSet + + const char *value; + int value_len; + + URL url; + HdrHeap *heap = new_HdrHeap(); + url.create(heap); + url.parse("https://foo.test/path;p=1"); + value = url.path_get(&value_len); + CHECK(std::string_view(value, value_len) == "path;p=1"); + url.params_set("param=1", 7); + value = url.params_get(&value_len); + CHECK(value == nullptr); + CHECK(value_len == 0); + value = url.path_get(&value_len); + CHECK(std::string_view(value, value_len) == "path;param=1"); + heap->destroy(); +} diff --git a/src/proxy/http/HttpSM.cc b/src/proxy/http/HttpSM.cc index 2b2119ae76..a3cd0eb644 100644 --- a/src/proxy/http/HttpSM.cc +++ b/src/proxy/http/HttpSM.cc @@ -709,8 +709,7 @@ HttpSM::state_read_client_request_header(int event, void *data) call_transact_and_set_next_state(HttpTransact::TooEarly); return 0; } else if (!SSLConfigParams::server_allow_early_data_params && - (t_state.hdr_info.client_request.m_http->u.req.m_url_impl->m_len_params > 0 || - t_state.hdr_info.client_request.m_http->u.req.m_url_impl->m_len_query > 0)) { + t_state.hdr_info.client_request.m_http->u.req.m_url_impl->m_len_query > 0) { SMDbg(dbg_ctl_http, "client request was from early data but HAS parameters"); call_transact_and_set_next_state(HttpTransact::TooEarly); return 0; diff --git a/src/traffic_cache_tool/CacheScan.cc b/src/traffic_cache_tool/CacheScan.cc index bac9ea3f75..26e92894ef 100644 --- a/src/traffic_cache_tool/CacheScan.cc +++ b/src/traffic_cache_tool/CacheScan.cc @@ -388,19 +388,17 @@ CacheScan::get_alternates(const char *buf, int length, bool search) std::string str; if (search) { - swoc::bwprint(str, "{}://{}:{}/{};{}?{}", std::string_view(url->m_ptr_scheme, url->m_len_scheme), + swoc::bwprint(str, "{}://{}:{}/{}?{}", std::string_view(url->m_ptr_scheme, url->m_len_scheme), std::string_view(url->m_ptr_host, url->m_len_host), std::string_view(url->m_ptr_port, url->m_len_port), - std::string_view(url->m_ptr_path, url->m_len_path), std::string_view(url->m_ptr_params, url->m_len_params), - std::string_view(url->m_ptr_query, url->m_len_query)); + std::string_view(url->m_ptr_path, url->m_len_path), std::string_view(url->m_ptr_query, url->m_len_query)); if (u_matcher->match(str.data())) { str = this->stripe->hashText + " " + str; std::cout << "match found " << str << std::endl; } } else { - swoc::bwprint(str, "stripe: {} : {}://{}:{}/{};{}?{}", std::string_view(this->stripe->hashText), + swoc::bwprint(str, "stripe: {} : {}://{}:{}/{}?{}", std::string_view(this->stripe->hashText), std::string_view(url->m_ptr_scheme, url->m_len_scheme), std::string_view(url->m_ptr_host, url->m_len_host), std::string_view(url->m_ptr_port, url->m_len_port), std::string_view(url->m_ptr_path, url->m_len_path), - std::string_view(url->m_ptr_params, url->m_len_params), std::string_view(url->m_ptr_query, url->m_len_query)); std::cout << str << std::endl; }
