Hello Andreas,

thanks for your patience. I believe I have found the underlying problem.
It is a parsing issue in src/adaptation/icap/ModXact.cc and HttpMsg.cc.

2020/07/28 09:55:14.614 kid1| 58,3| HttpMsg.cc(184) parse:
HttpMsg::parse: cannot parse isolated headers in 'OPTIONS
icap://127.0.0.1:1344/virus_scan ICAP/1.0

To fix CVE-2019-12523 the urlParse function had to be updated to use the
new SBuf API for better access checks. However at one point in time
upstream did no longer used this function to parse icap headers and
simply copied an already known url. I have attached the
CVE-2019-12523.patch. You can just replace it with the old one. If
everything works as expected I will upload this change as +deb9u3 shortly.

Regards,

Markus
From: Markus Koschany <a...@debian.org>
Date: Tue, 30 Jun 2020 22:11:00 +0200
Subject: CVE-2019-12523

Origin: http://www.squid-cache.org/Versions/v4/changesets/squid-4-fbbdf75efd7a5cc244b4886a9d42ea458c5a3a73.patch
---
 src/HttpRequest.cc                |  12 +--
 src/HttpRequest.h                 |   4 +-
 src/URL.h                         |   4 +-
 src/acl/Asn.cc                    |   2 +-
 src/adaptation/ecap/MessageRep.cc |   4 +-
 src/adaptation/icap/ModXact.cc    |   5 +-
 src/client_side.cc                |   2 +-
 src/client_side_request.cc        |   4 +-
 src/htcp.cc                       |   2 +-
 src/icmp/net_db.cc                |   2 +-
 src/icp_v2.cc                     |   2 +-
 src/mgr/Inquirer.cc               |   2 +-
 src/mime.cc                       |   2 +-
 src/neighbors.cc                  |   2 +-
 src/peer_digest.cc                |   2 +-
 src/servers/FtpServer.cc          |   6 +-
 src/store_digest.cc               |   2 +-
 src/tests/testHttpRequest.cc      |  31 ++----
 src/url.cc                        | 199 ++++++++++++++++++++++++++++++--------
 src/urn.cc                        |   2 +-
 src/urn.h                         |   2 +
 21 files changed, 195 insertions(+), 98 deletions(-)

diff --git a/src/HttpRequest.cc b/src/HttpRequest.cc
index a2c7afd..8b7ab99 100644
--- a/src/HttpRequest.cc
+++ b/src/HttpRequest.cc
@@ -320,13 +320,7 @@ HttpRequest::parseFirstLine(const char *start, const char *end)
     if (end < start)   // missing URI
         return false;
 
-    char save = *end;
-
-    * (char *) end = '\0';     // temp terminate URI, XXX dangerous?
-
-    HttpRequest *tmp = urlParse(method, (char *) start, this);
-
-    * (char *) end = save;
+    HttpRequest *tmp = urlParse(method, SBuf(start, size_t(end-start)), this);
 
     if (NULL == tmp)
         return false;
@@ -540,7 +534,7 @@ HttpRequest::expectingBody(const HttpRequestMethod& unused, int64_t& theSize) co
  * If the request cannot be created cleanly, NULL is returned
  */
 HttpRequest *
-HttpRequest::CreateFromUrlAndMethod(char * url, const HttpRequestMethod& method)
+HttpRequest::CreateFromUrlAndMethod(const SBuf & url, const HttpRequestMethod& method)
 {
     return urlParse(method, url, NULL);
 }
@@ -551,7 +545,7 @@ HttpRequest::CreateFromUrlAndMethod(char * url, const HttpRequestMethod& method)
  * If the request cannot be created cleanly, NULL is returned
  */
 HttpRequest *
-HttpRequest::CreateFromUrl(char * url)
+HttpRequest::CreateFromUrl(const SBuf & url)
 {
     return urlParse(Http::METHOD_GET, url, NULL);
 }
diff --git a/src/HttpRequest.h b/src/HttpRequest.h
index cfb5a46..47f3593 100644
--- a/src/HttpRequest.h
+++ b/src/HttpRequest.h
@@ -224,9 +224,9 @@ public:
 
     static void httpRequestPack(void *obj, Packer *p);
 
-    static HttpRequest * CreateFromUrlAndMethod(char * url, const HttpRequestMethod& method);
+    static HttpRequest * CreateFromUrlAndMethod(const SBuf & url, const HttpRequestMethod& method);
 
-    static HttpRequest * CreateFromUrl(char * url);
+    static HttpRequest * CreateFromUrl(const SBuf & url);
 
     ConnStateData *pinnedConnection();
 
diff --git a/src/URL.h b/src/URL.h
index 0b75bc6..5857272 100644
--- a/src/URL.h
+++ b/src/URL.h
@@ -62,9 +62,9 @@ MEMPROXY_CLASS_INLINE(URL);
 class HttpRequest;
 class HttpRequestMethod;
 
-AnyP::ProtocolType urlParseProtocol(const char *, const char *e = NULL);
+AnyP::ProtocolType urlParseProtocol(const SBuf &);
 void urlInitialize(void);
-HttpRequest *urlParse(const HttpRequestMethod&, char *, HttpRequest *request = NULL);
+HttpRequest *urlParse(const HttpRequestMethod&, const SBuf & rawRequest, HttpRequest *request = NULL);
 const char *urlCanonical(HttpRequest *);
 char *urlCanonicalClean(const HttpRequest *);
 const char *urlCanonicalFakeHttps(const HttpRequest * request);
diff --git a/src/acl/Asn.cc b/src/acl/Asn.cc
index 551c182..e3ccb91 100644
--- a/src/acl/Asn.cc
+++ b/src/acl/Asn.cc
@@ -240,7 +240,7 @@ asnCacheStart(int as)
     debugs(53, 3, "AS " << as);
     snprintf(asres, 4096, "whois://%s/!gAS%d", Config.as_whois_server, as);
     asState->as_number = as;
-    asState->request = HttpRequest::CreateFromUrl(asres);
+    asState->request = HttpRequest::CreateFromUrl(SBuf(asres));
     assert(asState->request != NULL);
 
     if ((e = storeGetPublic(asres, Http::METHOD_GET)) == NULL) {
diff --git a/src/adaptation/ecap/MessageRep.cc b/src/adaptation/ecap/MessageRep.cc
index cff44d4..60a6d28 100644
--- a/src/adaptation/ecap/MessageRep.cc
+++ b/src/adaptation/ecap/MessageRep.cc
@@ -210,9 +210,7 @@ Adaptation::Ecap::RequestLineRep::uri(const Area &aUri)
     // TODO: if method is not set, urlPath will assume it is not connect;
     // Can we change urlParse API to remove the method parameter?
     // TODO: optimize: urlPath should take constant URL buffer
-    char *buf = xstrdup(aUri.toString().c_str());
-    const bool ok = urlParse(theMessage.method, buf, &theMessage);
-    xfree(buf);
+    const bool ok = urlParse(theMessage.method, SBuf(aUri.toString()), &theMessage);
     Must(ok);
 }
 
diff --git a/src/adaptation/icap/ModXact.cc b/src/adaptation/icap/ModXact.cc
index f9375d3..f6809f5 100644
--- a/src/adaptation/icap/ModXact.cc
+++ b/src/adaptation/icap/ModXact.cc
@@ -1539,8 +1539,9 @@ void Adaptation::Icap::ModXact::encapsulateHead(MemBuf &icapBuf, const char *sec
 
     if (const HttpRequest* old_request = dynamic_cast<const HttpRequest*>(head)) {
         HttpRequest::Pointer new_request(new HttpRequest);
-        Must(old_request->canonical);
-        urlParse(old_request->method, old_request->canonical, new_request.getRaw());
+        // copy the requst-line details
+        new_request->method = old_request->method;
+        new_request->url = old_request->url;
         new_request->http_ver = old_request->http_ver;
         headClone = new_request.getRaw();
     } else if (const HttpReply *old_reply = dynamic_cast<const HttpReply*>(head)) {
diff --git a/src/client_side.cc b/src/client_side.cc
index dbb75ae..b025af8 100644
--- a/src/client_side.cc
+++ b/src/client_side.cc
@@ -2639,7 +2639,7 @@ clientProcessRequest(ConnStateData *conn, HttpParser *hp, ClientSocketContext *c
             return;
         }
 
-        if ((request = HttpRequest::CreateFromUrlAndMethod(http->uri, method)) == NULL) {
+        if ((request = HttpRequest::CreateFromUrlAndMethod(SBuf(http->uri), method)) == NULL) {
             clientStreamNode *node = context->getClientReplyContext();
             debugs(33, 5, "Invalid URL: " << http->uri);
             conn->quitAfterError(request.getRaw());
diff --git a/src/client_side_request.cc b/src/client_side_request.cc
index e266ace..7a7aad4 100644
--- a/src/client_side_request.cc
+++ b/src/client_side_request.cc
@@ -325,7 +325,7 @@ clientBeginRequest(const HttpRequestMethod& method, char const *url, CSCB * stre
     http->uri = (char *)xcalloc(url_sz, 1);
     strcpy(http->uri, url);
 
-    if ((request = HttpRequest::CreateFromUrlAndMethod(http->uri, method)) == NULL) {
+    if ((request = HttpRequest::CreateFromUrlAndMethod(SBuf(http->uri), method)) == NULL) {
         debugs(85, 5, "Invalid URL: " << http->uri);
         return -1;
     }
@@ -1281,7 +1281,7 @@ ClientRequestContext::clientRedirectDone(const Helper::Reply &reply)
                 // XXX: validate the URL properly *without* generating a whole new request object right here.
                 // XXX: the clone() should be done only AFTER we know the new URL is valid.
                 HttpRequest *new_request = old_request->clone();
-                if (urlParse(old_request->method, const_cast<char*>(urlNote), new_request)) {
+                if (urlParse(old_request->method, SBuf(urlNote), new_request)) {
                     debugs(61,2, HERE << "URL-rewriter diverts URL from " << urlCanonical(old_request) << " to " << urlCanonical(new_request));
 
                     // update the new request to flag the re-writing was done on it
diff --git a/src/htcp.cc b/src/htcp.cc
index e033951..cddb635 100644
--- a/src/htcp.cc
+++ b/src/htcp.cc
@@ -747,7 +747,7 @@ htcpUnpackSpecifier(char *buf, int sz)
      */
     method = HttpRequestMethod(s->method, NULL);
 
-    s->request = HttpRequest::CreateFromUrlAndMethod(s->uri, method == Http::METHOD_NONE ? HttpRequestMethod(Http::METHOD_GET) : method);
+    s->request = HttpRequest::CreateFromUrlAndMethod(SBuf(s->uri), method == Http::METHOD_NONE ? HttpRequestMethod(Http::METHOD_GET) : method);
 
     if (s->request)
         HTTPMSGLOCK(s->request);
diff --git a/src/icmp/net_db.cc b/src/icmp/net_db.cc
index 8d9e1ec..f8c3bf9 100644
--- a/src/icmp/net_db.cc
+++ b/src/icmp/net_db.cc
@@ -1285,7 +1285,7 @@ netdbExchangeStart(void *data)
     uri = internalRemoteUri(p->host, p->http_port, "/squid-internal-dynamic/", "netdb");
     debugs(38, 3, "netdbExchangeStart: Requesting '" << uri << "'");
     assert(NULL != uri);
-    ex->r = HttpRequest::CreateFromUrl(uri);
+    ex->r = HttpRequest::CreateFromUrl(SBuf(uri));
 
     if (NULL == ex->r) {
         debugs(38, DBG_IMPORTANT, "netdbExchangeStart: Bad URI " << uri);
diff --git a/src/icp_v2.cc b/src/icp_v2.cc
index 5b2a4ee..38ba428 100644
--- a/src/icp_v2.cc
+++ b/src/icp_v2.cc
@@ -438,7 +438,7 @@ icpGetRequest(char *url, int reqnum, int fd, Ip::Address &from)
 
     HttpRequest *result;
 
-    if ((result = HttpRequest::CreateFromUrl(url)) == NULL)
+    if ((result = HttpRequest::CreateFromUrl(SBuf(url))) == NULL)
         icpCreateAndSend(ICP_ERR, 0, url, reqnum, 0, fd, from);
 
     return result;
diff --git a/src/mgr/Inquirer.cc b/src/mgr/Inquirer.cc
index d3b020b..97a90d9 100644
--- a/src/mgr/Inquirer.cc
+++ b/src/mgr/Inquirer.cc
@@ -76,7 +76,7 @@ Mgr::Inquirer::start()
     if (strands.empty()) {
         LOCAL_ARRAY(char, url, MAX_URL);
         snprintf(url, MAX_URL, "%s", aggrAction->command().params.httpUri.termedBuf());
-        HttpRequest *req = HttpRequest::CreateFromUrl(url);
+        HttpRequest *req = HttpRequest::CreateFromUrl(SBuf(url));
         ErrorState err(ERR_INVALID_URL, Http::scNotFound, req);
         std::unique_ptr<HttpReply> reply(err.BuildHttpReply());
         replyBuf.reset(reply->pack());
diff --git a/src/mime.cc b/src/mime.cc
index a07564f..2739d07 100644
--- a/src/mime.cc
+++ b/src/mime.cc
@@ -407,7 +407,7 @@ MimeIcon::created(StoreEntry *newEntry)
     EBIT_SET(e->flags, ENTRY_SPECIAL);
     e->setPublicKey();
     e->buffer();
-    HttpRequest *r = HttpRequest::CreateFromUrl(url_);
+    HttpRequest *r = HttpRequest::CreateFromUrl(SBuf(url_));
 
     if (NULL == r)
         fatalf("mimeLoadIcon: cannot parse internal URL: %s", url_);
diff --git a/src/neighbors.cc b/src/neighbors.cc
index 103c9bc..5225909 100644
--- a/src/neighbors.cc
+++ b/src/neighbors.cc
@@ -1398,7 +1398,7 @@ peerCountMcastPeersStart(void *data)
     p->in_addr.toUrl(url+7, MAX_URL -8 );
     strcat(url, "/");
     fake = storeCreateEntry(url, url, RequestFlags(), Http::METHOD_GET);
-    HttpRequest *req = HttpRequest::CreateFromUrl(url);
+    HttpRequest *req = HttpRequest::CreateFromUrl(SBuf(url));
     psstate = new ps_state;
     psstate->request = req;
     HTTPMSGLOCK(psstate->request);
diff --git a/src/peer_digest.cc b/src/peer_digest.cc
index e955d4a..2497801 100644
--- a/src/peer_digest.cc
+++ b/src/peer_digest.cc
@@ -293,7 +293,7 @@ peerDigestRequest(PeerDigest * pd)
     else
         url = xstrdup(internalRemoteUri(p->host, p->http_port, "/squid-internal-periodic/", StoreDigestFileName));
 
-    req = HttpRequest::CreateFromUrl(url);
+    req = HttpRequest::CreateFromUrl(SBuf(url));
 
     assert(req);
 
diff --git a/src/servers/FtpServer.cc b/src/servers/FtpServer.cc
index ea3dab7..fa83577 100644
--- a/src/servers/FtpServer.cc
+++ b/src/servers/FtpServer.cc
@@ -725,12 +725,10 @@ Ftp::Server::parseOneRequest(Http::ProtocolVersion &ver)
     const SBuf *path = (params.length() && CommandHasPathParameter(cmd)) ?
                        &params : NULL;
     calcUri(path);
-    char *newUri = xstrdup(uri.c_str());
-    HttpRequest *const request = HttpRequest::CreateFromUrlAndMethod(newUri, method);
+    HttpRequest *const request = HttpRequest::CreateFromUrlAndMethod(uri, method);
     if (!request) {
         debugs(33, 5, "Invalid FTP URL: " << uri);
         uri.clear();
-        safe_free(newUri);
         return earlyError(eekInvalidUri);
     }
 
@@ -753,7 +751,7 @@ Ftp::Server::parseOneRequest(Http::ProtocolVersion &ver)
     http->request = request;
     HTTPMSGLOCK(http->request);
     http->req_sz = tok.parsedSize();
-    http->uri = newUri;
+    http->uri = xstrdup(uri.c_str());
 
     ClientSocketContext *const result =
         new ClientSocketContext(clientConnection, http);
diff --git a/src/store_digest.cc b/src/store_digest.cc
index da676bc..86bfd0f 100644
--- a/src/store_digest.cc
+++ b/src/store_digest.cc
@@ -420,7 +420,7 @@ storeDigestRewriteStart(void *datanotused)
     assert(e);
     sd_state.rewrite_lock = e;
     debugs(71, 3, "storeDigestRewrite: url: " << url << " key: " << e->getMD5Text());
-    HttpRequest *req = HttpRequest::CreateFromUrl(url);
+    HttpRequest *req = HttpRequest::CreateFromUrl(SBuf(url));
     e->mem_obj->request = req;
     HTTPMSGLOCK(e->mem_obj->request);
     /* wait for rebuild (if any) to finish */
diff --git a/src/tests/testHttpRequest.cc b/src/tests/testHttpRequest.cc
index 82a7efe..8914464 100644
--- a/src/tests/testHttpRequest.cc
+++ b/src/tests/testHttpRequest.cc
@@ -43,47 +43,43 @@ testHttpRequest::testCreateFromUrlAndMethod()
 {
     /* vanilla url */
     unsigned short expected_port;
-    char * url = xstrdup("http://foo:90/bar";);
+    SBuf url("http://foo:90/bar";);
     HttpRequest *aRequest = HttpRequest::CreateFromUrlAndMethod(url, Http::METHOD_GET);
     expected_port = 90;
+    CPPUNIT_ASSERT(aRequest != nullptr);
     HttpRequest *nullRequest = NULL;
     CPPUNIT_ASSERT_EQUAL(expected_port, aRequest->port);
     CPPUNIT_ASSERT(aRequest->method == Http::METHOD_GET);
     CPPUNIT_ASSERT_EQUAL(String("foo"), String(aRequest->GetHost()));
     CPPUNIT_ASSERT_EQUAL(String("/bar"), aRequest->urlpath);
     CPPUNIT_ASSERT_EQUAL(AnyP::PROTO_HTTP, static_cast<AnyP::ProtocolType>(aRequest->url.getScheme()));
-    CPPUNIT_ASSERT_EQUAL(String("http://foo:90/bar";), String(url));
-    xfree(url);
 
     /* vanilla url, different method */
-    url = xstrdup("http://foo/bar";);
+    url = "http://foo/bar";;
     aRequest = HttpRequest::CreateFromUrlAndMethod(url, Http::METHOD_PUT);
     expected_port = 80;
+    CPPUNIT_ASSERT(aRequest != nullptr);
     CPPUNIT_ASSERT_EQUAL(expected_port, aRequest->port);
     CPPUNIT_ASSERT(aRequest->method == Http::METHOD_PUT);
     CPPUNIT_ASSERT_EQUAL(String("foo"), String(aRequest->GetHost()));
     CPPUNIT_ASSERT_EQUAL(String("/bar"), aRequest->urlpath);
     CPPUNIT_ASSERT_EQUAL(AnyP::PROTO_HTTP, static_cast<AnyP::ProtocolType>(aRequest->url.getScheme()));
-    CPPUNIT_ASSERT_EQUAL(String("http://foo/bar";), String(url));
-    xfree(url);
 
     /* a connect url with non-CONNECT data */
-    url = xstrdup(":foo/bar");
+    url = ":foo/bar";
     aRequest = HttpRequest::CreateFromUrlAndMethod(url, Http::METHOD_CONNECT);
-    xfree(url);
     CPPUNIT_ASSERT_EQUAL(nullRequest, aRequest);
 
     /* a CONNECT url with CONNECT data */
-    url = xstrdup("foo:45");
+    url = "foo:45";
     aRequest = HttpRequest::CreateFromUrlAndMethod(url, Http::METHOD_CONNECT);
     expected_port = 45;
+    CPPUNIT_ASSERT(aRequest != nullptr);
     CPPUNIT_ASSERT_EQUAL(expected_port, aRequest->port);
     CPPUNIT_ASSERT(aRequest->method == Http::METHOD_CONNECT);
     CPPUNIT_ASSERT_EQUAL(String("foo"), String(aRequest->GetHost()));
     CPPUNIT_ASSERT_EQUAL(String(""), aRequest->urlpath);
     CPPUNIT_ASSERT_EQUAL(AnyP::PROTO_NONE, static_cast<AnyP::ProtocolType>(aRequest->url.getScheme()));
-    CPPUNIT_ASSERT_EQUAL(String("foo:45"), String(url));
-    xfree(url);
 }
 
 /*
@@ -113,11 +109,10 @@ void
 testHttpRequest::testIPv6HostColonBug()
 {
     unsigned short expected_port;
-    char * url = NULL;
     HttpRequest *aRequest = NULL;
 
     /* valid IPv6 address without port */
-    url = xstrdup("http://[2000:800::45]/foo";);
+    SBuf url("http://[2000:800::45]/foo";);
     aRequest = HttpRequest::CreateFromUrlAndMethod(url, Http::METHOD_GET);
     expected_port = 80;
     CPPUNIT_ASSERT_EQUAL(expected_port, aRequest->port);
@@ -125,11 +120,9 @@ testHttpRequest::testIPv6HostColonBug()
     CPPUNIT_ASSERT_EQUAL(String("[2000:800::45]"), String(aRequest->GetHost()));
     CPPUNIT_ASSERT_EQUAL(String("/foo"), aRequest->urlpath);
     CPPUNIT_ASSERT_EQUAL(AnyP::PROTO_HTTP, static_cast<AnyP::ProtocolType>(aRequest->url.getScheme()));
-    CPPUNIT_ASSERT_EQUAL(String("http://[2000:800::45]/foo";), String(url));
-    xfree(url);
 
     /* valid IPv6 address with port */
-    url = xstrdup("http://[2000:800::45]:90/foo";);
+    url = "http://[2000:800::45]:90/foo";;
     aRequest = HttpRequest::CreateFromUrlAndMethod(url, Http::METHOD_GET);
     expected_port = 90;
     CPPUNIT_ASSERT_EQUAL(expected_port, aRequest->port);
@@ -137,11 +130,9 @@ testHttpRequest::testIPv6HostColonBug()
     CPPUNIT_ASSERT_EQUAL(String("[2000:800::45]"), String(aRequest->GetHost()));
     CPPUNIT_ASSERT_EQUAL(String("/foo"), aRequest->urlpath);
     CPPUNIT_ASSERT_EQUAL(AnyP::PROTO_HTTP, static_cast<AnyP::ProtocolType>(aRequest->url.getScheme()));
-    CPPUNIT_ASSERT_EQUAL(String("http://[2000:800::45]:90/foo";), String(url));
-    xfree(url);
 
     /* IPv6 address as invalid (bug trigger) */
-    url = xstrdup("http://2000:800::45/foo";);
+    url = "http://2000:800::45/foo";;
     aRequest = HttpRequest::CreateFromUrlAndMethod(url, Http::METHOD_GET);
     expected_port = 80;
     CPPUNIT_ASSERT_EQUAL(expected_port, aRequest->port);
@@ -149,8 +140,6 @@ testHttpRequest::testIPv6HostColonBug()
     CPPUNIT_ASSERT_EQUAL(String("[2000:800::45]"), String(aRequest->GetHost()));
     CPPUNIT_ASSERT_EQUAL(String("/foo"), aRequest->urlpath);
     CPPUNIT_ASSERT_EQUAL(AnyP::PROTO_HTTP, static_cast<AnyP::ProtocolType>(aRequest->url.getScheme()));
-    CPPUNIT_ASSERT_EQUAL(String("http://2000:800::45/foo";), String(url));
-    xfree(url);
 }
 
 void
diff --git a/src/url.cc b/src/url.cc
index 1122bf3..d3f7dd8 100644
--- a/src/url.cc
+++ b/src/url.cc
@@ -11,10 +11,12 @@
 #include "squid.h"
 #include "globals.h"
 #include "HttpRequest.h"
+#include "parser/Tokenizer.h"
 #include "rfc1738.h"
 #include "SquidConfig.h"
 #include "SquidString.h"
 #include "URL.h"
+#include "urn.h"
 
 static HttpRequest *urlParseFinish(const HttpRequestMethod& method,
                                    const AnyP::ProtocolType protocol,
@@ -23,7 +25,9 @@ static HttpRequest *urlParseFinish(const HttpRequestMethod& method,
                                    const char *const login,
                                    const int port,
                                    HttpRequest *request);
-static HttpRequest *urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request);
+
+static HttpRequest *parseUrn(const HttpRequestMethod &method, Parser::Tokenizer &tok, HttpRequest *request);
+
 static const char valid_hostname_chars_u[] =
     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
     "abcdefghijklmnopqrstuvwxyz"
@@ -37,6 +41,7 @@ static const char valid_hostname_chars[] =
     "[:]"
     ;
 
+
 void
 urlInitialize(void)
 {
@@ -81,13 +86,55 @@ urlInitialize(void)
     /* more cases? */
 }
 
+/**
+ * Extract the URI scheme and ':' delimiter from the given input buffer.
+ *
+ * Schemes up to 16 characters are accepted.
+ *
+ * Governed by RFC 3986 section 3.1
+ */
+static AnyP::UriScheme
+uriParseScheme(Parser::Tokenizer &tok)
+{
+    /*
+     * RFC 3986 section 3.1 paragraph 2:
+     *
+     * Scheme names consist of a sequence of characters beginning with a
+     * letter and followed by any combination of letters, digits, plus
+     * ("+"), period ("."), or hyphen ("-").
+     *
+     * The underscore ("_") required to match "cache_object://" squid
+     * special URI scheme.
+     */
+    static const auto schemeChars =
+#if USE_HTTP_VIOLATIONS
+        CharacterSet("special", "_") +
+#endif
+        CharacterSet("scheme", "+.-") + CharacterSet::ALPHA + CharacterSet::DIGIT;
+
+    SBuf str;
+    if (tok.prefix(str, schemeChars, 16) && tok.skip(':') && CharacterSet::ALPHA[str.at(0)]) {
+        // const auto protocol = AnyP::UriScheme::FindProtocolType(str);
+        const auto protocol = urlParseProtocol(str);
+        if (protocol == AnyP::PROTO_UNKNOWN) {
+            // return AnyP::UriScheme(protocol, str.c_str());
+            throw TextException("Unknown protocol", __FILE__, __LINE__);
+        }
+        return AnyP::UriScheme(protocol);
+    }
+
+    throw TextException("invalid URI scheme", __FILE__, __LINE__);
+}
+
+
 /**
  * urlParseProtocol() takes begin (b) and end (e) pointers, but for
  * backwards compatibility, e defaults to NULL, in which case we
  * assume b is NULL-terminated.
  */
+
 AnyP::ProtocolType
-urlParseProtocol(const char *b, const char *e)
+urlParseProtocol(const SBuf & protocol) //const char *b, const char *e)
 {
     /*
      * if e is NULL, b must be NULL terminated and we
@@ -95,10 +142,8 @@ urlParseProtocol(const char *b, const char *e)
      * after b.
      */
 
-    if (NULL == e)
-        e = b + strcspn(b, ":");
-
-    int len = e - b;
+    auto len = protocol.length();
+    auto b = protocol.rawContent();
 
     /* test common stuff first */
 
@@ -199,6 +244,13 @@ urlAppendDomain(char *host)
     return true;
 }
 
+static const SBuf &
+Asterisk()
+{
+    static SBuf star("*");
+    return star;
+}
+
 /*
  * Parse a URI/URL.
  *
@@ -221,59 +273,71 @@ urlAppendDomain(char *host)
  * being "end of host with implied path of /".
  */
 HttpRequest *
-urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
+urlParse(const HttpRequestMethod& method, const SBuf &rawUrl, HttpRequest *request)
 {
-    LOCAL_ARRAY(char, proto, MAX_URL);
+	try {
+
     LOCAL_ARRAY(char, login, MAX_URL);
     LOCAL_ARRAY(char, host, MAX_URL);
     LOCAL_ARRAY(char, urlpath, MAX_URL);
     char *t = NULL;
     char *q = NULL;
     int port;
-    AnyP::ProtocolType protocol = AnyP::PROTO_NONE;
     int l;
     int i;
     const char *src;
     char *dst;
-    proto[0] = host[0] = urlpath[0] = login[0] = '\0';
+    host[0] = urlpath[0] = login[0] = '\0';
 
-    if ((l = strlen(url)) + Config.appendDomainLen > (MAX_URL - 1)) {
-        /* terminate so it doesn't overflow other buffers */
-        *(url + (MAX_URL >> 1)) = '\0';
+	if ((l = rawUrl.length()) + Config.appendDomainLen > (MAX_URL - 1)) {
         debugs(23, DBG_IMPORTANT, "urlParse: URL too large (" << l << " bytes)");
         return NULL;
     }
+
+    if ((method == Http::METHOD_OPTIONS || method == Http::METHOD_TRACE) &&
+               Asterisk().cmp(rawUrl) == 0) {
+        // XXX: these methods might also occur in HTTPS traffic. Handle this better.
+        port = urlDefaultPort(AnyP::PROTO_HTTP);
+
+        const SBuf& h = Asterisk();
+        return urlParseFinish(method, AnyP::PROTO_HTTP, h.rawContent(), host, login, port, request);
+    }
+
+    Parser::Tokenizer tok(rawUrl);
+    AnyP::UriScheme scheme;
+
     if (method == Http::METHOD_CONNECT) {
         port = CONNECT_PORT;
 
+        // XXX: use tokenizer
+        auto B = tok.buf();
+        const char *url = B.c_str();
+
         if (sscanf(url, "[%[^]]]:%d", host, &port) < 1)
             if (sscanf(url, "%[^:]:%d", host, &port) < 1)
                 return NULL;
 
-    } else if ((method == Http::METHOD_OPTIONS || method == Http::METHOD_TRACE) &&
-               strcmp(url, "*") == 0) {
-        protocol = AnyP::PROTO_HTTP;
-        port = urlDefaultPort(protocol);
-        return urlParseFinish(method, protocol, url, host, login, port, request);
-    } else if (!strncmp(url, "urn:", 4)) {
-        return urnParse(method, url, request);
     } else {
-        /* Parse the URL: */
-        src = url;
-        i = 0;
-        /* Find first : - everything before is protocol */
-        for (i = 0, dst = proto; i < l && *src != ':'; ++i, ++src, ++dst) {
-            *dst = *src;
+        scheme = uriParseScheme(tok);
+
+        if (scheme == AnyP::PROTO_NONE)
+            return NULL; // invalid scheme
+
+        if (scheme == AnyP::PROTO_URN) {
+            return parseUrn(method, tok, request); // throws on any error
         }
-        if (i >= l)
-            return NULL;
-        *dst = '\0';
 
-        /* Then its :// */
-        if ((i+3) > l || *src != ':' || *(src + 1) != '/' || *(src + 2) != '/')
-            return NULL;
-        i += 3;
-        src += 3;
+		// URLs then have "//"
+		static const SBuf doubleSlash("//");
+        if (!tok.skip(doubleSlash))
+			return NULL;
+
+		auto B = tok.remaining();
+		const char *url = B.c_str();
+
+		/* Parse the URL: */
+		src = url;
+		i = 0;
 
         /* Then everything until first /; thats host (and port; which we'll look for here later) */
         // bug 1881: If we don't get a "/" then we imply it was there
@@ -314,8 +378,8 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
         }
         *dst = '\0';
 
-        protocol = urlParseProtocol(proto);
-        port = urlDefaultPort(protocol);
+		// port = scheme.defaultPort(); // may be reset later
+        port = urlDefaultPort(scheme);
 
         /* Is there any login information? (we should eventually parse it above) */
         t = strrchr(host, '@');
@@ -363,7 +427,7 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
         }
 
         // Bug 3183 sanity check: If scheme is present, host must be too.
-        if (protocol != AnyP::PROTO_NONE && host[0] == '\0') {
+        if (scheme != AnyP::PROTO_NONE && host[0] == '\0') {
             debugs(23, DBG_IMPORTANT, "SECURITY ALERT: Missing hostname in URL '" << url << "'. see access.log for details.");
             return NULL;
         }
@@ -392,7 +456,7 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
         }
     }
 
-    debugs(23, 3, "urlParse: Split URL '" << url << "' into proto='" << proto << "', host='" << host << "', port='" << port << "', path='" << urlpath << "'");
+    debugs(23, 3, "urlParse: Split URL '" << rawUrl << "' into proto='" << scheme << "', host='" << host << "', port='" << port << "', path='" << urlpath << "'");
 
     if (Config.onoff.check_hostnames && strspn(host, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(host)) {
         debugs(23, DBG_IMPORTANT, "urlParse: Illegal character in hostname '" << host << "'");
@@ -427,7 +491,7 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
 #endif
 
     if (stringHasWhitespace(urlpath)) {
-        debugs(23, 2, "urlParse: URI has whitespace: {" << url << "}");
+        debugs(23, 2, "urlParse: URI has whitespace: {" << rawUrl << "}");
 
         switch (Config.uri_whitespace) {
 
@@ -460,7 +524,12 @@ urlParse(const HttpRequestMethod& method, char *url, HttpRequest *request)
         }
     }
 
-    return urlParseFinish(method, protocol, urlpath, host, login, port, request);
+    return urlParseFinish(method, scheme, urlpath, host, login, port, request);
+
+	} catch (...) {
+		debugs(23, 2, "error: " << CurrentException << " " << Raw("rawUrl", rawUrl.rawContent(), rawUrl.length()));
+	}
+	return NULL;
 }
 
 /**
@@ -490,6 +559,51 @@ urlParseFinish(const HttpRequestMethod& method,
     return request;
 }
 
+/**
+ * Governed by RFC 8141 section 2:
+ *
+ *  assigned-name = "urn" ":" NID ":" NSS
+ *  NID           = (alphanum) 0*30(ldh) (alphanum)
+ *  ldh           = alphanum / "-"
+ *  NSS           = pchar *(pchar / "/")
+ *
+ * RFC 3986 Appendix D.2 defines (as deprecated):
+ *
+ *   alphanum     = ALPHA / DIGIT
+ *
+ * Notice that NID is exactly 2-32 characters in length.
+ */
+static HttpRequest*
+parseUrn(const HttpRequestMethod &method, Parser::Tokenizer &tok, HttpRequest *request)
+{
+    static const auto nidChars = CharacterSet("NID","-") + CharacterSet::ALPHA + CharacterSet::DIGIT;
+    static const auto alphanum = (CharacterSet::ALPHA + CharacterSet::DIGIT).rename("alphanum");
+    SBuf nid;
+    if (!tok.prefix(nid, nidChars, 32))
+        throw TextException("NID not found", __FILE__, __LINE__);
+
+    if (!tok.skip(':'))
+        throw TextException("NID too long or missing ':' delimiter", __FILE__, __LINE__);
+
+    if (nid.length() < 2)
+        throw TextException("NID too short", __FILE__, __LINE__);
+
+    if (!alphanum[nid[0]])
+        throw TextException("NID prefix is not alphanumeric", __FILE__, __LINE__);
+
+    if (!alphanum[nid[nid.length()-1]])
+        throw TextException("NID suffix is not alphanumeric", __FILE__, __LINE__);
+
+    nid.append(':').append(tok.remaining());
+    if (request) {
+        request->initHTTP(method, AnyP::PROTO_URN, nid.rawContent());
+        safe_free(request->canonical);
+        return request;
+    }
+    return new HttpRequest(method, AnyP::PROTO_URN, nid.rawContent());
+}
+
+/*
 static HttpRequest *
 urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request)
 {
@@ -501,7 +615,7 @@ urnParse(const HttpRequestMethod& method, char *urn, HttpRequest *request)
     }
 
     return new HttpRequest(method, AnyP::PROTO_URN, urn + 4);
-}
+}*/
 
 const char *
 urlCanonical(HttpRequest * request)
@@ -673,7 +787,8 @@ urlMakeAbsolute(const HttpRequest * req, const char *relUrl)
     char *urlbuf = (char *)xmalloc(MAX_URL * sizeof(char));
 
     if (req->url.getScheme() == AnyP::PROTO_URN) {
-        snprintf(urlbuf, MAX_URL, "urn:" SQUIDSTRINGPH,
+        snprintf(urlbuf, MAX_URL, "urn:%s:" SQUIDSTRINGPH,
+                 req->GetHost(),
                  SQUIDSTRINGPRINT(req->urlpath));
         return (urlbuf);
     }
diff --git a/src/urn.cc b/src/urn.cc
index 487e4f2..1ff54f7 100644
--- a/src/urn.cc
+++ b/src/urn.cc
@@ -183,7 +183,7 @@ UrnState::createUriResRequest (String &uri)
     safe_free(host);
     safe_free(urlres);
     urlres = xstrdup(local_urlres);
-    urlres_r = HttpRequest::CreateFromUrl(urlres);
+    urlres_r = HttpRequest::CreateFromUrl(SBuf(urlres));
 }
 
 void
diff --git a/src/urn.h b/src/urn.h
index 81b1e95..d54f5ff 100644
--- a/src/urn.h
+++ b/src/urn.h
@@ -16,5 +16,7 @@ class StoreEntry;
 
 void urnStart(HttpRequest *, StoreEntry *);
 
+std::ostream& CurrentException(std::ostream &os);
+
 #endif /* SQUID_URN_H_ */
 

Attachment: signature.asc
Description: OpenPGP digital signature

Reply via email to