This is an automated email from the ASF dual-hosted git repository.

cmcfarlen pushed a commit to branch 10.0.x
in repository https://gitbox.apache.org/repos/asf/trafficserver.git

commit f10756008bb39aa812a3728be3325e008b15d78e
Author: JosiahWI <[email protected]>
AuthorDate: Fri Jun 7 13:46:47 2024 -0500

    Modernize ja3 fingerprint encoding implementation  (#11424)
    
    * Improve detection of transitive imports
    
    This is done by moving the standard library imports after other imports in
    the import list.
    
    * Modernize ja3 fingerprint encoding implementation
    
    This makes the following changes:
    
      * This adds unit tests for the behavior of custom_get_ja3_prefixed.
      * This rewrites custom_get_ja3_prefixed as two separate functions,
        encode_word_buffer and encode_dword_buffer. The new implementations
        both offer a 20% performance improvement over the old one, measured on a
        release build with -O3 using Catch2 benchmarks and a 10 byte buffer that
        included GREASE values.
      * This adds a function encode_integer_buffer for encoding the buffer of TLS
        extensions.
      * This documents the new functions and moves them to a new ja3:: namespace
        in a separate source file.
      * This puts the steps of custom_get_ja3 in the right order, improves its
        variable names, and uses some std::string methods instead of + for string
        concatenation.
    
    * Implement changes requested by Brian Neradt
    
     * Fix formatting of file-level docstrings.
     * Use bitwise or (|) for bitops instead of (+).
     * Add const to a local that was missing it.
     * Make order of operations explicit.
     * Fix incorrect terminology in method names.
    
    (cherry picked from commit f53f0ff218ad31c79211ab25fc0217f3ccdcd203)
---
 plugins/ja3_fingerprint/CMakeLists.txt     |   6 +-
 plugins/ja3_fingerprint/ja3_fingerprint.cc | 115 ++++++++++-------------------
 plugins/ja3_fingerprint/ja3_utils.cc       | 108 +++++++++++++++++++++++++++
 plugins/ja3_fingerprint/ja3_utils.h        |  71 ++++++++++++++++++
 plugins/ja3_fingerprint/test_utils.cc      |  97 ++++++++++++++++++++++++
 5 files changed, 321 insertions(+), 76 deletions(-)

diff --git a/plugins/ja3_fingerprint/CMakeLists.txt b/plugins/ja3_fingerprint/CMakeLists.txt
index 1daf6175f0..5dcd372221 100644
--- a/plugins/ja3_fingerprint/CMakeLists.txt
+++ b/plugins/ja3_fingerprint/CMakeLists.txt
@@ -15,8 +15,12 @@
 #
 #######################
 
-add_atsplugin(ja3_fingerprint ja3_fingerprint.cc)
+add_atsplugin(ja3_fingerprint ja3_fingerprint.cc ja3_utils.cc)
 
 target_link_libraries(ja3_fingerprint PRIVATE OpenSSL::SSL)
 verify_global_plugin(ja3_fingerprint)
 verify_remap_plugin(ja3_fingerprint)
+
+add_executable(test_ja3_fingerprint ja3_utils.cc test_utils.cc) # Unit-test binary built from the encoding helpers and their tests only.
+target_link_libraries(test_ja3_fingerprint PRIVATE catch2::catch2) # test_utils.cc includes catch.hpp.
+add_test(test_ja3_fingerprint test_ja3_fingerprint) # Registers the binary as a test under its own name.
diff --git a/plugins/ja3_fingerprint/ja3_fingerprint.cc b/plugins/ja3_fingerprint/ja3_fingerprint.cc
index 379485b0be..e6d86f4324 100644
--- a/plugins/ja3_fingerprint/ja3_fingerprint.cc
+++ b/plugins/ja3_fingerprint/ja3_fingerprint.cc
@@ -1,6 +1,9 @@
-/** @ja3_fingerprint.cc
+/** @file ja3_fingerprint.cc
+ *
   Plugin JA3 Fingerprint calculates JA3 signatures for incoming SSL traffic.
+
   @section license License
+
   Licensed to the Apache Software Foundation (ASF) under one
   or more contributor license agreements.  See the NOTICE file
   distributed with this work for additional information
@@ -8,26 +11,23 @@
   to you under the Apache License, Version 2.0 (the
   "License"); you may not use this file except in compliance
   with the License.  You may obtain a copy of the License at
+
       http://www.apache.org/licenses/LICENSE-2.0
+
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
+
  */
 
-#include <cstdlib>
-#include <cstdio>
-#include <cstring>
-#include <cstdlib>
+#include "ja3_utils.h"
+
 #include <getopt.h>
 #include <netinet/in.h>
 #include <arpa/inet.h>
 
-#include <string>
-#include <unordered_set>
-#include <memory>
-
 #include "ts/apidefs.h"
 #include "ts/ts.h"
 #include "ts/remap.h"
@@ -40,8 +40,11 @@
 #include <openssl/md5.h>
 #include <openssl/opensslv.h>
 
-// Get 16bit big endian order and update pointer
-#define n2s(c, s) ((s = (((unsigned int)(c[0])) << 8) | (((unsigned 
int)(c[1])))), c += 2)
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <memory>
+#include <string>
 
 const char            *PLUGIN_NAME = "ja3_fingerprint";
 static DbgCtl          dbg_ctl{PLUGIN_NAME};
@@ -51,10 +54,6 @@ static int             global_raw_enabled             = 0;
 static int             global_log_enabled             = 0;
 static int             global_modify_incoming_enabled = 0;
 
-// GREASE table as in ja3
-static const std::unordered_set<uint16_t> GREASE_table = {0x0a0a, 0x1a1a, 
0x2a2a, 0x3a3a, 0x4a4a, 0x5a5a, 0x6a6a, 0x7a7a,
-                                                          0x8a8a, 0x9a9a, 
0xaaaa, 0xbaba, 0xcaca, 0xdada, 0xeaea, 0xfafa};
-
 struct ja3_data {
   std::string ja3_string;
   char        md5_string[33];
@@ -75,31 +74,6 @@ struct ja3_remap_info {
   }
 };
 
-static int
-custom_get_ja3_prefixed(int unit, const unsigned char *&data, int len, 
std::string &result)
-{
-  int  cnt, tmp;
-  bool first = true;
-  // Extract each entry and append to result string
-  for (cnt = 0; cnt < len; cnt += unit) {
-    if (unit == 1) {
-      tmp = *(data++);
-    } else {
-      n2s(data, tmp);
-    }
-
-    // Check for GREASE for 16-bit values, append only if non-GREASE
-    if (unit != 2 || GREASE_table.find(tmp) == GREASE_table.end()) {
-      if (!first) {
-        result += '-';
-      }
-      first   = false;
-      result += std::to_string(tmp);
-    }
-  }
-  return 0;
-}
-
 char *
 getIP(sockaddr const *s_sockaddr, char res[INET6_ADDRSTRLEN])
 {
@@ -126,52 +100,43 @@ getIP(sockaddr const *s_sockaddr, char res[INET6_ADDRSTRLEN])
 }
 
 static std::string
-custom_get_ja3(SSL *s)
+custom_get_ja3(SSL *ssl) // Builds the comma-separated JA3 string in this order: version, ciphers, extensions, elliptic curves, point formats.
 {
-  std::string          ja3;
-  size_t               len;
-  const unsigned char *p;
+  std::string          result;
+  std::size_t          len{};
+  const unsigned char *buf{};
 
   // Get version
-  unsigned int version  = SSL_client_hello_get0_legacy_version(s);
-  ja3                  += std::to_string(version) + ',';
+  unsigned int version = SSL_client_hello_get0_legacy_version(ssl);
+  result.append(std::to_string(version));
+  result.push_back(',');
 
   // Get cipher suites
-  len = SSL_client_hello_get0_ciphers(s, &p);
-  custom_get_ja3_prefixed(2, p, len, ja3);
-  ja3 += ',';
+  len = SSL_client_hello_get0_ciphers(ssl, &buf);
+  result.append(ja3::encode_word_buffer(buf, len)); // len is a std::size_t here and is converted to the int parameter of the encoder.
+  result.push_back(',');
 
   // Get extensions
-  int        *o;
-  std::string eclist, ecpflist;
-  if (SSL_client_hello_get0_ext(s, 0x0a, &p, &len) == 1) {
+  int *extension_ids{};
+  if (SSL_client_hello_get1_extensions_present(ssl, &extension_ids, &len) == 
1) {
+    result.append(ja3::encode_integer_buffer(extension_ids, len));
+    OPENSSL_free(extension_ids); // The array is freed as soon as it has been encoded.
+  }
+  result.push_back(','); // The separator is appended even when the field above is empty.
+
+  // Get elliptic curves
+  if (SSL_client_hello_get0_ext(ssl, 0x0a, &buf, &len) == 1) {
     // Skip first 2 bytes since we already have length
-    p   += 2;
-    len -= 2;
-    custom_get_ja3_prefixed(2, p, len, eclist);
+    result.append(ja3::encode_word_buffer(buf + 2, len - 2)); // NOTE(review): len is std::size_t, so len - 2 wraps if the extension is shorter than 2 bytes - confirm that cannot happen.
   }
-  if (SSL_client_hello_get0_ext(s, 0x0b, &p, &len) == 1) {
+  result.push_back(',');
+
+  // Get elliptic curve point formats
+  if (SSL_client_hello_get0_ext(ssl, 0x0b, &buf, &len) == 1) {
     // Skip first byte since we already have length
-    ++p;
-    --len;
-    custom_get_ja3_prefixed(1, p, len, ecpflist);
-  }
-  if (SSL_client_hello_get1_extensions_present(s, &o, &len) == 1) {
-    bool first = true;
-    for (size_t i = 0; i < len; i++) {
-      int type = o[i];
-      if (GREASE_table.find(type) == GREASE_table.end()) {
-        if (!first) {
-          ja3 += '-';
-        }
-        first  = false;
-        ja3   += std::to_string(type);
-      }
-    }
-    OPENSSL_free(o);
+    result.append(ja3::encode_byte_buffer(buf + 1, len - 1)); // NOTE(review): len - 1 wraps in the same way if the extension is empty - confirm that cannot happen.
   }
-  ja3 += "," + eclist + "," + ecpflist;
-  return ja3;
+  return result;
 }
 
 // This function will append value to the last occurrence of field. If none 
exists, it will
diff --git a/plugins/ja3_fingerprint/ja3_utils.cc b/plugins/ja3_fingerprint/ja3_utils.cc
new file mode 100644
index 0000000000..b54e5066c8
--- /dev/null
+++ b/plugins/ja3_fingerprint/ja3_utils.cc
@@ -0,0 +1,108 @@
+/** @file ja3_utils.cc
+
+  Plugin JA3 Fingerprint calculates JA3 signatures for incoming SSL traffic.
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+
+ */
+
+#include <algorithm>
+#include <cstdint>
+#include <string>
+#include <unordered_set>
+
+namespace ja3
+{
+
+// GREASE table as in ja3: sixteen 16-bit values of the form 0xNaNa that ja3_should_ignore() filters out of encoded buffers.
+static std::unordered_set<std::uint16_t> const GREASE_table = {0x0a0a, 0x1a1a, 
0x2a2a, 0x3a3a, 0x4a4a, 0x5a5a, 0x6a6a, 0x7a7a,
+                                                               0x8a8a, 0x9a9a, 
0xaaaa, 0xbaba, 0xcaca, 0xdada, 0xeaea, 0xfafa};
+
+static constexpr std::uint16_t
+from_big_endian(unsigned char lowbyte, unsigned char highbyte) // Combines two consecutive bytes into one 16-bit value, first byte most significant.
+{
+  return (static_cast<std::uint16_t>(lowbyte) << 8) | highbyte; // NOTE(review): lowbyte lands in the high 8 bits and highbyte in the low 8 bits, so the parameter names read as swapped.
+}
+
+static bool
+ja3_should_ignore(std::uint16_t n) // Returns true when n is one of the values stored in GREASE_table.
+{
+  return GREASE_table.find(n) != GREASE_table.end();
+}
+
+std::string
+encode_byte_buffer(unsigned char const *buf, int const len) // Joins the decimal form of every byte in buf with a dash; nothing is filtered.
+{
+  std::string result;
+  if (len > 0) { // buf is only read for a positive len; otherwise the empty string is returned.
+    // Benchmarks show that reserving space in the string here would cause
+    // a 40% increase in runtime for a buffer with 10 elements... so we
+    // don't do it.
+    result.append(std::to_string(buf[0])); // The first value is written without a leading separator.
+    std::for_each(buf + 1, buf + len, [&result](unsigned char i) { // Every remaining byte is written as a dash followed by its value.
+      result.push_back('-');
+      result.append(std::to_string(i));
+    });
+  }
+  return result;
+}
+
+std::string
+encode_word_buffer(unsigned char const *buf, int const len) // Encodes the 16-bit big-endian values in buf as dash-joined decimals, skipping GREASE values.
+{
+  std::string result;
+  auto        it{buf};
+  while ((it < (buf + len)) && ja3_should_ignore(from_big_endian(it[0], 
it[1]))) { // Skip leading GREASE values so the output never starts with a separator.
+    it += 2;
+  }
+  if (it < (buf + len)) { // At least one value that is not GREASE remains.
+    // Benchmarks show that reserving buf.size() - 1 space in the string here
+    // would have no impact on performance. Since the string may not even need
+    // that much due to GREASE values present in the buffer, we don't do it.
+    result.append(std::to_string(from_big_endian(it[0], it[1])));
+    it += 2;
+    for (; it < buf + len; it += 2) { // NOTE(review): each step reads it[0] and it[1]; an odd len would read one byte past buf + len - TODO confirm callers always pass an even len.
+      auto const value{from_big_endian(it[0], it[1])};
+      if (!ja3_should_ignore(value)) {
+        result.push_back('-');
+        result.append(std::to_string(value));
+      }
+    }
+  }
+  return result;
+}
+
+std::string
+encode_integer_buffer(int const *buf, int const len) // Encodes the len ints in buf as dash-joined decimals, skipping GREASE values.
+{
+  std::string result;
+  auto        it{std::find_if(buf, buf + len, [](int i) { return 
!ja3_should_ignore(i); })}; // First element that is not GREASE, or buf + len if none; the int is implicitly converted to std::uint16_t for the lookup.
+  if (it < (buf + len)) {
+    result.append(std::to_string(*it)); // The first kept value is written without a leading separator.
+    std::for_each(it + 1, buf + len, [&result](int const i) { // Remaining values are written as a dash plus the value unless they are GREASE.
+      if (!ja3_should_ignore(i)) {
+        result.push_back('-');
+        result.append(std::to_string(i));
+      }
+    });
+  }
+  return result;
+}
+
+} // end namespace ja3
diff --git a/plugins/ja3_fingerprint/ja3_utils.h b/plugins/ja3_fingerprint/ja3_utils.h
new file mode 100644
index 0000000000..93a0d8fe39
--- /dev/null
+++ b/plugins/ja3_fingerprint/ja3_utils.h
@@ -0,0 +1,71 @@
+/** @file ja3_utils.h
+
+  Plugin JA3 Fingerprint calculates JA3 signatures for incoming SSL traffic.
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+
+ */
+
+#include <string>
+
+namespace ja3
+{
+
+/** Encode a buffer of 8bit values.
+ *
+ * The values will be converted to their decimal string representations and
+ * joined with the '-' character. No GREASE filtering is applied to the values.
+ *
+ * @param buf The buffer to encode. This should be an SSL buffer of 8bit
+ *  values.
+ * @param len The length of the buffer in bytes. If the length is zero, buf will
+ *  not be dereferenced and an empty string is returned.
+ * @return The string-encoded ja3 representation of the buffer.
+ */
+std::string encode_byte_buffer(unsigned char const *buf, int const len);
+
+/** Encode a buffer of big-endian 16bit values.
+ *
+ * The values will be converted to their decimal string representations and
+ * joined with the '-' character. Any GREASE values in the buffer will be
+ * ignored.
+ *
+ * @param buf The buffer to encode. This should be a big-endian SSL buffer
+ *  of 16bit values.
+ * @param len The length of the buffer in bytes; two bytes are read per value.
+ *  If the length is zero, buf will not be dereferenced.
+ * @return The string-encoded ja3 representation of the buffer.
+ */
+std::string encode_word_buffer(unsigned char const *buf, int const len);
+
+/** Encode a buffer of integers.
+ *
+ * The values will be converted to their decimal string representations and
+ * joined with the '-' character. Any GREASE values in the buffer will be
+ * ignored.
+ *
+ * @param buf The buffer to encode. This should be an SSL buffer of int
+ *  values.
+ * @param len The length (number of values) in the buffer. If the length is
+ *  zero, buf will not be dereferenced.
+ * @return The string-encoded ja3 representation of the buffer.
+ */
+std::string encode_integer_buffer(int const *buf, int const len);
+
+} // end namespace ja3
diff --git a/plugins/ja3_fingerprint/test_utils.cc b/plugins/ja3_fingerprint/test_utils.cc
new file mode 100644
index 0000000000..6ff894e69d
--- /dev/null
+++ b/plugins/ja3_fingerprint/test_utils.cc
@@ -0,0 +1,97 @@
+/** @file test_utils.cc
+
+  Unit tests for ja3.
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+
+ */
+
+#include "ja3_utils.h"
+
+#define CATCH_CONFIG_MAIN
+#include <catch.hpp>
+
+TEST_CASE("ja3 byte buffer encoding")
+{
+  unsigned char const buf[]{0x8, 0x3, 0x4}; // Shared input; each section encodes a prefix of it.
+
+  SECTION("empty buffer")
+  {
+    auto got{ja3::encode_byte_buffer(nullptr, 0)}; // A null pointer with zero length must give an empty string.
+    CHECK("" == got);
+  }
+
+  SECTION("1 value")
+  {
+    auto got{ja3::encode_byte_buffer(buf, 1)}; // A single value is written without any separator.
+    CHECK("8" == got);
+  }
+
+  SECTION("3 values")
+  {
+    auto got{ja3::encode_byte_buffer(buf, 3)}; // All three bytes appear in order, joined with dashes.
+    CHECK("8-3-4" == got);
+  }
+}
+
+TEST_CASE("ja3 word buffer encoding")
+{
+  unsigned char const buf[]{0x0, 0x5, 0x0a, 0x0a, 0x0, 0x8, 0xda, 0xda, 0x1, 
0x0}; // Big-endian words 5, 0x0a0a, 8, 0xdada, 256; 0x0a0a and 0xdada are entries of the GREASE table.
+
+  SECTION("empty buffer")
+  {
+    auto got{ja3::encode_word_buffer(nullptr, 0)}; // A null pointer with zero length must give an empty string.
+    CHECK("" == got);
+  }
+
+  SECTION("1 value")
+  {
+    auto got{ja3::encode_word_buffer(buf, 2)}; // The length is given in bytes, so 2 covers one word.
+    CHECK("5" == got);
+  }
+
+  SECTION("5 values including GREASE values")
+  {
+    auto got{ja3::encode_word_buffer(buf, 10)}; // The two GREASE words are dropped from the output.
+    CHECK("5-8-256" == got);
+  }
+}
+
+TEST_CASE("ja3 integer buffer encoding")
+{
+  int const buf[]{5, 2570, 8, 56026, 256}; // 2570 is 0x0a0a and 56026 is 0xdada, both entries of the GREASE table.
+
+  SECTION("empty buffer")
+  {
+    auto got{ja3::encode_integer_buffer(nullptr, 0)}; // A null pointer with zero length must give an empty string.
+    CHECK("" == got);
+  }
+
+  SECTION("1 value")
+  {
+    auto got{ja3::encode_integer_buffer(buf, 1)}; // The length is a count of ints, not bytes.
+    CHECK("5" == got);
+  }
+
+  SECTION("5 values including GREASE values")
+  {
+    auto got{ja3::encode_integer_buffer(buf, 5)}; // The two GREASE values are dropped from the output.
+    CHECK("5-8-256" == got);
+  }
+}

Reply via email to