He-Pin commented on code in PR #2838:
URL: https://github.com/apache/pekko/pull/2838#discussion_r3067927317


##########
actor/src/main/scala/org/apache/pekko/util/ByteString.scala:
##########
@@ -921,6 +1030,67 @@ object ByteString {
       }
     }
 
+    override def lastIndexOf[B >: Byte](elem: B, end: Int): Int = {
+      if (end < 0) -1
+      else {
+        val byteStringsLast = bytestrings.size - 1
+
+        @tailrec
+        def find(bsIdx: Int, relativeIndex: Int, len: Int): Int = {
+          if (bsIdx < 0) -1
+          else {
+            val bs = bytestrings(bsIdx)
+            val bsStartIndex = len - bs.length
+
+            if (relativeIndex < bsStartIndex || bs.isEmpty) {
+              if (bsIdx == 0) -1
+              else find(bsIdx - 1, relativeIndex, bsStartIndex)
+            } else {
+              val subIndexOf = bs.lastIndexOf(elem, relativeIndex)

Review Comment:
   ### Bug: `relativeIndex` is in global coordinates, `bs.lastIndexOf` expects 
local
   
   `relativeIndex` tracks the overall ByteStrings position, but 
`bs.lastIndexOf(elem, end)` interprets `end` as a local index within that 
sub-bytestring.
   
   **Failing case:**
   ```scala
   val bs = ByteStrings(ByteString1.fromString("ab"), 
ByteString1.fromString("dd"))
   // Total: "abdd", 'd' at global indices 2 and 3
   bs.lastIndexOf('d'.toByte, 2) // Expected: 2, Actual: 3
   ```
   
   Trace: `relativeIndex = 2`, sub-bs = `"dd"`, `bsStartIndex = 2`.
   `"dd".lastIndexOf('d', 2)` → endIdx clamps to `min(2, 1) = 1` → finds last 
`'d'` at **local 1** → returns `1 + 2 = 3` (beyond `end = 2`).
   
   **Fix:**
   ```scala
   val subIndexOf = bs.lastIndexOf(elem, relativeIndex - bsStartIndex)
   ```



##########
actor/src/main/scala/org/apache/pekko/util/ByteString.scala:
##########
@@ -921,6 +1030,67 @@ object ByteString {
       }
     }
 
+    override def lastIndexOf[B >: Byte](elem: B, end: Int): Int = {
+      if (end < 0) -1
+      else {
+        val byteStringsLast = bytestrings.size - 1
+
+        @tailrec
+        def find(bsIdx: Int, relativeIndex: Int, len: Int): Int = {
+          if (bsIdx < 0) -1
+          else {
+            val bs = bytestrings(bsIdx)
+            val bsStartIndex = len - bs.length
+
+            if (relativeIndex < bsStartIndex || bs.isEmpty) {
+              if (bsIdx == 0) -1
+              else find(bsIdx - 1, relativeIndex, bsStartIndex)
+            } else {
+              val subIndexOf = bs.lastIndexOf(elem, relativeIndex)
+              if (subIndexOf < 0) {
+                if (bsIdx == 0) -1
+                else find(bsIdx - 1, relativeIndex, bsStartIndex)
+              } else subIndexOf + bsStartIndex
+            }
+          }
+        }
+
+        find(byteStringsLast, math.min(end, length), length)
+      }
+    }
+
+    override def lastIndexOf(elem: Byte, end: Int): Int = {
+      if (end < 0) -1
+      else {
+        if (end < 0) -1
+        else {
+          val byteStringsLast = bytestrings.size - 1
+
+          @tailrec
+          def find(bsIdx: Int, relativeIndex: Int, len: Int): Int = {
+            if (bsIdx < 0) -1
+            else {
+              val bs = bytestrings(bsIdx)
+              val bsStartIndex = len - bs.length
+
+              if (relativeIndex < bsStartIndex || bs.isEmpty) {
+                if (bsIdx == 0) -1
+                else find(bsIdx - 1, relativeIndex, bsStartIndex)
+              } else {
+                val subIndexOf = bs.lastIndexOf(elem, relativeIndex)

Review Comment:
   Same coordinate-translation bug as the generic version above.
   
   ```suggestion
                   val subIndexOf = bs.lastIndexOf(elem, relativeIndex - 
bsStartIndex)
   ```



##########
actor/src/main/scala/org/apache/pekko/util/ByteString.scala:
##########
@@ -921,6 +1030,67 @@ object ByteString {
       }
     }
 
+    override def lastIndexOf[B >: Byte](elem: B, end: Int): Int = {
+      if (end < 0) -1
+      else {
+        val byteStringsLast = bytestrings.size - 1
+
+        @tailrec
+        def find(bsIdx: Int, relativeIndex: Int, len: Int): Int = {
+          if (bsIdx < 0) -1
+          else {
+            val bs = bytestrings(bsIdx)
+            val bsStartIndex = len - bs.length
+
+            if (relativeIndex < bsStartIndex || bs.isEmpty) {
+              if (bsIdx == 0) -1
+              else find(bsIdx - 1, relativeIndex, bsStartIndex)
+            } else {
+              val subIndexOf = bs.lastIndexOf(elem, relativeIndex)
+              if (subIndexOf < 0) {
+                if (bsIdx == 0) -1
+                else find(bsIdx - 1, relativeIndex, bsStartIndex)
+              } else subIndexOf + bsStartIndex
+            }
+          }
+        }
+
+        find(byteStringsLast, math.min(end, length), length)
+      }
+    }
+
+    override def lastIndexOf(elem: Byte, end: Int): Int = {
+      if (end < 0) -1
+      else {
+        if (end < 0) -1

Review Comment:
   Redundant check -- `end < 0` is already handled at line 1063. This inner 
check is dead code.
   
   ```suggestion
           {
   ```



##########
actor-tests/src/test/scala/org/apache/pekko/util/ByteStringSpec.scala:
##########
@@ -821,6 +821,56 @@ class ByteStringSpec extends AnyWordSpec with Matchers 
with Checkers {
       compact.lastIndexOf('b', 0) should ===(-1)
       compact.lastIndexOf('b', -1) should ===(-1)
     }
+    "lastIndexOf (specialized)" in {
+      ByteString.empty.lastIndexOf(5.toByte, -1) should ===(-1)
+      ByteString.empty.lastIndexOf(5.toByte, 0) should ===(-1)
+      ByteString.empty.lastIndexOf(5.toByte, 1) should ===(-1)
+      ByteString.empty.lastIndexOf(5.toByte) should ===(-1)
+      val byteString1 = ByteString1.fromString("abb")
+      byteString1.lastIndexOf('d'.toByte) should ===(-1)
+      byteString1.lastIndexOf('d'.toByte, -1) should ===(-1)
+      byteString1.lastIndexOf('d'.toByte, 4) should ===(-1)
+      byteString1.lastIndexOf('d'.toByte, 1) should ===(-1)
+      byteString1.lastIndexOf('d'.toByte, 0) should ===(-1)
+      byteString1.lastIndexOf('a'.toByte, -1) should ===(-1)
+      byteString1.lastIndexOf('a'.toByte) should ===(0)
+      byteString1.lastIndexOf('a'.toByte, 0) should ===(0)
+      byteString1.lastIndexOf('a'.toByte, 1) should ===(0)
+      byteString1.lastIndexOf('b'.toByte) should ===(2)
+      byteString1.lastIndexOf('b'.toByte, 2) should ===(2)
+      byteString1.lastIndexOf('b'.toByte, 1) should ===(1)
+      byteString1.lastIndexOf('b'.toByte, 0) should ===(-1)
+
+      val byteStrings = ByteStrings(ByteString1.fromString("abb"), 
ByteString1.fromString("efg"))
+      byteStrings.lastIndexOf('e'.toByte) should ===(3)
+      byteStrings.lastIndexOf('e'.toByte, 6) should ===(3)
+      byteStrings.lastIndexOf('e'.toByte, 4) should ===(3)
+      byteStrings.lastIndexOf('e'.toByte, 1) should ===(-1)
+      byteStrings.lastIndexOf('e'.toByte, 0) should ===(-1)
+      byteStrings.lastIndexOf('e'.toByte, -1) should ===(-1)
+
+      byteStrings.lastIndexOf('b'.toByte) should ===(2)
+      byteStrings.lastIndexOf('b'.toByte, 6) should ===(2)
+      byteStrings.lastIndexOf('b'.toByte, 4) should ===(2)
+      byteStrings.lastIndexOf('b'.toByte, 1) should ===(1)
+      byteStrings.lastIndexOf('b'.toByte, 0) should ===(-1)
+      byteStrings.lastIndexOf('b'.toByte, -1) should ===(-1)
+
+      val compact = byteStrings.compact
+      compact.lastIndexOf('e'.toByte) should ===(3)
+      compact.lastIndexOf('e'.toByte, 6) should ===(3)
+      compact.lastIndexOf('e'.toByte, 4) should ===(3)
+      compact.lastIndexOf('e'.toByte, 1) should ===(-1)
+      compact.lastIndexOf('e'.toByte, 0) should ===(-1)
+      compact.lastIndexOf('e'.toByte, -1) should ===(-1)
+
+      compact.lastIndexOf('b'.toByte) should ===(2)
+      compact.lastIndexOf('b'.toByte, 6) should ===(2)
+      compact.lastIndexOf('b'.toByte, 4) should ===(2)
+      compact.lastIndexOf('b'.toByte, 1) should ===(1)
+      compact.lastIndexOf('b'.toByte, 0) should ===(-1)

Review Comment:
   Test coverage suggestions:
   
   1. **Exercise the SWAR path** -- use strings >=16 bytes so 
`SWARUtil.getLong` is actually called:
   ```scala
   val long1 = ByteString1.fromString("abcdefghijklmnop") // 16 bytes
   long1.lastIndexOf('a'.toByte) should ===(0)
   long1.lastIndexOf('p'.toByte) should ===(15)
   long1.lastIndexOf('h'.toByte, 7) should ===(7)
   long1.lastIndexOf('h'.toByte, 6) should ===(-1)
   ```
   
   2. **Catch the ByteStrings relativeIndex bug** -- duplicate bytes spanning 
`end`:
   ```scala
   val bs = ByteStrings(ByteString1.fromString("ab"), 
ByteString1.fromString("dd"))
   bs.lastIndexOf('d'.toByte, 2) should ===(2)  // not 3
   bs.lastIndexOf('d'.toByte, 3) should ===(3)
   ```
   
   3. **Non-zero startIndex** -- via `.drop()`:
   ```scala
   val sliced = ByteString1.fromString("xxabcdefghijk").drop(2)
   sliced.lastIndexOf('k'.toByte) should ===(8)
   ```
   
   4. **Edge byte values**:
   ```scala
   val zeros = ByteString(Array[Byte](0, 1, 0, 1))
   zeros.lastIndexOf(0.toByte) should ===(2)
   val neg = ByteString(Array[Byte](-1, 0, -1))
   neg.lastIndexOf((-1).toByte) should ===(2)
   ```



##########
actor/src/main/scala/org/apache/pekko/util/ByteString.scala:
##########
@@ -921,6 +1030,67 @@ object ByteString {
       }
     }
 
+    override def lastIndexOf[B >: Byte](elem: B, end: Int): Int = {
+      if (end < 0) -1
+      else {
+        val byteStringsLast = bytestrings.size - 1
+
+        @tailrec
+        def find(bsIdx: Int, relativeIndex: Int, len: Int): Int = {
+          if (bsIdx < 0) -1
+          else {
+            val bs = bytestrings(bsIdx)
+            val bsStartIndex = len - bs.length
+
+            if (relativeIndex < bsStartIndex || bs.isEmpty) {
+              if (bsIdx == 0) -1
+              else find(bsIdx - 1, relativeIndex, bsStartIndex)
+            } else {
+              val subIndexOf = bs.lastIndexOf(elem, relativeIndex)
+              if (subIndexOf < 0) {
+                if (bsIdx == 0) -1
+                else find(bsIdx - 1, relativeIndex, bsStartIndex)
+              } else subIndexOf + bsStartIndex
+            }
+          }
+        }
+
+        find(byteStringsLast, math.min(end, length), length)

Review Comment:
   Nit: should be `math.min(end, length - 1)` -- valid indices are 
`0..length-1`. Currently masked by inner clamping in 
`ByteString1C`/`ByteString1`, but inconsistent with those implementations which 
all use `length - 1`.
   
   Same applies to line 1089.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to