aokolnychyi commented on code in PR #12056:
URL: https://github.com/apache/iceberg/pull/12056#discussion_r1926229097


##########
spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/vectorized/ColumnVectorWithFilter.java:
##########
@@ -18,78 +18,121 @@
  */
 package org.apache.iceberg.spark.data.vectorized;
 
-import org.apache.iceberg.arrow.vectorized.VectorHolder;
 import org.apache.spark.sql.types.Decimal;
+import org.apache.spark.sql.vectorized.ColumnVector;
 import org.apache.spark.sql.vectorized.ColumnarArray;
+import org.apache.spark.sql.vectorized.ColumnarMap;
 import org.apache.spark.unsafe.types.UTF8String;
 
-public class ColumnVectorWithFilter extends IcebergArrowColumnVector {
+/**
+ * A column vector implementation that applies row-level filtering.
+ *
+ * <p>This class wraps an existing column vector and uses a row ID mapping array to remap row
+ * indices during data access. Each method that retrieves data for a specific row translates the
+ * provided row index using the mapping array, effectively filtering the original data to only
+ * expose the live subset of rows. This approach allows efficient row-level filtering without
+ * modifying the underlying data.
+ */
+public class ColumnVectorWithFilter extends ColumnVector {
+  private final ColumnVector delegate;
   private final int[] rowIdMapping;
 
-  public ColumnVectorWithFilter(VectorHolder holder, int[] rowIdMapping) {
-    super(holder);
+  public ColumnVectorWithFilter(ColumnVector delegate, int[] rowIdMapping) {
+    super(delegate.dataType());
+    this.delegate = delegate;
     this.rowIdMapping = rowIdMapping;
   }
 
+  @Override
+  public void close() {
+    delegate.close();
+  }
+
+  @Override
+  public void closeIfFreeable() {
+    delegate.closeIfFreeable();
+  }
+
+  @Override
+  public boolean hasNull() {
+    return delegate.hasNull();
+  }
+
+  @Override
+  public int numNulls() {
+    // computing the actual number of nulls with rowIdMapping is expensive
+    // it is OK to overestimate and return the number of nulls in the original vector
+    return delegate.numNulls();
+  }
+
   @Override
   public boolean isNullAt(int rowId) {
-    return nullabilityHolder().isNullAt(rowIdMapping[rowId]) == 1;
+    return delegate.isNullAt(rowIdMapping[rowId]);
   }
 
   @Override
   public boolean getBoolean(int rowId) {
-    return accessor().getBoolean(rowIdMapping[rowId]);
+    return delegate.getBoolean(rowIdMapping[rowId]);
+  }
+
+  @Override
+  public byte getByte(int rowId) {
+    return delegate.getByte(rowIdMapping[rowId]);
+  }
+
+  @Override
+  public short getShort(int rowId) {
+    return delegate.getShort(rowIdMapping[rowId]);
   }
 
   @Override
   public int getInt(int rowId) {
-    return accessor().getInt(rowIdMapping[rowId]);
+    return delegate.getInt(rowIdMapping[rowId]);
   }
 
   @Override
   public long getLong(int rowId) {
-    return accessor().getLong(rowIdMapping[rowId]);
+    return delegate.getLong(rowIdMapping[rowId]);
   }
 
   @Override
   public float getFloat(int rowId) {
-    return accessor().getFloat(rowIdMapping[rowId]);
+    return delegate.getFloat(rowIdMapping[rowId]);
   }
 
   @Override
   public double getDouble(int rowId) {
-    return accessor().getDouble(rowIdMapping[rowId]);
+    return delegate.getDouble(rowIdMapping[rowId]);
   }
 
   @Override
   public ColumnarArray getArray(int rowId) {
-    if (isNullAt(rowId)) {

Review Comment:
   We no longer need this null check, as we now wrap valid `ColumnVector` implementations.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to