This is an automated email from the ASF dual-hosted git repository.

jeffreyvo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 49c27d67a5 Add special implementation for zip for Utf8View/BinaryView scalars (#8963)
49c27d67a5 is described below

commit 49c27d67a52e696a694e27631ffec14d01fe9018
Author: Michael Kleen <[email protected]>
AuthorDate: Sat Jan 3 03:07:37 2026 +0100

    Add special implementation for zip for Utf8View/BinaryView scalars (#8963)
    
    # Which issue does this PR close?
    
    - Closes https://github.com/apache/arrow-rs/issues/8724
    
    # Rationale for this change
    
    The rationale is explained in the linked issue (#8724).
    
    # What changes are included in this PR?
    
    This adds a special implementation for Utf8View/BinaryView scalars for
    zip based on the design from
    https://github.com/apache/arrow-rs/pull/8653. It also includes tests.
    Benchmarks are available here:
    - https://github.com/apache/arrow-rs/pull/8988
    
    
    # Are these changes tested?
    
    Yes.
    
    # Are there any user-facing changes?
    
    There is a new private struct `ByteViewScalarImpl`, which `zip` now uses
    for Utf8View/BinaryView scalars; the public API is unchanged.
    
    <details close>
      <summary>Benchmarks</summary>
    
    System: Apple M1 Max with 10 cores on macOS 26.1
    
    ```
    group                                                                       
                                branch                                 main
    -----                                                                       
                                ------                                 ----
    zip_8192_from_string_views size 10 and string_views size 
10/non_null_scalar_vs_null_scalar/10pct_true       1.00      3.5±0.04µs        
? ?/sec    37.06   128.9±1.36µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
10/non_null_scalar_vs_null_scalar/1pct_true        1.00      3.5±0.07µs        
? ?/sec    35.76   125.1±1.76µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
10/non_null_scalar_vs_null_scalar/50pct_nulls      1.00      3.7±0.12µs        
? ?/sec    36.91   136.8±2.17µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
10/non_null_scalar_vs_null_scalar/50pct_true       1.00      3.5±0.06µs        
? ?/sec    40.30   139.9±2.11µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
10/non_null_scalar_vs_null_scalar/90pct_true       1.00      3.6±0.10µs        
? ?/sec    30.57   108.5±2.62µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
10/non_null_scalar_vs_null_scalar/99pct_true       1.00      3.5±0.05µs        
? ?/sec    28.40    99.8±2.12µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
10/non_null_scalar_vs_null_scalar/all_false        1.00      3.5±0.02µs        
? ?/sec    36.04   127.4±3.14µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
10/non_null_scalar_vs_null_scalar/all_true         1.00      3.5±0.08µs        
? ?/sec    27.39    97.1±1.11µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
10/non_nulls_scalars/10pct_true                    1.00     28.2±0.37µs        
? ?/sec    2.70     75.9±0.61µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
10/non_nulls_scalars/1pct_true                     1.00      7.2±0.24µs        
? ?/sec    9.89    71.4±12.56µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
10/non_nulls_scalars/50pct_nulls                   1.00     51.0±2.97µs        
? ?/sec    1.75     89.4±2.50µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
10/non_nulls_scalars/50pct_true                    1.00     62.1±1.00µs        
? ?/sec    1.61     99.7±4.68µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
10/non_nulls_scalars/90pct_true                    1.00     28.8±0.64µs        
? ?/sec    2.63     75.7±1.22µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
10/non_nulls_scalars/99pct_true                    1.00      7.7±0.11µs        
? ?/sec    8.98     69.0±0.74µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
10/non_nulls_scalars/all_false                     1.00      3.7±0.13µs        
? ?/sec    19.06    69.8±1.55µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
10/non_nulls_scalars/all_true                      1.00      3.6±0.10µs        
? ?/sec    18.90    68.0±1.12µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
10/null_vs_non_null_scalar/10pct_true              1.00      3.8±0.07µs        
? ?/sec    28.85   108.4±3.09µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
10/null_vs_non_null_scalar/1pct_true               1.00      3.8±0.09µs        
? ?/sec    25.83    98.7±2.71µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
10/null_vs_non_null_scalar/50pct_nulls             1.00      3.9±0.06µs        
? ?/sec    32.25   127.3±7.41µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
10/null_vs_non_null_scalar/50pct_true              1.00      3.7±0.06µs        
? ?/sec    37.66   139.5±3.00µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
10/null_vs_non_null_scalar/90pct_true              1.00      3.8±0.16µs        
? ?/sec    34.52   129.5±1.53µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
10/null_vs_non_null_scalar/99pct_true              1.00      3.7±0.05µs        
? ?/sec    33.83   124.8±1.28µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
10/null_vs_non_null_scalar/all_false               1.00      3.8±0.09µs        
? ?/sec    26.08    98.8±2.02µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
10/null_vs_non_null_scalar/all_true                1.00      3.8±0.08µs        
? ?/sec    32.56   123.9±1.48µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
100/non_null_scalar_vs_null_scalar/10pct_true      1.00      3.6±0.06µs        
? ?/sec    36.09   129.8±6.06µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
100/non_null_scalar_vs_null_scalar/1pct_true       1.00      3.6±0.35µs        
? ?/sec    34.05   122.9±5.06µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
100/non_null_scalar_vs_null_scalar/50pct_nulls     1.00      3.7±0.12µs        
? ?/sec    36.77   137.9±5.49µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
100/non_null_scalar_vs_null_scalar/50pct_true      1.00      3.6±0.09µs        
? ?/sec    38.23   137.4±3.35µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
100/non_null_scalar_vs_null_scalar/90pct_true      1.00      3.6±0.06µs        
? ?/sec    29.20   104.8±1.64µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
100/non_null_scalar_vs_null_scalar/99pct_true      1.00      3.6±0.15µs        
? ?/sec    26.94    96.9±2.73µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
100/non_null_scalar_vs_null_scalar/all_false       1.00      3.6±0.05µs        
? ?/sec    34.97   127.5±5.81µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
100/non_null_scalar_vs_null_scalar/all_true        1.00      3.8±1.05µs        
? ?/sec    24.98    95.0±2.14µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
100/non_nulls_scalars/10pct_true                   1.00     28.9±0.46µs        
? ?/sec    2.69     77.7±1.57µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
100/non_nulls_scalars/1pct_true                    1.00      7.3±0.09µs        
? ?/sec    9.81     71.6±1.96µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
100/non_nulls_scalars/50pct_nulls                  1.00     50.3±1.16µs        
? ?/sec    1.74     87.7±1.14µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
100/non_nulls_scalars/50pct_true                   1.00     63.5±1.44µs        
? ?/sec    1.59    100.7±1.97µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
100/non_nulls_scalars/90pct_true                   1.00     29.8±0.48µs        
? ?/sec    2.64     78.6±2.85µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
100/non_nulls_scalars/99pct_true                   1.00      8.2±0.12µs        
? ?/sec    8.54     69.7±0.91µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
100/non_nulls_scalars/all_false                    1.00      3.8±0.07µs        
? ?/sec    18.77    71.6±1.51µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
100/non_nulls_scalars/all_true                     1.00      3.8±0.11µs        
? ?/sec    18.31    68.8±1.10µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
100/null_vs_non_null_scalar/10pct_true             1.00      3.8±0.07µs        
? ?/sec    27.36   104.3±1.35µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
100/null_vs_non_null_scalar/1pct_true              1.00      3.8±0.07µs        
? ?/sec    24.86    94.8±1.12µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
100/null_vs_non_null_scalar/50pct_nulls            1.00      4.0±0.04µs        
? ?/sec    29.84   117.9±1.34µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
100/null_vs_non_null_scalar/50pct_true             1.00      3.9±0.21µs        
? ?/sec    35.19   137.1±3.87µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
100/null_vs_non_null_scalar/90pct_true             1.00      3.8±0.06µs        
? ?/sec    32.78   125.8±1.73µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
100/null_vs_non_null_scalar/99pct_true             1.00      3.8±0.11µs        
? ?/sec    31.87   121.5±1.47µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
100/null_vs_non_null_scalar/all_false              1.00      3.8±0.07µs        
? ?/sec    25.36    95.5±1.89µs        ? ?/sec
    zip_8192_from_string_views size 10 and string_views size 
100/null_vs_non_null_scalar/all_true               1.00      3.9±0.20µs        
? ?/sec    30.83   121.7±3.36µs        ? ?/sec
    zip_8192_from_string_views size 100 and string_views size 
100/non_null_scalar_vs_null_scalar/10pct_true     1.00      3.7±0.73µs        ? 
?/sec    35.72   132.2±6.77µs        ? ?/sec
    zip_8192_from_string_views size 100 and string_views size 
100/non_null_scalar_vs_null_scalar/1pct_true      1.00      3.6±0.04µs        ? 
?/sec    35.35   125.8±2.79µs        ? ?/sec
    zip_8192_from_string_views size 100 and string_views size 
100/non_null_scalar_vs_null_scalar/50pct_nulls    1.00      3.8±0.11µs        ? 
?/sec    36.05   136.0±2.59µs        ? ?/sec
    zip_8192_from_string_views size 100 and string_views size 
100/non_null_scalar_vs_null_scalar/50pct_true     1.00      3.6±0.13µs        ? 
?/sec    39.36   142.5±6.32µs        ? ?/sec
    zip_8192_from_string_views size 100 and string_views size 
100/non_null_scalar_vs_null_scalar/90pct_true     1.00      3.6±0.11µs        ? 
?/sec    29.63   107.5±2.03µs        ? ?/sec
    zip_8192_from_string_views size 100 and string_views size 
100/non_null_scalar_vs_null_scalar/99pct_true     1.00      3.6±0.08µs        ? 
?/sec    28.40   102.2±6.74µs        ? ?/sec
    zip_8192_from_string_views size 100 and string_views size 
100/non_null_scalar_vs_null_scalar/all_false      1.00      3.6±0.05µs        ? 
?/sec    34.83   126.0±2.12µs        ? ?/sec
    zip_8192_from_string_views size 100 and string_views size 
100/non_null_scalar_vs_null_scalar/all_true       1.00      3.6±0.05µs        ? 
?/sec    27.38    98.6±1.62µs        ? ?/sec
    zip_8192_from_string_views size 100 and string_views size 
100/non_nulls_scalars/10pct_true                  1.00     29.9±2.79µs        ? 
?/sec    2.51     75.1±0.98µs        ? ?/sec
    zip_8192_from_string_views size 100 and string_views size 
100/non_nulls_scalars/1pct_true                   1.00      7.2±0.16µs        ? 
?/sec    9.48     68.3±1.01µs        ? ?/sec
    zip_8192_from_string_views size 100 and string_views size 
100/non_nulls_scalars/50pct_nulls                 1.00     50.5±1.90µs        ? 
?/sec    1.68     84.6±1.27µs        ? ?/sec
    zip_8192_from_string_views size 100 and string_views size 
100/non_nulls_scalars/50pct_true                  1.00     64.4±0.60µs        ? 
?/sec    1.53     98.6±1.71µs        ? ?/sec
    zip_8192_from_string_views size 100 and string_views size 
100/non_nulls_scalars/90pct_true                  1.00     29.7±0.61µs        ? 
?/sec    2.57     76.1±1.15µs        ? ?/sec
    zip_8192_from_string_views size 100 and string_views size 
100/non_nulls_scalars/99pct_true                  1.00      7.9±0.09µs        ? 
?/sec    8.89     70.5±2.13µs        ? ?/sec
    zip_8192_from_string_views size 100 and string_views size 
100/non_nulls_scalars/all_false                   1.00      3.7±0.06µs        ? 
?/sec    18.31    67.8±0.86µs        ? ?/sec
    zip_8192_from_string_views size 100 and string_views size 
100/non_nulls_scalars/all_true                    1.00      3.7±0.06µs        ? 
?/sec    18.35    67.9±1.16µs        ? ?/sec
    zip_8192_from_string_views size 100 and string_views size 
100/null_vs_non_null_scalar/10pct_true            1.00      3.8±0.12µs        ? 
?/sec    28.20   107.5±2.55µs        ? ?/sec
    zip_8192_from_string_views size 100 and string_views size 
100/null_vs_non_null_scalar/1pct_true             1.00      3.9±0.16µs        ? 
?/sec    25.73    99.5±2.19µs        ? ?/sec
    zip_8192_from_string_views size 100 and string_views size 
100/null_vs_non_null_scalar/50pct_nulls           1.00      4.1±0.14µs        ? 
?/sec    29.98   122.2±2.27µs        ? ?/sec
    zip_8192_from_string_views size 100 and string_views size 
100/null_vs_non_null_scalar/50pct_true            1.00      3.8±0.08µs        ? 
?/sec    37.05   140.1±2.01µs        ? ?/sec
    zip_8192_from_string_views size 100 and string_views size 
100/null_vs_non_null_scalar/90pct_true            1.00      3.9±0.20µs        ? 
?/sec    33.52   131.8±3.10µs        ? ?/sec
    zip_8192_from_string_views size 100 and string_views size 
100/null_vs_non_null_scalar/99pct_true            1.00      3.8±0.09µs        ? 
?/sec    33.55   127.6±3.56µs        ? ?/sec
    zip_8192_from_string_views size 100 and string_views size 
100/null_vs_non_null_scalar/all_false             1.00      3.8±0.08µs        ? 
?/sec    26.47   100.8±5.55µs        ? ?/sec
    zip_8192_from_string_views size 100 and string_views size 
100/null_vs_non_null_scalar/all_true              1.00      3.9±0.06µs        ? 
?/sec    32.05   124.6±2.16µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
10/non_null_scalar_vs_null_scalar/10pct_true        1.00      3.6±0.40µs        
? ?/sec    35.16   126.4±1.92µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
10/non_null_scalar_vs_null_scalar/1pct_true         1.00      3.5±0.07µs        
? ?/sec    35.43   123.6±4.98µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
10/non_null_scalar_vs_null_scalar/50pct_nulls       1.00      3.7±0.06µs        
? ?/sec    36.06   132.4±1.80µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
10/non_null_scalar_vs_null_scalar/50pct_true        1.00      3.6±0.06µs        
? ?/sec    38.44   136.9±2.82µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
10/non_null_scalar_vs_null_scalar/90pct_true        1.00      3.5±0.04µs        
? ?/sec    29.82   105.2±2.25µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
10/non_null_scalar_vs_null_scalar/99pct_true        1.00      3.5±0.08µs        
? ?/sec    27.48    96.9±1.69µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
10/non_null_scalar_vs_null_scalar/all_false         1.00      3.6±0.12µs        
? ?/sec    33.80   123.0±2.52µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
10/non_null_scalar_vs_null_scalar/all_true          1.00      3.6±0.14µs        
? ?/sec    26.74    95.0±1.74µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
10/non_nulls_scalars/10pct_true                     1.00     27.9±0.32µs        
? ?/sec    2.65     73.9±1.31µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
10/non_nulls_scalars/1pct_true                      1.00      6.9±0.09µs        
? ?/sec    9.64     67.0±0.92µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
10/non_nulls_scalars/50pct_nulls                    1.00     49.0±0.60µs        
? ?/sec    1.73     84.7±2.45µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
10/non_nulls_scalars/50pct_true                     1.00     62.4±2.22µs        
? ?/sec    1.56     97.1±2.37µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
10/non_nulls_scalars/90pct_true                     1.00     28.7±0.37µs        
? ?/sec    2.59     74.1±1.17µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
10/non_nulls_scalars/99pct_true                     1.00      7.8±0.20µs        
? ?/sec    8.69     67.7±1.34µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
10/non_nulls_scalars/all_false                      1.00      3.6±0.09µs        
? ?/sec    18.78    68.2±2.16µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
10/non_nulls_scalars/all_true                       1.00      3.6±0.05µs        
? ?/sec    19.10   68.4±11.77µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
10/null_vs_non_null_scalar/10pct_true               1.00      3.8±0.21µs        
? ?/sec    27.30   104.1±1.34µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
10/null_vs_non_null_scalar/1pct_true                1.00      3.7±0.04µs        
? ?/sec    25.76    95.8±2.00µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
10/null_vs_non_null_scalar/50pct_nulls              1.00      4.2±0.96µs        
? ?/sec    28.05   118.0±1.17µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
10/null_vs_non_null_scalar/50pct_true               1.00      3.9±0.13µs        
? ?/sec    35.42   136.6±3.78µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
10/null_vs_non_null_scalar/90pct_true               1.00      3.8±0.10µs        
? ?/sec    33.31   125.5±1.89µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
10/null_vs_non_null_scalar/99pct_true               1.00      3.8±0.04µs        
? ?/sec    32.36   121.6±1.80µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
10/null_vs_non_null_scalar/all_false                1.00      3.7±0.04µs        
? ?/sec    25.64    95.1±0.98µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
10/null_vs_non_null_scalar/all_true                 1.00      3.9±0.07µs        
? ?/sec    31.19   121.2±2.69µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
100/non_null_scalar_vs_null_scalar/10pct_true       1.00      3.5±0.04µs        
? ?/sec    35.69   126.5±2.89µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
100/non_null_scalar_vs_null_scalar/1pct_true        1.00      3.6±0.05µs        
? ?/sec    33.84   120.9±1.68µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
100/non_null_scalar_vs_null_scalar/50pct_nulls      1.00      3.7±0.10µs        
? ?/sec    35.72   133.2±3.49µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
100/non_null_scalar_vs_null_scalar/50pct_true       1.00      3.6±0.12µs        
? ?/sec    38.28   136.0±2.11µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
100/non_null_scalar_vs_null_scalar/90pct_true       1.00      3.5±0.06µs        
? ?/sec    29.81   104.4±1.56µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
100/non_null_scalar_vs_null_scalar/99pct_true       1.00      3.5±0.08µs        
? ?/sec    27.69    98.1±2.86µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
100/non_null_scalar_vs_null_scalar/all_false        1.00      3.6±0.10µs        
? ?/sec    33.58   122.3±1.77µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
100/non_null_scalar_vs_null_scalar/all_true         1.00      3.5±0.08µs        
? ?/sec    26.79    94.7±1.02µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
100/non_nulls_scalars/10pct_true                    1.00     29.0±0.51µs        
? ?/sec    2.59     75.1±1.08µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
100/non_nulls_scalars/1pct_true                     1.00      7.4±0.10µs        
? ?/sec    9.41     69.2±1.76µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
100/non_nulls_scalars/50pct_nulls                   1.00     50.2±0.54µs        
? ?/sec    1.70     85.2±1.17µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
100/non_nulls_scalars/50pct_true                    1.00     64.1±1.59µs        
? ?/sec    1.51     96.9±1.22µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
100/non_nulls_scalars/90pct_true                    1.00     29.8±0.36µs        
? ?/sec    2.55     75.9±2.47µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
100/non_nulls_scalars/99pct_true                    1.00      8.2±0.17µs        
? ?/sec    8.24     67.8±1.11µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
100/non_nulls_scalars/all_false                     1.00      3.8±0.07µs        
? ?/sec    17.96    68.8±1.15µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
100/non_nulls_scalars/all_true                      1.00      3.8±0.12µs        
? ?/sec    17.37    66.1±0.97µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
100/null_vs_non_null_scalar/10pct_true              1.00      3.8±0.27µs        
? ?/sec    27.57   105.2±3.06µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
100/null_vs_non_null_scalar/1pct_true               1.00      3.7±0.08µs        
? ?/sec    25.44    94.8±0.94µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
100/null_vs_non_null_scalar/50pct_nulls             1.00      3.9±0.07µs        
? ?/sec    30.10   118.6±2.83µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
100/null_vs_non_null_scalar/50pct_true              1.00      3.9±0.30µs        
? ?/sec    35.20   135.6±1.67µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
100/null_vs_non_null_scalar/90pct_true              1.00      3.9±0.55µs        
? ?/sec    32.58   125.9±2.14µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
100/null_vs_non_null_scalar/99pct_true              1.00      3.8±0.36µs        
? ?/sec    32.47   122.9±4.15µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
100/null_vs_non_null_scalar/all_false               1.00      3.8±0.10µs        
? ?/sec    25.24    94.9±0.97µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
100/null_vs_non_null_scalar/all_true                1.00      3.8±0.09µs        
? ?/sec    31.58   120.3±1.65µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
3/non_null_scalar_vs_null_scalar/10pct_true         1.00      3.5±0.04µs        
? ?/sec    37.39   131.4±4.74µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
3/non_null_scalar_vs_null_scalar/1pct_true          1.00      3.5±0.09µs        
? ?/sec    35.84   126.8±3.56µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
3/non_null_scalar_vs_null_scalar/50pct_nulls        1.00      3.7±0.06µs        
? ?/sec    37.15   137.8±3.16µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
3/non_null_scalar_vs_null_scalar/50pct_true         1.00      3.5±0.06µs        
? ?/sec    39.19   138.9±4.82µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
3/non_null_scalar_vs_null_scalar/90pct_true         1.00      3.6±0.04µs        
? ?/sec    30.30   107.9±5.71µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
3/non_null_scalar_vs_null_scalar/99pct_true         1.00      3.6±0.05µs        
? ?/sec    27.33    97.7±2.10µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
3/non_null_scalar_vs_null_scalar/all_false          1.00      3.6±0.06µs        
? ?/sec    34.64   124.7±2.24µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
3/non_null_scalar_vs_null_scalar/all_true           1.00      3.7±0.19µs        
? ?/sec    26.17    96.9±1.75µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
3/non_nulls_scalars/10pct_true                      1.00     28.7±0.55µs        
? ?/sec    2.66     76.2±1.45µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
3/non_nulls_scalars/1pct_true                       1.00      7.2±0.12µs        
? ?/sec    9.58     69.0±0.80µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
3/non_nulls_scalars/50pct_nulls                     1.00     49.5±1.15µs        
? ?/sec    1.75     86.8±2.09µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
3/non_nulls_scalars/50pct_true                      1.00     62.6±0.88µs        
? ?/sec    1.65   103.4±16.82µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
3/non_nulls_scalars/90pct_true                      1.00     29.1±0.49µs        
? ?/sec    2.69     78.3±2.51µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
3/non_nulls_scalars/99pct_true                      1.00      7.8±0.09µs        
? ?/sec    9.01     70.2±1.72µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
3/non_nulls_scalars/all_false                       1.00      3.7±0.06µs        
? ?/sec    18.77    68.7±0.73µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
3/non_nulls_scalars/all_true                        1.00      3.6±0.10µs        
? ?/sec    18.73    68.2±1.44µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
3/null_vs_non_null_scalar/10pct_true                1.00      3.9±0.11µs        
? ?/sec    27.68   106.9±2.29µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
3/null_vs_non_null_scalar/1pct_true                 1.00      3.9±0.19µs        
? ?/sec    26.12   101.9±8.79µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
3/null_vs_non_null_scalar/50pct_nulls               1.00      4.1±0.07µs        
? ?/sec    29.91   122.7±3.28µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
3/null_vs_non_null_scalar/50pct_true                1.00      3.8±0.14µs        
? ?/sec    36.82   141.4±3.69µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
3/null_vs_non_null_scalar/90pct_true                1.00      3.8±0.10µs        
? ?/sec    34.15   131.4±2.99µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
3/null_vs_non_null_scalar/99pct_true                1.00      3.8±0.06µs        
? ?/sec    32.89   125.2±3.21µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
3/null_vs_non_null_scalar/all_false                 1.00      3.8±0.06µs        
? ?/sec    26.05    99.2±2.30µs        ? ?/sec
    zip_8192_from_string_views size 3 and string_views size 
3/null_vs_non_null_scalar/all_true                  1.00      4.0±0.33µs        
? ?/sec    32.00  126.7±25.05µs        ? ?/sec
    ```
    
    </details>
---
 arrow-select/src/zip.rs | 341 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 337 insertions(+), 4 deletions(-)

diff --git a/arrow-select/src/zip.rs b/arrow-select/src/zip.rs
index e45b817dc6..6be034fca2 100644
--- a/arrow-select/src/zip.rs
+++ b/arrow-select/src/zip.rs
@@ -19,14 +19,17 @@
 
 use crate::filter::{SlicesIterator, prep_null_mask_filter};
 use arrow_array::cast::AsArray;
-use arrow_array::types::{BinaryType, ByteArrayType, LargeBinaryType, 
LargeUtf8Type, Utf8Type};
+use arrow_array::types::{
+    BinaryType, BinaryViewType, ByteArrayType, ByteViewType, LargeBinaryType, 
LargeUtf8Type,
+    StringViewType, Utf8Type,
+};
 use arrow_array::*;
 use arrow_buffer::{
     BooleanBuffer, Buffer, MutableBuffer, NullBuffer, OffsetBuffer, 
OffsetBufferBuilder,
-    ScalarBuffer,
+    ScalarBuffer, ToByteSlice,
 };
-use arrow_data::ArrayData;
 use arrow_data::transform::MutableArrayData;
+use arrow_data::{ArrayData, ByteView};
 use arrow_schema::{ArrowError, DataType};
 use std::fmt::{Debug, Formatter};
 use std::hash::Hash;
@@ -284,7 +287,12 @@ impl ScalarZipper {
             DataType::LargeBinary => {
                 Arc::new(BytesScalarImpl::<LargeBinaryType>::new(truthy, 
falsy)) as Arc<dyn ZipImpl>
             },
-            // TODO: Handle Utf8View 
https://github.com/apache/arrow-rs/issues/8724
+            DataType::Utf8View => {
+                Arc::new(ByteViewScalarImpl::<StringViewType>::new(truthy, 
falsy)) as Arc<dyn ZipImpl>
+            },
+            DataType::BinaryView => {
+                Arc::new(ByteViewScalarImpl::<BinaryViewType>::new(truthy, 
falsy)) as Arc<dyn ZipImpl>
+            },
             _ => {
                 Arc::new(FallbackImpl::new(truthy, falsy)) as Arc<dyn ZipImpl>
             },
@@ -657,6 +665,177 @@ fn maybe_prep_null_mask_filter(predicate: &BooleanArray) -> BooleanBuffer {
     }
 }
 
+struct ByteViewScalarImpl<T: ByteViewType> {
+    truthy_view: Option<u128>,
+    truthy_buffers: Vec<Buffer>,
+    falsy_view: Option<u128>,
+    falsy_buffers: Vec<Buffer>,
+    phantom: PhantomData<T>,
+}
+
+impl<T: ByteViewType> ByteViewScalarImpl<T> {
+    fn new(truthy: &dyn Array, falsy: &dyn Array) -> Self {
+        let (truthy_view, truthy_buffers) = Self::get_value_from_scalar(truthy);
+        let (falsy_view, falsy_buffers) = Self::get_value_from_scalar(falsy);
+        Self {
+            truthy_view,
+            truthy_buffers,
+            falsy_view,
+            falsy_buffers,
+            phantom: PhantomData,
+        }
+    }
+
+    fn get_value_from_scalar(scalar: &dyn Array) -> (Option<u128>, Vec<Buffer>) {
+        if scalar.is_null(0) {
+            (None, vec![])
+        } else {
+            let (views, buffers, _) = scalar.as_byte_view::<T>().clone().into_parts();
+            (views.first().copied(), buffers)
+        }
+    }
+
+    fn get_views_for_single_non_nullable(
+        predicate: BooleanBuffer,
+        value: u128,
+        buffers: Vec<Buffer>,
+    ) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+        let number_of_true = predicate.count_set_bits();
+        let number_of_values = predicate.len();
+
+        // Fast path for all nulls
+        if number_of_true == 0 {
+            // All values are null
+            return (
+                vec![0; number_of_values].into(),
+                vec![],
+                Some(NullBuffer::new_null(number_of_values)),
+            );
+        }
+        let bytes = vec![value; number_of_values];
+
+        // If value is true and we want to handle the TRUTHY case, the null buffer will have 1 (meaning not null)
+        // If value is false and we want to handle the FALSY case, the null buffer will have 0 (meaning null)
+        let nulls = NullBuffer::new(predicate);
+        (bytes.into(), buffers, Some(nulls))
+    }
+
+    fn get_views_for_non_nullable(
+        predicate: BooleanBuffer,
+        result_len: usize,
+        truthy_view: u128,
+        truthy_buffers: Vec<Buffer>,
+        falsy_view: u128,
+        falsy_buffers: Vec<Buffer>,
+    ) -> (ScalarBuffer<u128>, Vec<Buffer>, Option<NullBuffer>) {
+        let true_count = predicate.count_set_bits();
+        match true_count {
+            0 => {
+                // all values are falsy
+                (vec![falsy_view; result_len].into(), falsy_buffers, None)
+            }
+            n if n == predicate.len() => {
+                // all values are truthy
+                (vec![truthy_view; result_len].into(), truthy_buffers, None)
+            }
+            _ => {
+                let true_count = predicate.count_set_bits();
+                let mut buffers: Vec<Buffer> = truthy_buffers.to_vec();
+
+                // If the falsy buffers are empty, we can use the falsy view as it is, because the value
+                // is completely inlined. Otherwise, we have non-inlined values in the buffer, and we need
+                // to recalculate the falsy view
+                let view_falsy = if falsy_buffers.is_empty() {
+                    falsy_view
+                } else {
+                    let byte_view_falsy = ByteView::from(falsy_view);
+                    let new_index_falsy_buffers =
+                        buffers.len() as u32 + byte_view_falsy.buffer_index;
+                    buffers.extend(falsy_buffers);
+                    let byte_view_falsy =
+                        byte_view_falsy.with_buffer_index(new_index_falsy_buffers);
+                    byte_view_falsy.as_u128()
+                };
+
+                let total_number_of_bytes = true_count * 16 + (predicate.len() - true_count) * 16;
+                let mut mutable = MutableBuffer::new(total_number_of_bytes);
+                let mut filled = 0;
+
+                SlicesIterator::from(&predicate).for_each(|(start, end)| {
+                    if start > filled {
+                        let false_repeat_count = start - filled;
+                        mutable
+                            .repeat_slice_n_times(view_falsy.to_byte_slice(), false_repeat_count);
+                    }
+                    let true_repeat_count = end - start;
+                    mutable.repeat_slice_n_times(truthy_view.to_byte_slice(), true_repeat_count);
+                    filled = end;
+                });
+
+                if filled < predicate.len() {
+                    let false_repeat_count = predicate.len() - filled;
+                    mutable.repeat_slice_n_times(view_falsy.to_byte_slice(), false_repeat_count);
+                }
+
+                let bytes = Buffer::from(mutable);
+                (bytes.into(), buffers, None)
+            }
+        }
+    }
+}
+
+impl<T: ByteViewType> Debug for ByteViewScalarImpl<T> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("ByteViewScalarImpl")
+            .field("truthy", &self.truthy_view)
+            .field("falsy", &self.falsy_view)
+            .finish()
+    }
+}
+
+impl<T: ByteViewType> ZipImpl for ByteViewScalarImpl<T> {
+    fn create_output(&self, predicate: &BooleanArray) -> Result<ArrayRef, ArrowError> {
+        let result_len = predicate.len();
+        // Nulls are treated as false
+        let predicate = maybe_prep_null_mask_filter(predicate);
+
+        let (views, buffers, nulls) = match (self.truthy_view, self.falsy_view) {
+            (Some(truthy), Some(falsy)) => Self::get_views_for_non_nullable(
+                predicate,
+                result_len,
+                truthy,
+                self.truthy_buffers.clone(),
+                falsy,
+                self.falsy_buffers.clone(),
+            ),
+            (Some(truthy), None) => Self::get_views_for_single_non_nullable(
+                predicate,
+                truthy,
+                self.truthy_buffers.clone(),
+            ),
+            (None, Some(falsy)) => {
+                let predicate = predicate.not();
+                Self::get_views_for_single_non_nullable(
+                    predicate,
+                    falsy,
+                    self.falsy_buffers.clone(),
+                )
+            }
+            (None, None) => {
+                // All values are null
+                (
+                    vec![0; result_len].into(),
+                    vec![],
+                    Some(NullBuffer::new_null(result_len)),
+                )
+            }
+        };
+
+        let result = unsafe { GenericByteViewArray::<T>::new_unchecked(views, buffers, nulls) };
+        Ok(Arc::new(result))
+    }
+}
+
 #[cfg(test)]
 mod test {
     use super::*;
@@ -1222,4 +1401,158 @@ mod test {
         ]);
         assert_eq!(actual, &expected);
     }
+
+    #[test]
+    fn test_zip_kernel_scalar_strings_array_view() {
+        let scalar_truthy = Scalar::new(StringViewArray::from(vec!["hello"]));
+        let scalar_falsy = Scalar::new(StringViewArray::from(vec!["world"]));
+
+        let mask = BooleanArray::from(vec![true, false, true, false]);
+        let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
+        let actual = out.as_string_view();
+        let expected = StringViewArray::from(vec![
+            Some("hello"),
+            Some("world"),
+            Some("hello"),
+            Some("world"),
+        ]);
+        assert_eq!(actual, &expected);
+    }
+
+    #[test]
+    fn test_zip_kernel_scalar_binary_array_view() {
+        let scalar_truthy = Scalar::new(BinaryViewArray::from_iter_values(vec![b"hello"]));
+        let scalar_falsy = Scalar::new(BinaryViewArray::from_iter_values(vec![b"world"]));
+
+        let mask = BooleanArray::from(vec![true, false]);
+        let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
+        let actual = out.as_byte_view();
+        let expected = BinaryViewArray::from_iter_values(vec![b"hello", b"world"]);
+        assert_eq!(actual, &expected);
+    }
+
+    #[test]
+    fn test_zip_kernel_scalar_strings_array_view_with_nulls() {
+        let scalar_truthy = Scalar::new(StringViewArray::from_iter_values(["hello"]));
+        let scalar_falsy = Scalar::new(StringViewArray::new_null(1));
+
+        let mask = BooleanArray::from(vec![true, true, false, false, true]);
+        let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
+        let actual = out.as_any().downcast_ref::<StringViewArray>().unwrap();
+        let expected = StringViewArray::from_iter(vec![
+            Some("hello"),
+            Some("hello"),
+            None,
+            None,
+            Some("hello"),
+        ]);
+        assert_eq!(actual, &expected);
+    }
+
+    #[test]
+    fn test_zip_kernel_scalar_strings_array_view_all_true_null() {
+        let scalar_truthy = Scalar::new(StringViewArray::new_null(1));
+        let scalar_falsy = Scalar::new(StringViewArray::new_null(1));
+        let mask = BooleanArray::from(vec![true, true]);
+        let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
+        let actual = out.as_any().downcast_ref::<StringViewArray>().unwrap();
+        let expected = StringViewArray::from_iter(vec![None::<String>, None]);
+        assert_eq!(actual, &expected);
+    }
+
+    #[test]
+    fn test_zip_kernel_scalar_strings_array_view_all_false_null() {
+        let scalar_truthy = Scalar::new(StringViewArray::new_null(1));
+        let scalar_falsy = Scalar::new(StringViewArray::new_null(1));
+        let mask = BooleanArray::from(vec![false, false]);
+        let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
+        let actual = out.as_any().downcast_ref::<StringViewArray>().unwrap();
+        let expected = StringViewArray::from_iter(vec![None::<String>, None]);
+        assert_eq!(actual, &expected);
+    }
+
+    #[test]
+    fn test_zip_kernel_scalar_string_array_view_all_true() {
+        let scalar_truthy = Scalar::new(StringViewArray::from(vec!["hello"]));
+        let scalar_falsy = Scalar::new(StringViewArray::from(vec!["world"]));
+
+        let mask = BooleanArray::from(vec![true, true]);
+        let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
+        let actual = out.as_string_view();
+        let expected = StringViewArray::from(vec![Some("hello"), Some("hello")]);
+        assert_eq!(actual, &expected);
+    }
+
+    #[test]
+    fn test_zip_kernel_scalar_string_array_view_all_false() {
+        let scalar_truthy = Scalar::new(StringViewArray::from(vec!["hello"]));
+        let scalar_falsy = Scalar::new(StringViewArray::from(vec!["world"]));
+
+        let mask = BooleanArray::from(vec![false, false]);
+        let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
+        let actual = out.as_string_view();
+        let expected = StringViewArray::from(vec![Some("world"), Some("world")]);
+        assert_eq!(actual, &expected);
+    }
+
+    #[test]
+    fn test_zip_kernel_scalar_strings_large_strings() {
+        let scalar_truthy = Scalar::new(StringViewArray::from(vec!["longer than 12 bytes"]));
+        let scalar_falsy = Scalar::new(StringViewArray::from(vec!["another longer than 12 bytes"]));
+
+        let mask = BooleanArray::from(vec![true, false]);
+        let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
+        let actual = out.as_string_view();
+        let expected = StringViewArray::from(vec![
+            Some("longer than 12 bytes"),
+            Some("another longer than 12 bytes"),
+        ]);
+        assert_eq!(actual, &expected);
+    }
+
+    #[test]
+    fn test_zip_kernel_scalar_strings_array_view_large_short_strings() {
+        let scalar_truthy = Scalar::new(StringViewArray::from(vec!["hello"]));
+        let scalar_falsy = Scalar::new(StringViewArray::from(vec!["longer than 12 bytes"]));
+
+        let mask = BooleanArray::from(vec![true, false, true, false]);
+        let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
+        let actual = out.as_string_view();
+        let expected = StringViewArray::from(vec![
+            Some("hello"),
+            Some("longer than 12 bytes"),
+            Some("hello"),
+            Some("longer than 12 bytes"),
+        ]);
+        assert_eq!(actual, &expected);
+    }
+    #[test]
+    fn test_zip_kernel_scalar_strings_array_view_large_all_true() {
+        let scalar_truthy = Scalar::new(StringViewArray::from(vec!["longer than 12 bytes"]));
+        let scalar_falsy = Scalar::new(StringViewArray::from(vec!["another longer than 12 bytes"]));
+
+        let mask = BooleanArray::from(vec![true, true]);
+        let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
+        let actual = out.as_string_view();
+        let expected = StringViewArray::from(vec![
+            Some("longer than 12 bytes"),
+            Some("longer than 12 bytes"),
+        ]);
+        assert_eq!(actual, &expected);
+    }
+
+    #[test]
+    fn test_zip_kernel_scalar_strings_array_view_large_all_false() {
+        let scalar_truthy = Scalar::new(StringViewArray::from(vec!["longer than 12 bytes"]));
+        let scalar_falsy = Scalar::new(StringViewArray::from(vec!["another longer than 12 bytes"]));
+
+        let mask = BooleanArray::from(vec![false, false]);
+        let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
+        let actual = out.as_string_view();
+        let expected = StringViewArray::from(vec![
+            Some("another longer than 12 bytes"),
+            Some("another longer than 12 bytes"),
+        ]);
+        assert_eq!(actual, &expected);
+    }
 }


Reply via email to