ALPSMAC opened a new issue, #49699:
URL: https://github.com/apache/arrow/issues/49699

   ### Describe the bug, including details regarding any error messages, 
version, and platform.
   
   I'm interacting with a Flight gRPC server written in Rust and attempting to 
call `DoGet`.  The service hands back a 32-byte Ticket (ostensibly two 16-byte 
UUIDs concatenated together - the server has good reason for that ticket size 
and it cannot be easily changed).  
   
   I can successfully read the Flight RecordBatch stream in Rust and in Golang, 
but cannot read back the RecordBatch stream in Python (and by extension 
probably C++).
   
   Below is a working Golang example (presuming the ticket has been stored off 
in a `ticket.bin` file of 32 bytes). 
   
   (PLEASE NOTE - much of this code has been generated with LLM assistance. I 
have tried to check it for accuracy, but if there is a gross misuse of the 
Arrow Flight library somewhere in here, that's an 
error-between-user-and-keyboard, and my apologies up front): 
   
   ```golang
   package main
   
   import (
       "context"
       "fmt"
       "os"
       "github.com/apache/arrow/go/arrow/flight"
       "google.golang.org/grpc"
       "google.golang.org/grpc/credentials/insecure"
   )
   
   func main() {
       // 1. Define the Flight server address
       serverAddr := "localhost:9090" // Change if your Flight server is 
elsewhere
   
       // 2. Read the ticket from the file created by the Python script
       ticketBytes, err := os.ReadFile("ticket.bin")
       if err != nil {
           fmt.Printf("Failed to read ticket file 'ticket.bin': %v\n", err)
           os.Exit(1)
       }
       fmt.Printf("Successfully read %d-byte ticket from file.\n", 
len(ticketBytes))
   
       // 3. Connect to the Arrow Flight server
       // We use insecure credentials for this example.
       client, err := flight.NewClientWithMiddleware(serverAddr, nil, nil, 
grpc.WithTransportCredentials(insecure.NewCredentials()))
       if err != nil {
           fmt.Printf("Failed to create Flight client: %v\n", err)
           os.Exit(1)
       }
       defer client.Close()
   
       fmt.Printf("Connected to Flight server at %s\n", serverAddr)
   
       // 4. Create the Flight Ticket object
       ticket := &flight.Ticket{Ticket: ticketBytes}
   
       // 5. Perform the DoGet call
       ctx := context.Background()
       stream, err := client.DoGet(ctx, ticket)
       if err != nil {
           fmt.Printf("DoGet call failed: %v\n", err)
           os.Exit(1)
       }
   
       // 6. Read the entire RecordBatch stream
       // The reader will handle consuming all messages and assembling the 
table.
       reader, err := flight.NewRecordReader(stream)
       if err != nil {
           fmt.Printf("Failed to create record reader from stream: %v\n", err)
           os.Exit(1)
       }
       defer reader.Release()
   
       rowCount := int64(0)
       // Iterate through the stream and count the rows
       for reader.Next() {
           rec := reader.Record()
           rowCount += rec.NumRows()
       }
   
       if reader.Err() != nil {
           fmt.Printf("Error reading stream: %v\n", reader.Err())
           os.Exit(1)
       }
   
       fmt.Println("\n--- Go Client Success! ---")
       fmt.Printf("Successfully read stream and found %d total rows.\n", 
rowCount)
       // You could print the schema with `fmt.Println(reader.Schema())`
   }
   ```
   
   I can provide a Rust client sample too if helpful, but it's a little more 
work to cobble that together.
   
   Here's the problematic Python code: 
   
   ```python
                      flight_service_stub = Flight_pb2_grpc.FlightServiceStub(channel)
   
                      # then later on after obtaining the ticket (or reading it from `ticket.bin`):
   
                      ## 1. Make the raw gRPC call (same as before)
                      ## This returns a raw, iterable gRPC stream object.
                      raw_stream = flight_service_stub.DoGet(flight_ticket_pb)
   
                      ## 2. Define a simple generator function that yields each message from the raw stream.
                      ## This acts as a bridge between the gRPC stream and the PyArrow reader.
                      def stream_generator(stream):
                          """Yield every message from ``stream`` unchanged, in order."""
                          for item in stream:
                              yield item
   
                      # --- Explicitly define the schema ---
                      # NOTE: Field names have been omitted for data sensitivity reasons.
                      explicit_schema = pyarrow.schema([
                          pyarrow.field("<omitted>", pyarrow.uint64(), nullable=False),
                          pyarrow.field("<omitted>", pyarrow.uint64(), nullable=False),
                          pyarrow.field("<omitted>", pyarrow.uint64(), nullable=False),
                          pyarrow.field("<omitted>", pyarrow.uint64(), nullable=False),
                          pyarrow.field("<omitted>", pyarrow.float64(), nullable=False)
                      ])
   
                      ## 3. Wrap the generator in a GeneratorStream.
                      ## This is a specific PyArrow class designed for this purpose.
                      generator_stream = pyarrow.flight.GeneratorStream(
                          # I've tried with and without defining a schema here, either way results in the same SEGFAULT
                          #schema=None,
                          schema=explicit_schema,
                          generator=stream_generator(raw_stream)
                      )
   
                      ## 4. Use a FlightStreamReader on the *wrapped* stream.
                      # NOTE(review): FlightStreamReader is normally obtained from
                      # pyarrow.flight.FlightClient.do_get(), and GeneratorStream appears to
                      # be intended for server-side do_get results. A reader constructed
                      # directly like this may hold no underlying C++ reader, which would be
                      # consistent with a crash inside ToTable -- TODO confirm.
                      reader = pyarrow.flight.FlightStreamReader(generator_stream)
   
                      ## 5. Read the data
                      ## ***************************************************************************************************
                      # SEGFAULT OCCURS HERE
                      ## ***************************************************************************************************
                      data_table = reader.read_all()
   ```
   
   If I attach gdb, here's the backtrace I get: 
   
   ```
   0x00007fffdd12ac87 in 
arrow::flight::FlightStreamReader::ToTable(arrow::StopToken const&) () from 
python-client/.venv/lib64/python3.12/site-packages/pyarrow/libarrow_flight.so.2300
   Missing separate debuginfos, use: yum debuginfo-install 
glibc-2.28-251.el8_10.31.x86_64 libgcc-8.5.0-28.el8_10.x86_64 
libstdc++-8.5.0-28.el8_10.x86_64 libuuid-2.32.1-48.el8_10.x86_64 
mpdecimal-2.5.1-3.el8.x86_64 openssl-libs-1.1.1k-15.el8_6.x86_64 
python3.12-libs-3.12.12-4.el8_10.x86_64 zlib-1.2.11-25.el8.x86_64
   (gdb) bt
   #0  0x00007fffdd12ac87 in 
arrow::flight::FlightStreamReader::ToTable(arrow::StopToken const&) () from 
python-client/.venv/lib64/python3.12/site-packages/pyarrow/libarrow_flight.so.2300
   #1  0x00007fffde27b13f in 
__pyx_pw_7pyarrow_7_flight_18FlightStreamReader_3read_all(_object*, _object* 
const*, long, _object*) [clone .lto_priv.0] () from 
python-client/.venv/lib64/python3.12/site-packages/pyarrow/_flight.cpython-312-x86_64-linux-gnu.so
   #2  0x00007ffff77669bf in PyObject_Vectorcall () from 
/lib64/libpython3.12.so.1.0
   #3  0x00007ffff76fb0ba in _PyEval_EvalFrameDefault () from 
/lib64/libpython3.12.so.1.0
   #4  0x00007ffff7771e7c in PyEval_EvalCode () from /lib64/libpython3.12.so.1.0
   #5  0x00007ffff7845561 in run_eval_code_obj () from 
/lib64/libpython3.12.so.1.0
   #6  0x00007ffff7883b5b in run_mod () from /lib64/libpython3.12.so.1.0
   #7  0x00007ffff788f030 in pyrun_file () from /lib64/libpython3.12.so.1.0
   #8  0x00007ffff788f212 in _PyRun_SimpleFileObject () from 
/lib64/libpython3.12.so.1.0
   #9  0x00007ffff788f2e4 in _PyRun_AnyFileObject () from 
/lib64/libpython3.12.so.1.0
   #10 0x00007ffff788f6b3 in Py_RunMain () from /lib64/libpython3.12.so.1.0
   #11 0x00007ffff7890dc9 in Py_BytesMain () from /lib64/libpython3.12.so.1.0
   #12 0x00007ffff679b865 in __libc_start_main () from /lib64/libc.so.6
   ```
   
   (again, some sensitive file path prefixes removed from stack trace). 
   
   My best guess and working theory is that there's an issue with the 32 byte 
ticket size the Rust service is returning, but that's based on interactive LLM 
hypothesizing and not direct evidence in the C++ implementation.
   
   Rust Flight version (server-side): 58.1.0
   Golang flight version: github.com/apache/arrow/go/v16/arrow/flight
   Python flight version: 23.0.1
   
   Python version: 3.12
   golang version: 1.22.3
   Rust version: 1.93.1
   
   ### Component(s)
   
   C++, FlightRPC, Python


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to