raulcd opened a new issue, #47778:
URL: https://github.com/apache/arrow/issues/47778

   ### Describe the bug, including details regarding any error messages, version, and platform.
   
   Several nightly tests and verification jobs have started failing with:
   ```
   =================================== FAILURES ===================================
   _____________________________ test_timezone_absent _____________________________
   
   datadir = PosixPath('/arrow/python/pyarrow/tests/data/orc')
   tmpdir = local('/tmp/pytest-of-root/pytest-0/test_timezone_absent0')
   
       def test_timezone_absent(datadir, tmpdir):
           # Example file relies on the timezone "US/Pacific". It should gracefully
           # fail, not crash, if the timezone database is present but the timezone
           # is not found (GH-40633).
           source_tzdir = Path('/usr/share/zoneinfo')
           if not source_tzdir.exists():
               pytest.skip(f"Test needs timezone database in {source_tzdir}")
           tzdir = Path(tmpdir / 'zoneinfo')
           try:
               shutil.copytree(source_tzdir, tzdir, symlinks=True)
           except OSError as e:
               pytest.skip(f"Failed to copy timezone database: {e}")
           (tzdir / 'US' / 'Pacific').unlink(missing_ok=True)
       
           path = datadir / 'TestOrcFile.testDate1900.orc'
           code = f"""if 1:
               import os
               os.environ['TZDIR'] = {str(tzdir)!r}
       
               from pyarrow import orc
               orc_file = orc.ORCFile({str(path)!r})
               try:
                   orc_file.read()
               except Exception as e:
                   assert "zoneinfo/US/Pacific" in str(e), e
               else:
                   assert False, "Should have raised exception"
           """
   >       subprocess.run([sys.executable, "-c", code], check=True)
   
   pyarrow/tests/test_orc.py:196: 
   _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
_ _ 
   
   input = None, capture_output = False, timeout = None, check = True
   popenargs = (['/tmp/arrow-HEAD.oREbi/venv-source/bin/python3', '-c', 'if 
1:\n        import os\n        os.environ[\'TZDIR\'] = \'...t 
"zoneinfo/US/Pacific" in str(e), e\n        else:\n            assert False, 
"Should have raised exception"\n    '],)
   kwargs = {}
   process = <Popen: returncode: 1 args: 
['/tmp/arrow-HEAD.oREbi/venv-source/bin/python3'...>
   stdout = None, stderr = None, retcode = 1
   
       def run(*popenargs,
               input=None, capture_output=False, timeout=None, check=False, 
**kwargs):
           """Run command with arguments and return a CompletedProcess instance.
       
           The returned instance will have attributes args, returncode, stdout 
and
           stderr. By default, stdout and stderr are not captured, and those 
attributes
           will be None. Pass stdout=PIPE and/or stderr=PIPE in order to 
capture them,
           or pass capture_output=True to capture both.
       
           If check is True and the exit code was non-zero, it raises a
           CalledProcessError. The CalledProcessError object will have the 
return code
           in the returncode attribute, and output & stderr attributes if those 
streams
           were captured.
       
           If timeout is given, and the process takes too long, a TimeoutExpired
           exception will be raised.
       
           There is an optional argument "input", allowing you to
           pass bytes or a string to the subprocess's stdin.  If you use this 
argument
           you may not also use the Popen constructor's "stdin" argument, as
           it will be used internally.
       
           By default, all communication is in bytes, and therefore any "input" 
should
           be bytes, and the stdout and stderr will be bytes. If in text mode, 
any
           "input" should be a string, and stdout and stderr will be strings 
decoded
           according to locale encoding, or by "encoding" if set. Text mode is
           triggered by setting any of text, encoding, errors or 
universal_newlines.
       
           The other arguments are the same as for the Popen constructor.
           """
           if input is not None:
               if kwargs.get('stdin') is not None:
                   raise ValueError('stdin and input arguments may not both be 
used.')
               kwargs['stdin'] = PIPE
       
           if capture_output:
               if kwargs.get('stdout') is not None or kwargs.get('stderr') is 
not None:
                   raise ValueError('stdout and stderr arguments may not be 
used '
                                    'with capture_output.')
               kwargs['stdout'] = PIPE
               kwargs['stderr'] = PIPE
       
           with Popen(*popenargs, **kwargs) as process:
               try:
                   stdout, stderr = process.communicate(input, timeout=timeout)
               except TimeoutExpired as exc:
                   process.kill()
                   if _mswindows:
                       # Windows accumulates the output in a single blocking
                       # read() call run on child threads, with the timeout
                       # being done in a join() on those threads.  communicate()
                       # _after_ kill() is required to collect that and add it
                       # to the exception.
                       exc.stdout, exc.stderr = process.communicate()
                   else:
                       # POSIX _communicate already populated the output so
                       # far into the TimeoutExpired exception.
                       process.wait()
                   raise
               except:  # Including KeyboardInterrupt, communicate handled that.
                   process.kill()
                   # We don't call process.wait() as .__exit__ does that for us.
                   raise
               retcode = process.poll()
               if check and retcode:
   >               raise CalledProcessError(retcode, process.args,
                                            output=stdout, stderr=stderr)
   E               subprocess.CalledProcessError: Command 
'['/tmp/arrow-HEAD.oREbi/venv-source/bin/python3', '-c', 'if 1:\n        import 
os\n        os.environ[\'TZDIR\'] = 
\'/tmp/pytest-of-root/pytest-0/test_timezone_absent0/zoneinfo\'\n\n        from 
pyarrow import orc\n        orc_file = 
orc.ORCFile(\'/arrow/python/pyarrow/tests/data/orc/TestOrcFile.testDate1900.orc\')\n
        try:\n            orc_file.read()\n        except Exception as e:\n     
       assert "zoneinfo/US/Pacific" in str(e), e\n        else:\n            
assert False, "Should have raised exception"\n    ']' returned non-zero exit 
status 1.
   
   /usr/lib/python3.10/subprocess.py:526: CalledProcessError
   ----------------------------- Captured stderr call -----------------------------
   Traceback (most recent call last):
     File "<string>", line 12, in <module>
   AssertionError: Should have raised exception
   ```
   - [verify-rc-source-python-linux-almalinux-8-amd64](https://github.com/ursacomputing/crossbow/actions/runs/18377086908/job/52353665688)
   - [verify-rc-source-python-linux-conda-latest-amd64](https://github.com/ursacomputing/crossbow/actions/runs/18377087568/job/52353668008)
   - [verify-rc-source-python-linux-ubuntu-22.04-amd64](https://github.com/ursacomputing/crossbow/actions/runs/18377087861/job/52353669078)
   - [verify-rc-source-python-linux-ubuntu-24.04-amd64](https://github.com/ursacomputing/crossbow/actions/runs/18377087016/job/52353665960)
   - [test-debian-12-python-3-amd64](https://github.com/ursacomputing/crossbow/actions/runs/18392907979/job/52406673630)
   - [test-debian-12-python-3-i386](https://github.com/ursacomputing/crossbow/actions/runs/18392907412/job/52406671621)
   - [test-ubuntu-22.04-python-3](https://github.com/ursacomputing/crossbow/actions/runs/18392906905/job/52406670146)
   - [test-ubuntu-22.04-python-313-freethreading](https://github.com/ursacomputing/crossbow/actions/runs/18392907589/job/52406672144)
   - [test-ubuntu-24.04-python-3](https://github.com/ursacomputing/crossbow/actions/runs/18392907963/job/52406673398)
   
   This seems related to the ORC upgrade:
   - https://github.com/apache/arrow/issues/47747
   
   I am trying to reproduce locally.
   
   ### Component(s)
   
   Continuous Integration, Python


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to