This is an automated email from the ASF dual-hosted git repository.
tallison pushed a change to branch TIKA-4691-improve-charset-detection
in repository https://gitbox.apache.org/repos/asf/tika.git
from 9b4894f757 Update charset detector.
add e035d0110e TIKA-4688 -- fix truncated ooxml regression (#2691)
add 6f9dc085a4 TIKA-4327: update javacpp, aws, fastutil, reactor, swagger-annotations
add c6fe6360b8 TIKA-4327: update cxf
add b9a0d9889b TIKA-4689 - streamline embedded file naming (#2692)
add a026574a25 TIKA-4327: update mockito
add 457629bde2 TIKA-4327: update aws
add 771688fa39 TIKA-4690-add-generative-models (#2693)
add 3e6f4d7ff0 Merge main into TIKA-4691-improve-charset-detection
No new revisions were added by this update.
Summary of changes:
.../ROOT/pages/advanced/charset-detection-eval.txt | 1 -
.../tika/extractor/EmbeddedDocumentUtil.java | 83 ++++++++++++++++++++++
.../java/org/apache/tika/io/FilenameUtils.java | 12 +---
.../apache/tika/metadata/TikaCoreProperties.java | 8 +++
.../org/apache/tika/parser/AutoDetectParser.java | 6 +-
.../apache/tika/parser/RecursiveParserWrapper.java | 7 +-
tika-parent/pom.xml | 18 ++---
.../tika/parser/sqlite3/SQLite3ParserTest.java | 6 +-
.../apache/tika/parser/jdbc/JDBCTableReader.java | 1 +
.../microsoft/MSEmbeddedStreamTranslator.java | 1 +
.../parser/microsoft/AbstractPOIFSExtractor.java | 3 +
.../tika/parser/microsoft/HSLFExtractor.java | 40 +++++++++--
.../apache/tika/parser/microsoft/TNEFParser.java | 22 ++++--
.../tika/parser/microsoft/WordExtractor.java | 9 ++-
.../microsoft/ooxml/OOXMLExtractorFactory.java | 11 ++-
.../parser/microsoft/rtf/RTFEmbObjHandler.java | 8 ++-
.../parser/microsoft/rtf/RTFObjDataParser.java | 7 +-
.../microsoft/POIContainerExtractionTest.java | 8 +--
.../parser/microsoft/PowerPointParserTest.java | 9 +--
.../tika/parser/microsoft/WordParserTest.java | 6 +-
.../parser/microsoft/ooxml/TruncatedOOXMLTest.java | 16 +++++
.../tika/parser/pdf/image/ImageGraphicsEngine.java | 4 +-
.../org/apache/tika/parser/pdf/PDFParserTest.java | 6 +-
.../microsoft/POIContainerExtractionTest.java | 64 ++++++++---------
.../tika/parser/microsoft/rtf/RTFParserTest.java | 18 ++---
.../org/apache/tika/parser/pdf/PDFParserTest.java | 4 +-
.../core/extractor/StandardUnpackSelector.java | 13 +---
.../tika/pipes/core/server/ParseHandler.java | 8 ++-
28 files changed, 286 insertions(+), 113 deletions(-)