This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch TIKA-4690-add-generative-models in repository https://gitbox.apache.org/repos/asf/tika.git
commit fd7f62103f4b0e92bda2c778ecdb8fbcf45574b4 Merge: 65cceb58cd 457629bde2 Author: tballison <[email protected]> AuthorDate: Fri Mar 13 22:12:30 2026 -0400 Merge remote-tracking branch 'origin/main' into add-generative-models .../tika/extractor/EmbeddedDocumentUtil.java | 83 ++++++++++++++++++++++ .../java/org/apache/tika/io/FilenameUtils.java | 12 +--- .../apache/tika/metadata/TikaCoreProperties.java | 8 +++ .../org/apache/tika/parser/AutoDetectParser.java | 6 +- .../apache/tika/parser/RecursiveParserWrapper.java | 7 +- tika-parent/pom.xml | 18 ++--- .../tika/parser/sqlite3/SQLite3ParserTest.java | 6 +- .../apache/tika/parser/jdbc/JDBCTableReader.java | 1 + .../microsoft/MSEmbeddedStreamTranslator.java | 1 + .../parser/microsoft/AbstractPOIFSExtractor.java | 3 + .../tika/parser/microsoft/HSLFExtractor.java | 40 +++++++++-- .../apache/tika/parser/microsoft/TNEFParser.java | 22 ++++-- .../tika/parser/microsoft/WordExtractor.java | 9 ++- .../microsoft/ooxml/OOXMLExtractorFactory.java | 11 ++- .../parser/microsoft/rtf/RTFEmbObjHandler.java | 8 ++- .../parser/microsoft/rtf/RTFObjDataParser.java | 7 +- .../microsoft/POIContainerExtractionTest.java | 8 +-- .../parser/microsoft/PowerPointParserTest.java | 9 +-- .../tika/parser/microsoft/WordParserTest.java | 6 +- .../parser/microsoft/ooxml/TruncatedOOXMLTest.java | 16 +++++ .../tika/parser/pdf/image/ImageGraphicsEngine.java | 4 +- .../org/apache/tika/parser/pdf/PDFParserTest.java | 6 +- .../microsoft/POIContainerExtractionTest.java | 64 ++++++++--------- .../tika/parser/microsoft/rtf/RTFParserTest.java | 18 ++--- .../org/apache/tika/parser/pdf/PDFParserTest.java | 4 +- .../core/extractor/StandardUnpackSelector.java | 13 +--- .../tika/pipes/core/server/ParseHandler.java | 8 ++- 27 files changed, 286 insertions(+), 112 deletions(-)
