This is an automated email from the ASF dual-hosted git repository.
tallison pushed a change to branch TIKA-4712
in repository https://gitbox.apache.org/repos/asf/tika.git
from a18b129869 TIKA-4712 -- update documentation
add fa6895c61f TIKA-4327: update aws, azure, error-prone-annotations
add 50cb0fa1dc Merge branch 'main' of https://gitbox.apache.org/repos/asf/tika
add 9b211c5e11 TIKA-4700 -- this adds a few more components to the activator and service loader for osgi. (#2751)
add dc2ee57e94 TIKA-4699 - fix bundle to handle tika-standard-parsers-package (#2752)
add 8501894c11 TIKA-4707 -- rm dom parsers for docx/pptx (#2747)
add d5c6e07194 Merge remote-tracking branch 'origin/main' into TIKA-4712
No new revisions were added by this update.
Summary of changes:
tika-bundles/tika-bundle-standard/pom.xml | 4 +
.../java/org/apache/tika/config/ServiceLoader.java | 10 +-
.../java/org/apache/tika/config/TikaActivator.java | 31 +-
tika-parent/pom.xml | 6 +-
.../ooxml/AbstractOOXMLDocxPackageTest.java | 51 --
.../ooxml/AbstractOOXMLPptxPackageTest.java | 41 --
.../microsoft/ooxml/OOXMLDocxDOMPackageTest.java | 94 ---
.../microsoft/ooxml/OOXMLDocxSAXPackageTest.java | 32 +-
.../microsoft/ooxml/OOXMLPptxDOMPackageTest.java | 27 -
.../microsoft/ooxml/OOXMLPptxSAXPackageTest.java | 26 +-
.../tika-parser-microsoft-module/pom.xml | 6 +
.../tika/parser/microsoft/OfficeParserConfig.java | 37 --
.../microsoft/ooxml/AbstractOOXMLExtractor.java | 36 +-
.../parser/microsoft/ooxml/MetadataExtractor.java | 279 +-------
.../parser/microsoft/ooxml/OOXMLExtractor.java | 15 -
.../microsoft/ooxml/OOXMLExtractorFactory.java | 239 +++----
.../ooxml/POIXMLTextExtractorDecorator.java | 51 --
.../microsoft/ooxml/SAXBasedMetadataExtractor.java | 1 -
.../ooxml/SXSLFPowerPointExtractorDecorator.java | 11 +-
.../ooxml/SXWPFWordExtractorDecorator.java | 10 +-
.../microsoft/ooxml/TikaXSSFBCommentsTable.java | 137 ++++
.../ooxml/TikaXSSFBSharedStringsTable.java | 159 +++++
.../microsoft/ooxml/VSDXExtractorDecorator.java | 174 +++++
.../ooxml/XSLFPowerPointExtractorDecorator.java | 412 ------------
.../ooxml/XSSFBExcelExtractorDecorator.java | 98 +--
.../ooxml/XSSFExcelExtractorDecorator.java | 24 +-
.../ooxml/XWPFWordExtractorDecorator.java | 734 ---------------------
.../microsoft/ooxml/xps/XPSExtractorDecorator.java | 16 +-
.../microsoft/ooxml/xps/XPSTextExtractor.java | 89 ---
.../xslf/XSLFEventBasedPowerPointExtractor.java | 212 +-----
.../ooxml/xwpf/XWPFEventBasedWordExtractor.java | 386 +----------
.../microsoft/ooxml/AbstractOOXMLDocxTest.java | 601 -----------------
.../microsoft/ooxml/AbstractOOXMLPptxTest.java | 363 ----------
.../parser/microsoft/ooxml/OOXMLDocxDOMTest.java | 133 ----
.../parser/microsoft/ooxml/OOXMLDocxSAXTest.java | 505 ++++++++++++--
.../parser/microsoft/ooxml/OOXMLPptxDOMTest.java | 204 ------
.../parser/microsoft/ooxml/OOXMLPptxSAXTest.java | 341 ++++++++--
.../parser/microsoft/ooxml/VSDXParserTest.java | 62 ++
.../resources/configs/tika-config-sax-docx.json | 7 +-
.../resources/configs/tika-config-sax-macros.json | 4 +-
.../resources/test-documents/testVISIO_60489.vsdx | Bin 0 -> 44426 bytes
.../resources/test-documents/testVISIO_text.vsdx | Bin 0 -> 22343 bytes
42 files changed, 1547 insertions(+), 4121 deletions(-)
delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-integration-tests/src/test/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLDocxPackageTest.java
delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-integration-tests/src/test/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLPptxPackageTest.java
delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-integration-tests/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLDocxDOMPackageTest.java
delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-integration-tests/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLPptxDOMPackageTest.java
delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/POIXMLTextExtractorDecorator.java
create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/TikaXSSFBCommentsTable.java
create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/TikaXSSFBSharedStringsTable.java
create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/VSDXExtractorDecorator.java
delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xps/XPSTextExtractor.java
delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLDocxTest.java
delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLPptxTest.java
delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLDocxDOMTest.java
delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLPptxDOMTest.java
create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/VSDXParserTest.java
create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testVISIO_60489.vsdx
create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testVISIO_text.vsdx