This is an automated email from the ASF dual-hosted git repository.
tallison pushed a change to branch TIKA-4707
in repository https://gitbox.apache.org/repos/asf/tika.git
from 85c74b0321 TIKA-4707 -- rm dom parsers for docx/pptx
add e4d7eb372f TIKA-4707 -- rm poi-ooxml-lite
No new revisions were added by this update.
Summary of changes:
.../tika-parser-microsoft-module/pom.xml | 6 +
.../microsoft/ooxml/AbstractOOXMLExtractor.java | 36 +-
.../parser/microsoft/ooxml/MetadataExtractor.java | 279 +--------------
.../parser/microsoft/ooxml/OOXMLExtractor.java | 15 -
.../microsoft/ooxml/OOXMLExtractorFactory.java | 219 ++++++------
.../ooxml/POIXMLTextExtractorDecorator.java | 51 ---
.../microsoft/ooxml/SAXBasedMetadataExtractor.java | 1 -
.../ooxml/SXSLFPowerPointExtractorDecorator.java | 11 +-
.../ooxml/SXWPFWordExtractorDecorator.java | 10 +-
.../microsoft/ooxml/TikaXSSFBCommentsTable.java | 137 ++++++++
.../ooxml/TikaXSSFBSharedStringsTable.java | 159 +++++++++
.../microsoft/ooxml/VSDXExtractorDecorator.java | 174 ++++++++++
.../ooxml/XSSFBExcelExtractorDecorator.java | 98 +++---
.../ooxml/XSSFExcelExtractorDecorator.java | 24 +-
.../microsoft/ooxml/xps/XPSExtractorDecorator.java | 16 +-
.../microsoft/ooxml/xps/XPSTextExtractor.java | 89 -----
.../xslf/XSLFEventBasedPowerPointExtractor.java | 212 +----------
.../ooxml/xwpf/XWPFEventBasedWordExtractor.java | 386 +--------------------
.../parser/microsoft/ooxml/VSDXParserTest.java | 62 ++++
.../resources/test-documents/testVISIO_60489.vsdx | Bin 0 -> 44426 bytes
.../resources/test-documents/testVISIO_text.vsdx | Bin 0 -> 22343 bytes
21 files changed, 732 insertions(+), 1253 deletions(-)
delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/POIXMLTextExtractorDecorator.java
create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/TikaXSSFBCommentsTable.java
create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/TikaXSSFBSharedStringsTable.java
create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/VSDXExtractorDecorator.java
delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/xps/XPSTextExtractor.java
create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/VSDXParserTest.java
create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testVISIO_60489.vsdx
create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testVISIO_text.vsdx