This is an automated email from the ASF dual-hosted git repository.
tallison pushed a change to branch TIKA-4636-simplify-embedded-extractor-handling
in repository https://gitbox.apache.org/repos/asf/tika.git
from d64041cf2d Merge origin/main into simplify-embedded-extractor-handling
add 222e0859e3 TIKA-4638 -- unify sax style configuration (#2557)
add 1828685d31 merge main, fix conflicts
No new revisions were added by this update.
Summary of changes:
.../ParsingEmbeddedDocumentExtractor.java | 18 +--
.../tika/extractor/StandardExtractorFactory.java | 11 +-
.../java/org/apache/tika/parser/EmptyParser.java | 2 +-
.../tika/parser/external/ExternalParser.java | 2 +-
.../tika/parser/external2/ExternalParser.java | 2 +-
.../java/org/apache/tika/sax/SAXOutputConfig.java | 76 +++++++++++++
.../org/apache/tika/sax/XHTMLContentHandler.java | 87 +++++++++++----
.../apache/tika/sax/XHTMLContentHandlerTest.java | 123 +++++++++++++++++++++
.../org/apache/tika/example/RollbackSoftware.java | 2 +-
.../org/apache/custom/parser/MyCustomParser.java | 2 +-
.../apache/tika/parser/envi/EnviHeaderParser.java | 2 +-
.../org/apache/tika/parser/gdal/GDALParser.java | 8 +-
.../geoinfo/GeographicInformationParser.java | 2 +-
.../org/apache/tika/parser/grib/GribParser.java | 2 +-
.../java/org/apache/tika/parser/hdf/HDFParser.java | 2 +-
.../apache/tika/parser/isatab/ISArchiveParser.java | 2 +-
.../apache/tika/parser/netcdf/NetCDFParser.java | 2 +-
.../apache/tika/parser/ner/NamedEntityParser.java | 2 +-
.../parser/transcribe/aws/AmazonTranscribe.java | 2 +-
.../tika/parser/apple/AppleSingleFileParser.java | 2 +-
.../org/apache/tika/parser/apple/PListParser.java | 2 +-
.../tika/parser/iwork/IWorkPackageParser.java | 2 +-
.../parser/iwork/iwana/IWork13PackageParser.java | 2 +-
.../org/apache/tika/parser/audio/AudioParser.java | 2 +-
.../org/apache/tika/parser/audio/MidiParser.java | 2 +-
.../java/org/apache/tika/parser/mp3/Mp3Parser.java | 2 +-
.../java/org/apache/tika/parser/mp4/MP4Parser.java | 2 +-
.../org/apache/tika/parser/ogg/FlacParser.java | 2 +-
.../java/org/apache/tika/parser/ogg/OggParser.java | 2 +-
.../org/apache/tika/parser/ogg/OpusParser.java | 2 +-
.../org/apache/tika/parser/ogg/SpeexParser.java | 2 +-
.../org/apache/tika/parser/ogg/TheoraParser.java | 2 +-
.../org/apache/tika/parser/ogg/VorbisParser.java | 2 +-
.../org/apache/tika/parser/video/FLVParser.java | 2 +-
.../org/apache/tika/parser/dgn/DGN8Parser.java | 2 +-
.../java/org/apache/tika/parser/dwg/DWGParser.java | 2 +-
.../org/apache/tika/parser/dwg/DWGReadParser.java | 2 +-
.../java/org/apache/tika/parser/prt/PRTParser.java | 2 +-
.../org/apache/tika/parser/asm/ClassParser.java | 2 +-
.../apache/tika/parser/asm/XHTMLClassVisitor.java | 5 +-
.../apache/tika/parser/code/SourceCodeParser.java | 2 +-
.../tika/parser/executable/ExecutableParser.java | 2 +-
.../executable/UniversalExecutableParser.java | 2 +-
.../java/org/apache/tika/parser/mat/MatParser.java | 2 +-
.../org/apache/tika/parser/sas/SAS7BDATParser.java | 2 +-
.../org/apache/tika/parser/crypto/TSDParser.java | 2 +-
.../tika/parser/font/AdobeFontMetricParser.java | 2 +-
.../apache/tika/parser/font/TrueTypeParser.java | 2 +-
.../org/apache/tika/parser/html/HtmlHandler.java | 2 +-
.../tika/parser/image/AbstractImageParser.java | 4 +-
.../org/apache/tika/parser/image/ICNSParser.java | 2 +-
.../org/apache/tika/parser/image/PSDParser.java | 2 +-
.../org/apache/tika/parser/image/WebPParser.java | 2 +-
.../apache/tika/parser/jdbc/AbstractDBParser.java | 2 +-
.../org/apache/tika/parser/mail/RFC822Parser.java | 2 +-
.../org/apache/tika/parser/mbox/MboxParser.java | 2 +-
.../apache/tika/parser/microsoft/EMFParser.java | 2 +-
.../tika/parser/microsoft/JackcessParser.java | 2 +-
.../tika/parser/microsoft/MSOwnerFileParser.java | 2 +-
.../apache/tika/parser/microsoft/OfficeParser.java | 2 +-
.../tika/parser/microsoft/OldExcelParser.java | 2 +-
.../apache/tika/parser/microsoft/TNEFParser.java | 2 +-
.../apache/tika/parser/microsoft/WMFParser.java | 2 +-
.../microsoft/activemime/ActiveMimeParser.java | 2 +-
.../tika/parser/microsoft/chm/ChmParser.java | 2 +-
.../tika/parser/microsoft/libpst/LibPstParser.java | 2 +-
.../parser/microsoft/onenote/OneNoteParser.java | 2 +-
.../microsoft/ooxml/AbstractOOXMLExtractor.java | 2 +-
.../ooxml/xwpf/ml2006/Word2006MLParser.java | 2 +-
.../parser/microsoft/pst/OutlookPSTParser.java | 2 +-
.../parser/microsoft/pst/PSTMailItemParser.java | 2 +-
.../tika/parser/microsoft/rtf/RTFParser.java | 2 +-
.../microsoft/xml/AbstractXML2003Parser.java | 2 +-
.../java/org/apache/tika/parser/dbf/DBFParser.java | 2 +-
.../java/org/apache/tika/parser/dif/DIFParser.java | 2 +-
.../org/apache/tika/parser/epub/EpubParser.java | 2 +-
.../org/apache/tika/parser/hwp/HwpV5Parser.java | 2 +-
.../apache/tika/parser/indesign/IDMLParser.java | 2 +-
.../java/org/apache/tika/parser/mif/MIFParser.java | 2 +-
.../tika/parser/odf/FlatOpenDocumentParser.java | 2 +-
.../tika/parser/odf/OpenDocumentContentParser.java | 2 +-
.../apache/tika/parser/odf/OpenDocumentParser.java | 2 +-
.../tika/parser/wordperfect/QuattroProParser.java | 2 +-
.../tika/parser/wordperfect/WordPerfectParser.java | 2 +-
.../org/apache/tika/parser/feed/FeedParser.java | 2 +-
.../apache/tika/parser/iptc/IptcAnpaParser.java | 2 +-
.../apache/tika/parser/ocr/TesseractOCRParser.java | 2 +-
.../apache/tika/parser/pdf/AbstractPDF2XHTML.java | 2 +-
.../java/org/apache/tika/parser/pdf/PDFParser.java | 2 +-
.../apache/tika/parser/pkg/CompressorParser.java | 2 +-
.../org/apache/tika/parser/pkg/PackageParser.java | 2 +-
.../java/org/apache/tika/parser/pkg/RarParser.java | 2 +-
.../org/apache/tika/parser/pkg/UnrarParser.java | 2 +-
.../apache/tika/parser/csv/TextAndCSVParser.java | 8 +-
.../tika/parser/strings/Latin1StringsParser.java | 2 +-
.../apache/tika/parser/strings/StringsParser.java | 2 +-
.../java/org/apache/tika/parser/txt/TXTParser.java | 2 +-
.../org/apache/tika/parser/http/HttpParser.java | 2 +-
.../org/apache/tika/parser/wacz/WACZParser.java | 2 +-
.../org/apache/tika/parser/warc/WARCParser.java | 2 +-
.../java/org/apache/tika/parser/tmx/TMXParser.java | 2 +-
.../apache/tika/parser/xliff/XLIFF12Parser.java | 2 +-
.../org/apache/tika/parser/xliff/XLZParser.java | 2 +-
.../java/org/apache/tika/parser/xml/XMLParser.java | 2 +-
.../tika/parser/AutoDetectReaderParserTest.java | 2 +-
.../org/apache/tika/config/loader/TikaLoader.java | 2 +
106 files changed, 384 insertions(+), 150 deletions(-)
create mode 100644 tika-core/src/main/java/org/apache/tika/sax/SAXOutputConfig.java