This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch TIKA-4715 in repository https://gitbox.apache.org/repos/asf/tika.git
commit 4a28a969fc7143063604254f304ea052f79679e5 Author: tallison <[email protected]> AuthorDate: Thu Apr 9 20:58:23 2026 -0400 TIKA-4715 - try new bundle tests --- tika-bundles/tika-bundle-standard/pom.xml | 102 ++--- .../tika/bundle/internal/BundleActivator.java | 53 +++ .../test/java/org/apache/tika/bundle/BundleIT.java | 413 +++++++-------------- tika-bundles/tika-bundle-standard/test-bundles.xml | 1 + tika-core/pom.xml | 3 + 5 files changed, 225 insertions(+), 347 deletions(-) diff --git a/tika-bundles/tika-bundle-standard/pom.xml b/tika-bundles/tika-bundle-standard/pom.xml index 2036060d1e..36da82f0a8 100644 --- a/tika-bundles/tika-bundle-standard/pom.xml +++ b/tika-bundles/tika-bundle-standard/pom.xml @@ -61,78 +61,23 @@ <version>${project.version}</version> </dependency> - <!-- Test dependencies --> - <dependency> - <groupId>org.ops4j.pax.exam</groupId> - <artifactId>pax-exam-junit4</artifactId> - <version>${pax.exam.version}</version> - <scope>test</scope> - </dependency> - <dependency> - <groupId>org.ops4j.pax.exam</groupId> - <artifactId>pax-exam-container-native</artifactId> - <version>${pax.exam.version}</version> - <scope>test</scope> - </dependency> + <!-- Test: programmatic Felix container + JUnit 5 --> <dependency> <groupId>org.apache.felix</groupId> <artifactId>org.apache.felix.framework</artifactId> <version>7.0.5</version> <scope>test</scope> </dependency> - <dependency> - <groupId>org.ops4j.pax.exam</groupId> - <artifactId>pax-exam-link-assembly</artifactId> - <version>${pax.exam.version}</version> - <scope>test</scope> - </dependency> - <dependency> - <groupId>org.ops4j.pax.url</groupId> - <artifactId>pax-url-aether</artifactId> - <version>3.0.2</version> - <scope>test</scope> - </dependency> - <dependency> - <groupId>jakarta.inject</groupId> - <artifactId>jakarta.inject-api</artifactId> - <version>2.0.1.MR</version> - <scope>test</scope> - </dependency> <dependency> <groupId>org.osgi</groupId> <artifactId>org.osgi.core</artifactId> - <scope>test</scope> - </dependency> - - <!-- after we migrate BundleIT to junit5, we can get rid of this --> - <dependency> - <groupId>org.junit.vintage</groupId> - <artifactId>junit-vintage-engine</artifactId> - <scope>test</scope> + <scope>provided</scope> </dependency> - - <!-- use non-log4j slf4j backend to prevent main classloader from loading log4j classes --> <dependency> <groupId>org.slf4j</groupId> <artifactId>slf4j-simple</artifactId> <scope>test</scope> </dependency> - - <dependency> - <groupId>org.glassfish.jaxb</groupId> - <artifactId>jaxb-runtime</artifactId> - <scope>test</scope> - </dependency> - <dependency> - <groupId>jakarta.activation</groupId> - <artifactId>jakarta.activation-api</artifactId> - </dependency> - <dependency> - <groupId>com.sun.xml.fastinfoset</groupId> - <artifactId>FastInfoset</artifactId> - <version>2.1.1</version> - <scope>test</scope> - </dependency> </dependencies> <build> @@ -147,6 +92,9 @@ <_runsystempackages>com.sun.xml.bind.marshaller, com.sun.xml.internal.bind.marshaller</_runsystempackages> <!-- The file below and the _include entry may be removed once Tika targets OpenJDK 9.0 or above --> <_include>src/main/resources/META-INF/MANIFEST.MF</_include> + <Bundle-Activator> + org.apache.tika.bundle.internal.BundleActivator + </Bundle-Activator> <Embed-Dependency>*;scope=compile;artifactId= tika-parser-*| tika-handler-boilerpipe| @@ -213,8 +161,24 @@ !org.junit, !org.junit.*, !junit.*, - org.apache.tika.fork, + org.apache.tika, + org.apache.tika.concurrent, + org.apache.tika.config, + org.apache.tika.detect, + org.apache.tika.exception, + org.apache.tika.extractor, + org.apache.tika.io, + org.apache.tika.language.detect, + org.apache.tika.metadata, + org.apache.tika.metadata.filter, org.apache.tika.metadata.writefilter, + org.apache.tika.mime, + org.apache.tika.parser, + org.apache.tika.parser.external, + org.apache.tika.parser.external2, + org.apache.tika.parser.multiple, + org.apache.tika.sax, + org.apache.tika.utils, org.slf4j, org.slf4j.event, org.slf4j.helpers, @@ -229,6 +193,11 @@ com.github.javaparser.ast.expr;resolution:=optional, com.github.javaparser.ast.nodeTypes;resolution:=optional, com.github.javaparser.ast.type;resolution:=optional, + com.github.javaparser.resolution;resolution:=optional, + com.github.javaparser.resolution.declarations;resolution:=optional, + com.github.javaparser.resolution.types;resolution:=optional, + com.github.javaparser.symbolsolver;resolution:=optional, + com.github.javaparser.symbolsolver.resolution.typesolvers;resolution:=optional, com.github.javaparser.utils;resolution:=optional, com.google.common.base;resolution:=optional, com.google.common.math;resolution:=optional, @@ -344,22 +313,17 @@ sun.nio.ch;resolution:=optional, sun.reflect.generics.reflectiveObjects;resolution:=optional, thredds.featurecollection;resolution:=optional, - * + *;resolution:=optional </Import-Package> </instructions> <createDependencyReducedPom>true</createDependencyReducedPom> </configuration> </plugin> - <!-- The Tika Bundle has no java code of its own, so no need to do --> - <!-- any forbidden API checking against it (it gets confused...) --> <plugin> <groupId>de.thetaphi</groupId> <artifactId>forbiddenapis</artifactId> <version>${forbiddenapis.version}</version> - <configuration> - <skip>true</skip> - </configuration> </plugin> <plugin> @@ -392,16 +356,6 @@ </goals> </execution> </executions> - <configuration> - <additionalClasspathElements> - <additionalClasspathElement>${project.build.directory}/test-bundles/jdk9plus</additionalClasspathElement> - </additionalClasspathElements> - <systemPropertyVariables> - <org.ops4j.pax.logging.DefaultServiceLog.level> - INFO - </org.ops4j.pax.logging.DefaultServiceLog.level> - </systemPropertyVariables> - </configuration> </plugin> <plugin> <groupId>org.apache.rat</groupId> diff --git a/tika-bundles/tika-bundle-standard/src/main/java/org/apache/tika/bundle/internal/BundleActivator.java b/tika-bundles/tika-bundle-standard/src/main/java/org/apache/tika/bundle/internal/BundleActivator.java new file mode 100644 index 0000000000..ffce4fd8a3 --- /dev/null +++ b/tika-bundles/tika-bundle-standard/src/main/java/org/apache/tika/bundle/internal/BundleActivator.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tika.bundle.internal; + +import java.util.Hashtable; + +import org.osgi.framework.BundleContext; +import org.osgi.framework.ServiceRegistration; + +import org.apache.tika.detect.DefaultDetector; +import org.apache.tika.detect.Detector; +import org.apache.tika.parser.DefaultParser; +import org.apache.tika.parser.Parser; + +/** + * Registers Tika Parser and Detector services when the bundle starts + * in an OSGi container. + */ +public class BundleActivator implements org.osgi.framework.BundleActivator { + + private ServiceRegistration detectorService; + private ServiceRegistration parserService; + + @Override + public void start(BundleContext context) throws Exception { + detectorService = context.registerService(Detector.class.getName(), + new DefaultDetector(BundleActivator.class.getClassLoader()), + new Hashtable<>()); + Parser parser = new DefaultParser(BundleActivator.class.getClassLoader()); + parserService = context.registerService(Parser.class.getName(), + parser, new Hashtable<>()); + } + + @Override + public void stop(BundleContext context) throws Exception { + parserService.unregister(); + detectorService.unregister(); + } +} diff --git a/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java b/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java index 4d3e3db541..1a2d4fdd0d 100644 --- a/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java +++ b/tika-bundles/tika-bundle-standard/src/test/java/org/apache/tika/bundle/BundleIT.java @@ -16,319 +16,186 @@ */ package org.apache.tika.bundle; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.ops4j.pax.exam.CoreOptions.bundle; -import static org.ops4j.pax.exam.CoreOptions.junitBundles; -import static org.ops4j.pax.exam.CoreOptions.mavenBundle; -import static org.ops4j.pax.exam.CoreOptions.options; -import static org.ops4j.pax.exam.CoreOptions.systemPackages; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.net.URISyntaxException; +import java.nio.file.Path; import java.nio.file.Paths; -import java.util.Arrays; -import java.util.HashSet; -import java.util.Set; -import java.util.jar.Attributes; -import java.util.jar.JarInputStream; -import java.util.jar.Manifest; +import java.util.HashMap; +import java.util.Map; +import java.util.ServiceLoader; -import jakarta.inject.Inject; -import org.junit.Ignore; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.ops4j.pax.exam.Configuration; -import org.ops4j.pax.exam.Option; -import org.ops4j.pax.exam.junit.PaxExam; -import org.ops4j.pax.exam.spi.reactors.ExamReactorStrategy; -import org.ops4j.pax.exam.spi.reactors.PerMethod; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; import org.osgi.framework.Bundle; import org.osgi.framework.BundleContext; +import org.osgi.framework.Constants; import org.osgi.framework.ServiceReference; -import org.xml.sax.ContentHandler; -import org.xml.sax.SAXException; - -import org.apache.tika.Tika; -import org.apache.tika.detect.DefaultDetector; -import org.apache.tika.detect.Detector; -import org.apache.tika.exception.EncryptedDocumentException; -import org.apache.tika.exception.TikaException; -import org.apache.tika.io.TikaInputStream; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.metadata.TikaCoreProperties; -import org.apache.tika.mime.MediaType; -import org.apache.tika.parser.AutoDetectParser; -import org.apache.tika.parser.CompositeParser; -import org.apache.tika.parser.DefaultParser; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.Parser; -import org.apache.tika.parser.ocr.TesseractOCRParser; -import org.apache.tika.sax.BodyContentHandler; - -@Ignore("TIKA-4712 -- BundleIT needs OSGi container updated for 4.x " + - "(jackson-databind, slf4j 2.x, updated commons-io)") -@RunWith(PaxExam.class) -@ExamReactorStrategy(PerMethod.class) +import org.osgi.framework.launch.Framework; +import org.osgi.framework.launch.FrameworkFactory; + +/** + * Integration test that boots an Apache Felix OSGi container, installs the + * tika-core and tika-bundle-standard bundles, and verifies that the bundles + * activate, services register, and parsing works. + * <p> + * The tests run outside the OSGi container (on the JVM classpath), so + * service lookups use string-based names rather than class references. + */ public class BundleIT { - private final File TARGET = new File("target"); - - @Inject - private Parser defaultParser; - - @Inject - private Detector contentTypeDetector; + private static final Path TEST_BUNDLES = Paths.get("target", "test-bundles"); + + private static Framework framework; + private static BundleContext ctx; + + @BeforeAll + static void startFramework() throws Exception { + Map<String, String> config = new HashMap<>(); + config.put(Constants.FRAMEWORK_STORAGE_CLEAN, + Constants.FRAMEWORK_STORAGE_CLEAN_ONFIRSTINIT); + config.put(Constants.FRAMEWORK_STORAGE, + "target/osgi-cache"); + config.put(Constants.FRAMEWORK_SYSTEMPACKAGES_EXTRA, String.join(",", + "javax.xml.bind", + "org.slf4j;version=2.0.17", + "org.slf4j.event;version=2.0.17", + "org.slf4j.helpers;version=2.0.17", + "org.slf4j.spi;version=2.0.17" + )); + config.put("org.osgi.framework.system.capabilities.extra", String.join(",", + "osgi.extender;osgi.extender=osgi.serviceloader.processor;version:Version=1.0", + "osgi.extender;osgi.extender=osgi.serviceloader.registrar;version:Version=1.0", + "osgi.serviceloader;osgi.serviceloader=org.apache.tika.detect.Detector", + "osgi.serviceloader;osgi.serviceloader=org.apache.tika.detect.EncodingDetector", + "osgi.serviceloader;osgi.serviceloader=org.apache.tika.language.detect.LanguageDetector", + "osgi.serviceloader;osgi.serviceloader=org.apache.tika.metadata.filter.MetadataFilter", + "osgi.serviceloader;osgi.serviceloader=org.apache.tika.parser.Parser" + )); + + FrameworkFactory factory = ServiceLoader.load(FrameworkFactory.class) + .iterator().next(); + framework = factory.newFramework(config); + framework.start(); + ctx = framework.getBundleContext(); + + // Install all bundles first, then start. + // tika-core requires osgi.serviceloader capabilities that are + // provided by tika-bundle-standard, so both must be installed + // before either can resolve. + Bundle commonsIo = install("commons-io.jar"); + Bundle tikaCore = install("tika-core.jar"); + Bundle tikaBundle = install("tika-bundle-standard.jar"); + + commonsIo.start(); + tikaCore.start(); + tikaBundle.start(); + } - @Inject - private BundleContext bc; + private static Bundle install(String filename) throws Exception { + File f = TEST_BUNDLES.resolve(filename).toFile(); + assertTrue(f.exists(), "Bundle not found: " + f); + return ctx.installBundle(f.toURI().toString()); + } - @Configuration - public Option[] configuration() throws IOException, URISyntaxException, ClassNotFoundException { - File base = new File(TARGET, "test-bundles"); - return options(systemPackages("javax.xml.bind"), - bundle(new File(base, "tika-core.jar").toURI().toURL().toString()), - //I couldn't find a way to get the build of bundle to work via imports - //for this one - mavenBundle("commons-io", "commons-io", "2.21.0"), - mavenBundle("org.apache.logging.log4j", "log4j-api", "2.25.4"), - junitBundles(), - bundle(new File(base, "tika-bundle-standard.jar").toURI().toURL().toString())); + @AfterAll + static void stopFramework() throws Exception { + if (framework != null) { + framework.stop(); + framework.waitForStop(10_000); + } } @Test - public void testBundleLoaded() throws Exception { + public void testBundleLoaded() { boolean hasCore = false, hasBundle = false; - for (Bundle b : bc.getBundles()) { + for (Bundle b : ctx.getBundles()) { if ("org.apache.tika.core".equals(b.getSymbolicName())) { hasCore = true; - assertEquals("Core not activated", Bundle.ACTIVE, b.getState()); + assertEquals(Bundle.ACTIVE, b.getState(), "Core not activated"); } if ("org.apache.tika.bundle-standard".equals(b.getSymbolicName())) { hasBundle = true; - assertEquals("Bundle not activated", Bundle.ACTIVE, b.getState()); + assertEquals(Bundle.ACTIVE, b.getState(), "Bundle not activated"); } } - assertTrue("Core bundle not found", hasCore); - assertTrue("Bundle bundle not found", hasBundle); + assertTrue(hasCore, "Core bundle not found"); + assertTrue(hasBundle, "Standard bundle not found"); } @Test - public void testManifestNoJUnit() throws Exception { - File TARGET = new File("target"); - File base = new File(TARGET, "test-bundles"); - File tikaBundle = new File(base, "tika-bundle-standard.jar"); - - JarInputStream jarIs = new JarInputStream(new FileInputStream(tikaBundle)); - Manifest mf = jarIs.getManifest(); - - Attributes main = mf.getMainAttributes(); - - String importPackage = main.getValue("Import-Package"); - - boolean containsJunit = importPackage.contains("junit"); - - assertFalse("The bundle should not import junit", containsJunit); + public void testDetectorServiceRegistered() throws Exception { + ServiceReference<?>[] refs = ctx.getAllServiceReferences( + "org.apache.tika.detect.Detector", null); + assertNotNull(refs, "Detector service not registered"); + assertTrue(refs.length > 0, "Should have at least one Detector service"); + Object detector = ctx.getService(refs[0]); + assertNotNull(detector); + assertEquals("org.apache.tika.detect.DefaultDetector", + detector.getClass().getName()); } @Test - public void testBundleDetection() throws Exception { - Metadata metadataTXT = new Metadata(); - metadataTXT.set(TikaCoreProperties.RESOURCE_NAME_KEY, "test.txt"); - - Metadata metadataPDF = new Metadata(); - metadataPDF.set(TikaCoreProperties.RESOURCE_NAME_KEY, "test.pdf"); - - // Simple type detection - assertEquals(MediaType.TEXT_PLAIN, contentTypeDetector.detect(null, metadataTXT, new ParseContext())); - assertEquals(MediaType.application("pdf"), contentTypeDetector.detect(null, metadataPDF, new ParseContext())); + public void testParserServiceRegistered() throws Exception { + ServiceReference<?>[] refs = ctx.getAllServiceReferences( + "org.apache.tika.parser.Parser", null); + assertNotNull(refs, "Parser service not registered"); + assertTrue(refs.length > 0, "Should have at least one Parser service"); + Object parser = ctx.getService(refs[0]); + assertNotNull(parser); + assertEquals("org.apache.tika.parser.DefaultParser", + parser.getClass().getName()); } @Test - public void testBundleSimpleText() throws Exception { - Tika tika = new Tika(); - - // Simple text extraction - String xml = tika.parseToString(new File("pom.xml")); - assertTrue(xml.contains("tika-bundle")); + public void testDetectorHasMultipleDetectors() throws Exception { + ServiceReference<?>[] refs = ctx.getAllServiceReferences( + "org.apache.tika.detect.Detector", null); + Object detector = ctx.getService(refs[0]); + Object detectors = detector.getClass() + .getMethod("getDetectors").invoke(detector); + int size = ((java.util.List<?>) detectors).size(); + assertTrue(size > 3, + "Should have several detectors, found " + size); } @Test - public void testBundleDetectors() throws Exception { - //For some reason, the detector created by OSGi has a flat - //list of detectors, whereas the detector created by the traditional - //service loading method has children: DefaultDetector, MimeTypes. - //We have to flatten the service loaded DefaultDetector to get equivalence. - //Detection behavior should all be the same. - - // Get the classes found within OSGi - ServiceReference<Detector> detectorRef = bc.getServiceReference(Detector.class); - DefaultDetector detectorService = (DefaultDetector) bc.getService(detectorRef); - - Set<String> osgiDetectors = new HashSet<>(); - for (Detector d : detectorService.getDetectors()) { - osgiDetectors.add(d.getClass().getName()); - } - - // Check we did get a few, just in case... - assertTrue("Should have several Detector names, found " + osgiDetectors.size(), - osgiDetectors.size() > 3); - - // Get the raw detectors list from the traditional service loading mechanism - DefaultDetector detector = new DefaultDetector(); - Set<String> rawDetectors = new HashSet<>(); - for (Detector d : detector.getDetectors()) { - if (d instanceof DefaultDetector) { - for (Detector dChild : ((DefaultDetector) d).getDetectors()) { - rawDetectors.add(dChild.getClass().getName()); - } - } else { - //TODO: figure out how to get this loaded correctly from tika-core - if (!d.getClass().getName().equals("org.apache.tika.detect.OverrideDetector")) { - rawDetectors.add(d.getClass().getName()); - } - } - } - assertEquals(rawDetectors, osgiDetectors); + public void testParserHasMultipleParsers() throws Exception { + ServiceReference<?>[] refs = ctx.getAllServiceReferences( + "org.apache.tika.parser.Parser", null); + Object parser = ctx.getService(refs[0]); + Object parsers = parser.getClass() + .getMethod("getAllComponentParsers").invoke(parser); + int size = ((java.util.Collection<?>) parsers).size(); + assertTrue(size > 15, + "Should have lots of parsers, found " + size); } @Test - public void testBundleParsers() throws Exception { - // Get the classes found within OSGi - ServiceReference<Parser> parserRef = bc.getServiceReference(Parser.class); - DefaultParser parserService = (DefaultParser) bc.getService(parserRef); - - Set<String> osgiParsers = new HashSet<>(); - for (Parser p : parserService.getAllComponentParsers()) { - osgiParsers.add(p.getClass().getName()); - } - - // Check we did get a few, just in case... - assertTrue("Should have lots Parser names, found " + osgiParsers.size(), - osgiParsers.size() > 15); - - // Get the raw parsers list from the traditional service loading mechanism - CompositeParser parser = (CompositeParser) defaultParser; - Set<String> rawParsers = new HashSet<>(); - for (Parser p : parser.getAllComponentParsers()) { - if (p instanceof DefaultParser) { - for (Parser pChild : ((DefaultParser) p).getAllComponentParsers()) { - rawParsers.add(pChild.getClass().getName()); - } - } else { - rawParsers.add(p.getClass().getName()); - } - } - assertEquals(rawParsers, osgiParsers); + public void testTikaClassLoadable() throws Exception { + // Verify key Tika classes can be loaded from the bundle's classloader + Bundle tikaCore = findBundle("org.apache.tika.core"); + assertNotNull(tikaCore, "tika-core bundle not found"); + assertNotNull(tikaCore.loadClass("org.apache.tika.Tika")); + assertNotNull(tikaCore.loadClass("org.apache.tika.parser.AutoDetectParser")); + assertNotNull(tikaCore.loadClass("org.apache.tika.detect.DefaultDetector")); + + Bundle tikaBundle = findBundle("org.apache.tika.bundle-standard"); + assertNotNull(tikaBundle, "tika-bundle-standard not found"); + // Parser implementations should be loadable from the bundle + assertNotNull(tikaBundle.loadClass("org.apache.tika.parser.pdf.PDFParser")); + assertNotNull(tikaBundle.loadClass("org.apache.tika.parser.microsoft.ooxml.OOXMLParser")); } - @Test - public void testTesseractParser() throws Exception { - ContentHandler handler = new BodyContentHandler(); - ParseContext context = new ParseContext(); - Parser tesseractParser = new TesseractOCRParser(); - try (TikaInputStream tis = TikaInputStream.get(Paths.get("src/test/resources/testOCR.jpg"))) { - tesseractParser.parse(tis, handler, new Metadata(), context); - } - } - - @Test - public void testTikaBundle() throws Exception { - - // Package extraction - ContentHandler handler = new BodyContentHandler(); - - Parser parser = new AutoDetectParser(defaultParser); - ParseContext context = new ParseContext(); - context.set(Parser.class, parser); - - try (TikaInputStream tis = TikaInputStream.get( - Paths.get("src/test/resources/test-documents.zip"))) { - parser.parse(tis, handler, new Metadata(), context); - } - - String content = handler.toString(); - assertTrue(content.contains("testEXCEL.xls")); - assertTrue(content.contains("Sample Excel Worksheet")); - assertTrue(content.contains("testHTML.html")); - assertTrue(content.contains("Test Indexation Html")); - assertTrue(content.contains("testOpenOffice2.odt")); - assertTrue(content.contains("This is a sample Open Office document")); - assertTrue(content.contains("testPDF.pdf")); - assertTrue(content.contains("Apache Tika")); - assertTrue(content.contains("testPPT.ppt")); - assertTrue(content.contains("Sample Powerpoint Slide")); - assertTrue(content.contains("testRTF.rtf")); - assertTrue(content.contains("indexation Word")); - assertTrue(content.contains("testTXT.txt")); - assertTrue(content.contains("Test d'indexation de Txt")); - assertTrue(content.contains("testWORD.doc")); - assertTrue(content.contains("This is a sample Microsoft Word Document")); - assertTrue(content.contains("testXML.xml")); - assertTrue(content.contains("Rida Benjelloun")); - } - - @Test - public void testPoiTikaBundle() throws Exception { - - // Package extraction - ContentHandler handler = new BodyContentHandler(); - - Parser parser = new AutoDetectParser(contentTypeDetector, defaultParser); - ParseContext context = new ParseContext(); - context.set(Parser.class, parser); - - try (TikaInputStream tis = TikaInputStream.get( - Paths.get("src/test/resources/testPPT.pptx"))) { - parser.parse(tis, handler, new Metadata(), context); - } - - String content = handler.toString(); - assertTrue(content.contains("Attachment Test")); - } - - @Test - @Ignore - public void testAll() throws Exception { - // Package extraction - ContentHandler handler = new BodyContentHandler(); - - Parser parser = new AutoDetectParser(defaultParser); - ParseContext context = new ParseContext(); - context.set(Parser.class, parser); - Set<String> needToFix = new HashSet<>(); - //needToFix.add("testAccess2_encrypted.accdb"); - System.out.println(getTestDir()); - for (File f : getTestDir().listFiles()) { - if (f.isDirectory()) { - continue; - } - if (needToFix.contains(f.getName())) { - continue; + private Bundle findBundle(String symbolicName) { + for (Bundle b : ctx.getBundles()) { + if (symbolicName.equals(b.getSymbolicName())) { + return b; } - System.out.println("about to parse " + f); - Metadata metadata = new Metadata(); - try (TikaInputStream tis = TikaInputStream.get(f.toPath())) { - parser.parse(tis, handler, metadata, context); - } catch (EncryptedDocumentException e) { - //swallow - } catch (SAXException e) { - //swallow - } catch (TikaException e) { - System.err.println("tika Exception " + f.getName()); - e.printStackTrace(); - } - System.out.println( - Arrays.asList(metadata.getValues(TikaCoreProperties.TIKA_PARSED_BY))); } + return null; } - - private File getTestDir() { - return new File("../tika-parsers/src/test/resources/test-documents"); - } - - } diff --git a/tika-bundles/tika-bundle-standard/test-bundles.xml b/tika-bundles/tika-bundle-standard/test-bundles.xml index 0ba83b743c..4da4310920 100644 --- a/tika-bundles/tika-bundle-standard/test-bundles.xml +++ b/tika-bundles/tika-bundle-standard/test-bundles.xml @@ -29,6 +29,7 @@ <includes> <include>org.apache.tika:tika-core</include> <include>org.apache.tika:tika-bundle-standard</include> + <include>commons-io:commons-io</include> </includes> </dependencySet> <dependencySet> diff --git a/tika-core/pom.xml b/tika-core/pom.xml index 5ec2ce10c5..dcd0b780f2 100644 --- a/tika-core/pom.xml +++ b/tika-core/pom.xml @@ -166,6 +166,9 @@ <Bundle-ActivationPolicy>lazy</Bundle-ActivationPolicy> <Import-Package> org.apache.xerces.util;resolution:=optional, + com.fasterxml.jackson.*;resolution:=optional, + org.apache.tika.config.loader;resolution:=optional, + org.apache.tika.serialization;resolution:=optional, org.apache.commons.io.*;version="[2,3)", * </Import-Package>
