This is an automated email from the ASF dual-hosted git repository. jamesnetherton pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/camel-quarkus.git
The following commit(s) were added to refs/heads/main by this push: new 16abe1b5f9 Make tika extension work as per the vanilla Camel component 16abe1b5f9 is described below commit 16abe1b5f98ef60010104d842f487f8a6cf4e435 Author: James Netherton <jamesnether...@gmail.com> AuthorDate: Fri Jan 31 11:57:57 2025 +0000 Make tika extension work as per the vanilla Camel component Fixes #5234 Fixes #5393 --- .../ROOT/pages/reference/extensions/tika.adoc | 34 ------- extensions/tika/deployment/pom.xml | 12 ++- .../component/tika/deployment/TikaProcessor.java | 45 ++++----- extensions/tika/runtime/pom.xml | 12 ++- .../tika/runtime/src/main/doc/limitations.adoc | 29 ------ .../camel/quarkus/component/tika/TikaRecorder.java | 102 --------------------- integration-tests/tika/pom.xml | 6 +- .../quarkus/component/tika/it/TikaResource.java | 23 +---- .../tika/src/main/resources/application.properties | 6 +- .../tika/src/main/resources/{ => assets}/black.png | Bin .../src/main/resources/{ => assets}/quarkus.pdf | Bin .../src/main/resources/{ => assets}/quarkus.xml | 0 .../tika/src/main/resources/{ => assets}/test.doc | Bin .../resources/{ => assets}/testOpenOffice2.odt | Bin .../camel/quarkus/component/tika/it/TikaIT.java | 4 - .../camel/quarkus/component/tika/it/TikaTest.java | 51 ++--------- pom.xml | 1 - poms/bom/pom.xml | 22 ----- poms/bom/src/main/generated/flattened-full-pom.xml | 22 ----- .../src/main/generated/flattened-reduced-pom.xml | 22 ----- .../generated/flattened-reduced-verbose-pom.xml | 22 ----- 21 files changed, 50 insertions(+), 363 deletions(-) diff --git a/docs/modules/ROOT/pages/reference/extensions/tika.adoc b/docs/modules/ROOT/pages/reference/extensions/tika.adoc index 56712b2cda..3a0c3c1408 100644 --- a/docs/modules/ROOT/pages/reference/extensions/tika.adoc +++ b/docs/modules/ROOT/pages/reference/extensions/tika.adoc @@ -44,37 +44,3 @@ Or add the coordinates to your existing project: ifeval::[{doc-show-user-guide-link} == true] Check the xref:user-guide/index.adoc[User guide] for more information about writing Camel Quarkus applications. endif::[] - -[id="extensions-tika-camel-quarkus-limitations"] -== Camel Quarkus limitations - -Parameters `tikaConfig` and `tikaConfigUri` are not available in quarkus camel tika extension. Configuration -can be changed only via `application.properties`. - -While you can use any of the available https://tika.apache.org/1.24.1/formats.html[Tika parsers] in JVM mode, -only some of those are supported in native mode - see the https://quarkiverse.github.io/quarkiverse-docs/quarkus-tika/dev/index.html[Quarkus Tika guide]. - -PDF and ODF parsers can not be used both in JVM mode or in the native mode. Pdf extension is suggested for purposes of pdf consumption to avoid a version conflict between Camel and Quarkus-tika extension involving PdfBox dependency. - -Use of the Tika parser without any configuration will initialize all available parsers. Unfortunately as some of them -don't work in the native mode, the whole execution will fail. - -In order to make the Tika parser work in the native mode, selection of parsers for initialization should be used. - -* `quarkus.tika.parsers` Comma separated list of parsers (abbreviations). There are two predefined parsers: -`pdf` and `odf`. -* `quarkus.tika.parser.*` Adds new parser abbreviation to be used with previous property. Value is the full class of -the parser. - -Example of `application.properties`: -[source,properties] ----- -quarkus.tika.parsers = pdf,odf,office -quarkus.tika.parser.office = org.apache.tika.parser.microsoft.OfficeParser ----- - -For more information about selecting parsers see the https://quarkiverse.github.io/quarkiverse-docs/quarkus-tika/dev/index.html[Quarkus Tika guide]. - -You may need to add the `quarkus-awt` extension to build the native image. For more information, see https://quarkiverse.github.io/quarkiverse-docs/quarkus-tika/dev/index.html[Quarkus Tika guide]. - - diff --git a/extensions/tika/deployment/pom.xml b/extensions/tika/deployment/pom.xml index 95d8aedaae..124081f34b 100644 --- a/extensions/tika/deployment/pom.xml +++ b/extensions/tika/deployment/pom.xml @@ -30,6 +30,14 @@ <name>Camel Quarkus :: Tika :: Deployment</name> <dependencies> + <dependency> + <groupId>io.quarkus</groupId> + <artifactId>quarkus-netty-deployment</artifactId> + </dependency> + <dependency> + <groupId>io.quarkus</groupId> + <artifactId>quarkus-awt-deployment</artifactId> + </dependency> <dependency> <groupId>org.apache.camel.quarkus</groupId> <artifactId>camel-quarkus-core-deployment</artifactId> @@ -38,10 +46,6 @@ <groupId>org.apache.camel.quarkus</groupId> <artifactId>camel-quarkus-tika</artifactId> </dependency> - <dependency> - <groupId>io.quarkiverse.tika</groupId> - <artifactId>quarkus-tika-deployment</artifactId> - </dependency> <dependency> <groupId>org.apache.camel.quarkus</groupId> <artifactId>camel-quarkus-support-xalan-deployment</artifactId> diff --git a/extensions/tika/deployment/src/main/java/org/apache/camel/quarkus/component/tika/deployment/TikaProcessor.java b/extensions/tika/deployment/src/main/java/org/apache/camel/quarkus/component/tika/deployment/TikaProcessor.java index 40d323a34f..39654ce2eb 100644 --- a/extensions/tika/deployment/src/main/java/org/apache/camel/quarkus/component/tika/deployment/TikaProcessor.java +++ b/extensions/tika/deployment/src/main/java/org/apache/camel/quarkus/component/tika/deployment/TikaProcessor.java @@ -16,22 +16,18 @@ */ package org.apache.camel.quarkus.component.tika.deployment; -import io.quarkus.arc.deployment.BeanContainerBuildItem; +import java.util.Set; + +import io.quarkus.deployment.annotations.BuildProducer; import io.quarkus.deployment.annotations.BuildStep; -import io.quarkus.deployment.annotations.ExecutionTime; -import io.quarkus.deployment.annotations.Record; import io.quarkus.deployment.builditem.FeatureBuildItem; -import io.quarkus.deployment.builditem.nativeimage.RuntimeInitializedClassBuildItem; -import org.apache.camel.component.tika.TikaComponent; -import org.apache.camel.quarkus.component.tika.TikaRecorder; -import org.apache.camel.quarkus.core.deployment.spi.CamelRuntimeBeanBuildItem; -import org.apache.camel.quarkus.core.deployment.spi.CamelServiceFilter; -import org.apache.camel.quarkus.core.deployment.spi.CamelServiceFilterBuildItem; -import org.jboss.logging.Logger; +import io.quarkus.deployment.builditem.nativeimage.NativeImageResourceBuildItem; +import io.quarkus.deployment.builditem.nativeimage.ServiceProviderBuildItem; +import io.quarkus.deployment.util.ServiceUtil; +import org.apache.tika.detect.EncodingDetector; +import org.apache.tika.parser.Parser; class TikaProcessor { - - private static final Logger LOG = Logger.getLogger(TikaProcessor.class); private static final String FEATURE = "camel-tika"; @BuildStep @@ -39,26 +35,21 @@ class TikaProcessor { return new FeatureBuildItem(FEATURE); } - /* - * The tika component is programmatically configured by the extension thus - * we can safely prevent camel to instantiate a default instance. - */ @BuildStep - CamelServiceFilterBuildItem serviceFilter() { - return new CamelServiceFilterBuildItem(CamelServiceFilter.forComponent("tika")); + void registerTikaCoreResources(BuildProducer<NativeImageResourceBuildItem> resource) { + resource.produce(new NativeImageResourceBuildItem("org/apache/tika/mime/tika-mimetypes.xml")); + resource.produce(new NativeImageResourceBuildItem("org/apache/tika/parser/external/tika-external-parsers.xml")); } - @Record(ExecutionTime.STATIC_INIT) @BuildStep - CamelRuntimeBeanBuildItem tikaComponent(BeanContainerBuildItem beanContainer, TikaRecorder recorder) { - return new CamelRuntimeBeanBuildItem( - "tika", - TikaComponent.class.getName(), - recorder.createTikaComponent(beanContainer.getValue())); + void registerTikaServices(BuildProducer<ServiceProviderBuildItem> serviceProvider) throws Exception { + serviceProvider.produce(new ServiceProviderBuildItem(EncodingDetector.class.getName(), + getProviderNames(EncodingDetector.class.getName()))); + serviceProvider.produce(new ServiceProviderBuildItem(Parser.class.getName(), getProviderNames(Parser.class.getName()))); } - @BuildStep - RuntimeInitializedClassBuildItem runtimeInitializedClasses() { - return new RuntimeInitializedClassBuildItem("org.apache.pdfbox.text.LegacyPDFStreamEngine"); + private Set<String> getProviderNames(String serviceProviderName) throws Exception { + return ServiceUtil.classNamesNamedIn(Thread.currentThread().getContextClassLoader(), + "META-INF/services/" + serviceProviderName); } } diff --git a/extensions/tika/runtime/pom.xml b/extensions/tika/runtime/pom.xml index 861ca44f0f..79e49f4851 100644 --- a/extensions/tika/runtime/pom.xml +++ b/extensions/tika/runtime/pom.xml @@ -36,6 +36,14 @@ </properties> <dependencies> + <dependency> + <groupId>io.quarkus</groupId> + <artifactId>quarkus-netty</artifactId> + </dependency> + <dependency> + <groupId>io.quarkus</groupId> + <artifactId>quarkus-awt</artifactId> + </dependency> <dependency> <groupId>org.apache.camel.quarkus</groupId> <artifactId>camel-quarkus-core</artifactId> @@ -48,10 +56,6 @@ <groupId>org.apache.camel.quarkus</groupId> <artifactId>camel-quarkus-support-xalan</artifactId> </dependency> - <dependency> - <groupId>io.quarkiverse.tika</groupId> - <artifactId>quarkus-tika</artifactId> - </dependency> </dependencies> <build> diff --git a/extensions/tika/runtime/src/main/doc/limitations.adoc b/extensions/tika/runtime/src/main/doc/limitations.adoc deleted file mode 100644 index 1d0f7a75c4..0000000000 --- a/extensions/tika/runtime/src/main/doc/limitations.adoc +++ /dev/null @@ -1,29 +0,0 @@ -Parameters `tikaConfig` and `tikaConfigUri` are not available in quarkus camel tika extension. Configuration -can be changed only via `application.properties`. - -While you can use any of the available https://tika.apache.org/1.24.1/formats.html[Tika parsers] in JVM mode, -only some of those are supported in native mode - see the https://quarkiverse.github.io/quarkiverse-docs/quarkus-tika/dev/index.html[Quarkus Tika guide]. - -PDF and ODF parsers can not be used both in JVM mode or in the native mode. Pdf extension is suggested for purposes of pdf consumption to avoid a version conflict between Camel and Quarkus-tika extension involving PdfBox dependency. - -Use of the Tika parser without any configuration will initialize all available parsers. Unfortunately as some of them -don't work in the native mode, the whole execution will fail. - -In order to make the Tika parser work in the native mode, selection of parsers for initialization should be used. - -* `quarkus.tika.parsers` Comma separated list of parsers (abbreviations). There are two predefined parsers: -`pdf` and `odf`. -* `quarkus.tika.parser.*` Adds new parser abbreviation to be used with previous property. Value is the full class of -the parser. - -Example of `application.properties`: -[source,properties] ----- -quarkus.tika.parsers = pdf,odf,office -quarkus.tika.parser.office = org.apache.tika.parser.microsoft.OfficeParser ----- - -For more information about selecting parsers see the https://quarkiverse.github.io/quarkiverse-docs/quarkus-tika/dev/index.html[Quarkus Tika guide]. - -You may need to add the `quarkus-awt` extension to build the native image. For more information, see https://quarkiverse.github.io/quarkiverse-docs/quarkus-tika/dev/index.html[Quarkus Tika guide]. - diff --git a/extensions/tika/runtime/src/main/java/org/apache/camel/quarkus/component/tika/TikaRecorder.java b/extensions/tika/runtime/src/main/java/org/apache/camel/quarkus/component/tika/TikaRecorder.java deleted file mode 100644 index 7c45a9cef4..0000000000 --- a/extensions/tika/runtime/src/main/java/org/apache/camel/quarkus/component/tika/TikaRecorder.java +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.camel.quarkus.component.tika; - -import java.io.IOException; -import java.io.InputStream; -import java.util.Collections; -import java.util.Set; - -import org.xml.sax.ContentHandler; -import org.xml.sax.SAXException; - -import io.quarkus.arc.runtime.BeanContainer; -import io.quarkus.runtime.RuntimeValue; -import io.quarkus.runtime.annotations.Recorder; -import io.quarkus.tika.TikaContent; -import io.quarkus.tika.TikaMetadata; -import io.quarkus.tika.TikaParser; -import io.quarkus.tika.runtime.TikaParserProducer; -import org.apache.camel.Component; -import org.apache.camel.Producer; -import org.apache.camel.component.tika.TikaComponent; -import org.apache.camel.component.tika.TikaConfiguration; -import org.apache.camel.component.tika.TikaEndpoint; -import org.apache.camel.component.tika.TikaProducer; -import org.apache.tika.exception.TikaException; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.mime.MediaType; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.Parser; - -@Recorder -public class TikaRecorder { - - public RuntimeValue<TikaComponent> createTikaComponent(BeanContainer container) { - return new RuntimeValue<>(new QuarkusTikaComponent(container.beanInstance(TikaParserProducer.class))); - } - - @org.apache.camel.spi.annotations.Component("tika") - static class QuarkusTikaComponent extends TikaComponent { - - private final TikaParserProducer tikaParserProducer; - - public QuarkusTikaComponent(TikaParserProducer tikaParserProducer) { - this.tikaParserProducer = tikaParserProducer; - } - - @Override - protected TikaEndpoint createEndpoint(String uri, TikaConfiguration tikaConfiguration) { - return new QuarkusTikaEndpoint(uri, this, tikaConfiguration, tikaParserProducer); - } - } - - static class QuarkusTikaEndpoint extends TikaEndpoint { - - private final TikaParserProducer tikaParserProducer; - - public QuarkusTikaEndpoint(String endpointUri, Component component, TikaConfiguration tikaConfiguration, - TikaParserProducer tikaParserProducer) { - super(endpointUri, component, tikaConfiguration); - this.tikaParserProducer = tikaParserProducer; - } - - @Override - public Producer createProducer() throws Exception { - TikaParser tikaParser = tikaParserProducer.tikaParser(); - return new TikaProducer(this, new Parser() { - @Override - public Set<MediaType> getSupportedTypes(ParseContext parseContext) { - return Collections.emptySet(); - } - - @Override - public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, - ParseContext parseContext) throws IOException, SAXException, TikaException { - TikaContent tc = tikaParser.parse(inputStream, contentHandler); - TikaMetadata tm = tc.getMetadata(); - if (tm != null) { - for (String name : tm.getNames()) { - tm.getValues(name).stream().forEach((v) -> metadata.add(name, v)); - } - } - } - }); - } - } - -} diff --git a/integration-tests/tika/pom.xml b/integration-tests/tika/pom.xml index ecd7f15006..b6d6bb339a 100644 --- a/integration-tests/tika/pom.xml +++ b/integration-tests/tika/pom.xml @@ -17,7 +17,8 @@ limitations under the License. --> -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <parent> <groupId>org.apache.camel.quarkus</groupId> @@ -53,7 +54,7 @@ <dependency> <groupId>io.quarkus</groupId> <artifactId>quarkus-junit5</artifactId> - <scope>test</scope> + <scope>test</scope> <exclusions> <exclusion> <groupId>org.jsoup</groupId> @@ -68,7 +69,6 @@ </dependency> </dependencies> - <profiles> <profile> <id>native</id> diff --git a/integration-tests/tika/src/main/java/org/apache/camel/quarkus/component/tika/it/TikaResource.java b/integration-tests/tika/src/main/java/org/apache/camel/quarkus/component/tika/it/TikaResource.java index e751d48cc9..4b8ef5d87c 100644 --- a/integration-tests/tika/src/main/java/org/apache/camel/quarkus/component/tika/it/TikaResource.java +++ b/integration-tests/tika/src/main/java/org/apache/camel/quarkus/component/tika/it/TikaResource.java @@ -26,15 +26,12 @@ import jakarta.ws.rs.Path; import jakarta.ws.rs.Produces; import jakarta.ws.rs.core.MediaType; import jakarta.ws.rs.core.Response; +import org.apache.camel.Exchange; import org.apache.camel.ProducerTemplate; -import org.jboss.logging.Logger; @Path("/tika") @ApplicationScoped public class TikaResource { - - private static final Logger LOG = Logger.getLogger(TikaResource.class); - @Inject ProducerTemplate producerTemplate; @@ -43,23 +40,11 @@ public class TikaResource { @Consumes(MediaType.APPLICATION_OCTET_STREAM) @Produces(MediaType.TEXT_PLAIN) public Response parse(byte[] message) throws Exception { - final String response = producerTemplate.requestBody("tika:parse", message, String.class); - return Response - .created(new URI("https://camel.apache.org/")) - .entity(response) - .build(); - } - - @Path("/parseAsText") - @POST - @Consumes(MediaType.APPLICATION_OCTET_STREAM) - @Produces(MediaType.TEXT_PLAIN) - public Response parseAsTxt(byte[] message) throws Exception { - final String response = producerTemplate.requestBody("tika:parse?tikaParseOutputFormat=text", message, - String.class); + final Exchange response = producerTemplate.request("tika:parse", exchange -> exchange.getMessage().setBody(message)); return Response .created(new URI("https://camel.apache.org/")) - .entity(response) + .header("Parsed-Content-Type", response.getMessage().getHeader(Exchange.CONTENT_TYPE)) + .entity(response.getMessage().getBody(String.class)) .build(); } diff --git a/integration-tests/tika/src/main/resources/application.properties b/integration-tests/tika/src/main/resources/application.properties index 536d32868c..4ed43bf8ce 100644 --- a/integration-tests/tika/src/main/resources/application.properties +++ b/integration-tests/tika/src/main/resources/application.properties @@ -15,8 +15,4 @@ ## limitations under the License. ## --------------------------------------------------------------------------- -#quarkus.tika.parsers= pdf,odf,office,xml,image //Requires new release of quarkiverse-tike, which adopts tika with pdfBox 3.x -quarkus.tika.parsers= odf,office,xml,image -quarkus.tika.parser.office = org.apache.tika.parser.microsoft.OfficeParser -quarkus.tika.parser.image = org.apache.tika.parser.image.ImageParser -quarkus.tika.parser.xml = org.apache.tika.parser.xml.DcXMLParser \ No newline at end of file +quarkus.native.resources.includes=assets/* diff --git a/integration-tests/tika/src/main/resources/black.png b/integration-tests/tika/src/main/resources/assets/black.png similarity index 100% rename from integration-tests/tika/src/main/resources/black.png rename to integration-tests/tika/src/main/resources/assets/black.png diff --git a/integration-tests/tika/src/main/resources/quarkus.pdf b/integration-tests/tika/src/main/resources/assets/quarkus.pdf similarity index 100% rename from integration-tests/tika/src/main/resources/quarkus.pdf rename to integration-tests/tika/src/main/resources/assets/quarkus.pdf diff --git a/integration-tests/tika/src/main/resources/quarkus.xml b/integration-tests/tika/src/main/resources/assets/quarkus.xml similarity index 100% rename from integration-tests/tika/src/main/resources/quarkus.xml rename to integration-tests/tika/src/main/resources/assets/quarkus.xml diff --git a/integration-tests/tika/src/main/resources/test.doc b/integration-tests/tika/src/main/resources/assets/test.doc similarity index 100% rename from integration-tests/tika/src/main/resources/test.doc rename to integration-tests/tika/src/main/resources/assets/test.doc diff --git a/integration-tests/tika/src/main/resources/testOpenOffice2.odt b/integration-tests/tika/src/main/resources/assets/testOpenOffice2.odt similarity index 100% rename from integration-tests/tika/src/main/resources/testOpenOffice2.odt rename to integration-tests/tika/src/main/resources/assets/testOpenOffice2.odt diff --git a/integration-tests/tika/src/test/java/org/apache/camel/quarkus/component/tika/it/TikaIT.java b/integration-tests/tika/src/test/java/org/apache/camel/quarkus/component/tika/it/TikaIT.java index 2950b7fafb..1a71429569 100644 --- a/integration-tests/tika/src/test/java/org/apache/camel/quarkus/component/tika/it/TikaIT.java +++ b/integration-tests/tika/src/test/java/org/apache/camel/quarkus/component/tika/it/TikaIT.java @@ -17,12 +17,8 @@ package org.apache.camel.quarkus.component.tika.it; import io.quarkus.test.junit.QuarkusIntegrationTest; -import org.junit.jupiter.api.condition.DisabledOnOs; -import org.junit.jupiter.api.condition.OS; @QuarkusIntegrationTest -//https://github.com/apache/camel-quarkus/issues/3417 -@DisabledOnOs(OS.MAC) class TikaIT extends TikaTest { } diff --git a/integration-tests/tika/src/test/java/org/apache/camel/quarkus/component/tika/it/TikaTest.java b/integration-tests/tika/src/test/java/org/apache/camel/quarkus/component/tika/it/TikaTest.java index adf61f13f1..91033308b1 100644 --- a/integration-tests/tika/src/test/java/org/apache/camel/quarkus/component/tika/it/TikaTest.java +++ b/integration-tests/tika/src/test/java/org/apache/camel/quarkus/component/tika/it/TikaTest.java @@ -16,40 +16,23 @@ */ package org.apache.camel.quarkus.component.tika.it; -import java.io.ByteArrayOutputStream; import java.io.InputStream; import io.quarkus.test.junit.QuarkusTest; import io.restassured.RestAssured; import io.restassured.http.ContentType; import io.restassured.response.ValidatableResponse; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import static org.hamcrest.Matchers.containsStringIgnoringCase; import static org.hamcrest.Matchers.is; -import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.startsWith; @QuarkusTest class TikaTest { - - @Disabled //Requires new release of quarkiverse-tike, which adopts tika with pdfBox 3.x https://github.com/apache/camel-quarkus/issues/5234 - @Test - public void testPdf() throws Exception { - testParse("quarkus.pdf", "application/pdf", "Hello Quarkus"); - } - - @Disabled //Requires new release of quarkiverse-tike, which adopts tika with pdfBox 3.x https://github.com/apache/camel-quarkus/issues/5234 - @Test - public void testOdf() throws Exception { - testParse("testOpenOffice2.odt", "application/vnd.oasis.opendocument.text", - "This is a sample Open Office document, written in NeoOffice 2.2.1 for the Mac"); - } - @Test public void testOffice() throws Exception { - testParse("test.doc", "application/msword", "test"); + testParse("test.doc", "application/x-tika-msoffice", null); } @Test @@ -62,11 +45,6 @@ class TikaTest { testParse("quarkus.xml", "application/xml", "Hello Quarkus"); } - @Test - public void testParseAsText() throws Exception { - testParseAsText("test.doc", "test"); - } - @Test public void testDetectDoc() throws Exception { testDetect("test.doc", "application/x-tika-msoffice"); @@ -86,23 +64,17 @@ class TikaTest { private void testParse(String fileName, String expectedContentType, String expectedBody) throws Exception { post(fileName, "/tika/parse") - .body(not(containsStringIgnoringCase("EmptyParser"))) - .body(containsStringIgnoringCase(expectedContentType)) + .header("Parsed-Content-Type", startsWith(expectedContentType)) .body(containsStringIgnoringCase(expectedBody == null ? "<body/>" : expectedBody)); } - private void testParseAsText(String fileName, String expectedBody) throws Exception { - post(fileName, "/tika/parseAsText") - .body(startsWith(expectedBody)); - } - private void testDetect(String fileName, String expectedContentType) throws Exception { post(fileName, "/tika/detect") .body(is(expectedContentType)); } private ValidatableResponse post(String fileName, String s) throws Exception { - return RestAssured.given() // + return RestAssured.given() .contentType(ContentType.BINARY) .body(readQuarkusFile(fileName)) .post(s) @@ -111,18 +83,11 @@ class TikaTest { } private byte[] readQuarkusFile(String fileName) throws Exception { - try (InputStream is = getClass().getClassLoader().getResourceAsStream(fileName)) { - return readBytes(is); - } - } - - static byte[] readBytes(InputStream is) throws Exception { - ByteArrayOutputStream os = new ByteArrayOutputStream(); - byte[] buffer = new byte[4096]; - int len; - while ((len = is.read(buffer)) != -1) { - os.write(buffer, 0, len); + try (InputStream is = getClass().getClassLoader().getResourceAsStream("assets/" + fileName)) { + if (is == null) { + throw new IllegalStateException("Unable to read file: " + fileName); + } + return is.readAllBytes(); } - return os.toByteArray(); } } diff --git a/pom.xml b/pom.xml index 732f75b7ec..11c0e0a070 100644 --- a/pom.xml +++ b/pom.xml @@ -61,7 +61,6 @@ <quarkiverse-minio.version>3.7.7</quarkiverse-minio.version><!-- https://repo1.maven.org/maven2/io/quarkiverse/minio/quarkus-minio-parent/ --> <quarkiverse-mybatis.version>2.2.4</quarkiverse-mybatis.version><!-- https://repo1.maven.org/maven2/io/quarkiverse/mybatis/quarkus-mybatis-parent/ --> <quarkiverse-pooled-jms.version>2.6.0</quarkiverse-pooled-jms.version><!-- https://repo1.maven.org/maven2/io/quarkiverse/messaginghub/quarkus-pooled-jms-parent/ --> - <quarkiverse-tika.version>2.0.4</quarkiverse-tika.version><!-- https://repo1.maven.org/maven2/io/quarkiverse/tika/quarkus-tika-parent/ --> <quarkus.version>3.18.0</quarkus.version><!-- https://repo1.maven.org/maven2/io/quarkus/quarkus-bom/ --> <quarkus-hazelcast-client.version>4.0.0</quarkus-hazelcast-client.version><!-- https://repo1.maven.org/maven2/com/hazelcast/quarkus-hazelcast-client-bom/ --> <quarkus-qpid-jms.version>2.7.1</quarkus-qpid-jms.version><!-- This should be in sync with quarkus-platform https://repo1.maven.org/maven2/org/amqphub/quarkus/quarkus-qpid-jms-bom/ --> diff --git a/poms/bom/pom.xml b/poms/bom/pom.xml index dfa613ff8e..63e4afde02 100644 --- a/poms/bom/pom.xml +++ b/poms/bom/pom.xml @@ -2702,12 +2702,6 @@ <groupId>org.apache.camel</groupId> <artifactId>camel-tika</artifactId> <version>${camel.version}</version> - <exclusions> - <exclusion> - <groupId>org.apache.tika</groupId> - <artifactId>*</artifactId> - </exclusion> - </exclusions> </dependency> <dependency> <groupId>org.apache.camel</groupId> @@ -6973,22 +6967,6 @@ <artifactId>quarkus-mybatis-deployment</artifactId> <version>${quarkiverse-mybatis.version}</version> </dependency> - <dependency> - <groupId>io.quarkiverse.tika</groupId> - <artifactId>quarkus-tika</artifactId> - <version>${quarkiverse-tika.version}</version> - <exclusions> - <exclusion> - <groupId>commons-logging</groupId> - <artifactId>commons-logging</artifactId> - </exclusion> - </exclusions> - </dependency> - <dependency> - <groupId>io.quarkiverse.tika</groupId> - <artifactId>quarkus-tika-deployment</artifactId> - <version>${quarkiverse-tika.version}</version> - </dependency> <dependency> <groupId>io.reactivex.rxjava3</groupId> <artifactId>rxjava</artifactId> diff --git a/poms/bom/src/main/generated/flattened-full-pom.xml b/poms/bom/src/main/generated/flattened-full-pom.xml index b4104efc80..af9eedf856 100644 --- a/poms/bom/src/main/generated/flattened-full-pom.xml +++ b/poms/bom/src/main/generated/flattened-full-pom.xml @@ -2639,12 +2639,6 @@ <groupId>org.apache.camel</groupId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> <artifactId>camel-tika</artifactId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> <version>4.9.0</version><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> - <exclusions> - <exclusion> - <groupId>org.apache.tika</groupId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> - <artifactId>*</artifactId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> - </exclusion> - </exclusions> </dependency> <dependency> <groupId>org.apache.camel</groupId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> @@ -6895,22 +6889,6 @@ <artifactId>quarkus-mybatis-deployment</artifactId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> <version>2.2.4</version><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> </dependency> - <dependency> - <groupId>io.quarkiverse.tika</groupId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> - <artifactId>quarkus-tika</artifactId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> - <version>2.0.4</version><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> - <exclusions> - <exclusion> - <groupId>commons-logging</groupId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> - <artifactId>commons-logging</artifactId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> - </exclusion> - </exclusions> - </dependency> - <dependency> - <groupId>io.quarkiverse.tika</groupId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> - <artifactId>quarkus-tika-deployment</artifactId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> - <version>2.0.4</version><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> - </dependency> <dependency> <groupId>io.reactivex.rxjava3</groupId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> <artifactId>rxjava</artifactId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> diff --git a/poms/bom/src/main/generated/flattened-reduced-pom.xml b/poms/bom/src/main/generated/flattened-reduced-pom.xml index 9c0d5a9d14..9821b26e6f 100644 --- a/poms/bom/src/main/generated/flattened-reduced-pom.xml +++ b/poms/bom/src/main/generated/flattened-reduced-pom.xml @@ -2629,12 +2629,6 @@ <groupId>org.apache.camel</groupId> <artifactId>camel-tika</artifactId> <version>4.9.0</version> - <exclusions> - <exclusion> - <groupId>org.apache.tika</groupId> - <artifactId>*</artifactId> - </exclusion> - </exclusions> </dependency> <dependency> <groupId>org.apache.camel</groupId> @@ -6850,22 +6844,6 @@ <artifactId>quarkus-mybatis-deployment</artifactId> <version>2.2.4</version> </dependency> - <dependency> - <groupId>io.quarkiverse.tika</groupId> - <artifactId>quarkus-tika</artifactId> - <version>2.0.4</version> - <exclusions> - <exclusion> - <groupId>commons-logging</groupId> - <artifactId>commons-logging</artifactId> - </exclusion> - </exclusions> - </dependency> - <dependency> - <groupId>io.quarkiverse.tika</groupId> - <artifactId>quarkus-tika-deployment</artifactId> - <version>2.0.4</version> - </dependency> <dependency> <groupId>io.reactivex.rxjava3</groupId> <artifactId>rxjava</artifactId> diff --git a/poms/bom/src/main/generated/flattened-reduced-verbose-pom.xml b/poms/bom/src/main/generated/flattened-reduced-verbose-pom.xml index 976917b3cb..77cca2c266 100644 --- a/poms/bom/src/main/generated/flattened-reduced-verbose-pom.xml +++ b/poms/bom/src/main/generated/flattened-reduced-verbose-pom.xml @@ -2629,12 +2629,6 @@ <groupId>org.apache.camel</groupId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> <artifactId>camel-tika</artifactId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> <version>4.9.0</version><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> - <exclusions> - <exclusion> - <groupId>org.apache.tika</groupId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> - <artifactId>*</artifactId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> - </exclusion> - </exclusions> </dependency> <dependency> <groupId>org.apache.camel</groupId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> @@ -6850,22 +6844,6 @@ <artifactId>quarkus-mybatis-deployment</artifactId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> <version>2.2.4</version><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> </dependency> - <dependency> - <groupId>io.quarkiverse.tika</groupId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> - <artifactId>quarkus-tika</artifactId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> - <version>2.0.4</version><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> - <exclusions> - <exclusion> - <groupId>commons-logging</groupId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> - <artifactId>commons-logging</artifactId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> - </exclusion> - </exclusions> - </dependency> - <dependency> - <groupId>io.quarkiverse.tika</groupId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> - <artifactId>quarkus-tika-deployment</artifactId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> - <version>2.0.4</version><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> - </dependency> <dependency> <groupId>io.reactivex.rxjava3</groupId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} --> <artifactId>rxjava</artifactId><!-- org.apache.camel.quarkus:camel-quarkus-bom:${project.version} -->