Github user markap14 commented on a diff in the pull request:
https://github.com/apache/incubator-nifi/pull/27#discussion_r24956244
--- Diff:
nifi/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/IdentifyMimeType.java
---
@@ -327,148 +168,41 @@ public void process(final InputStream in) throws
IOException {
session.transfer(flowFile, REL_SUCCESS);
}
- private static interface ContentScanningMimeTypeIdentifier {
-
- boolean isEnabled(ProcessContext context);
-
- String getMimeType(InputStream in) throws IOException;
- }
-
- private static class ZipIdentifier implements
ContentScanningMimeTypeIdentifier {
-
- @Override
- public String getMimeType(final InputStream in) throws IOException
{
- final ZipInputStream zipIn = new ZipInputStream(in);
- try {
- if (zipIn.getNextEntry() != null) {
- return "application/zip";
- }
- } catch (final Exception e) {
- }
- return null;
- }
-
- @Override
- public boolean isEnabled(final ProcessContext context) {
- return context.getProperty(IDENTIFY_ZIP).asBoolean();
- }
- }
-
- private static class TarIdentifier implements
ContentScanningMimeTypeIdentifier {
-
- @Override
- public String getMimeType(final InputStream in) throws IOException
{
- try (final TarArchiveInputStream tarIn = new
TarArchiveInputStream(in)) {
- final TarArchiveEntry firstEntry = tarIn.getNextTarEntry();
- if (firstEntry != null) {
- if
(firstEntry.getName().equals(FlowFilePackagerV1.FILENAME_ATTRIBUTES)) {
- final TarArchiveEntry secondEntry =
tarIn.getNextTarEntry();
- if (secondEntry != null &&
secondEntry.getName().equals(FlowFilePackagerV1.FILENAME_CONTENT)) {
- return "application/flowfile-v1";
- }
- }
- return "application/tar";
- }
- } catch (final Exception e) {
- }
- return null;
- }
-
- @Override
- public boolean isEnabled(final ProcessContext context) {
- return context.getProperty(IDENTIFY_TAR).asBoolean();
- }
+ private Detector getFlowFileV3Detector() {
+ return new MagicDetector(FLOWFILE_V3,
FlowFilePackagerV3.MAGIC_HEADER);
}
- private static interface MagicHeader {
-
- int getRequiredBufferLength();
-
- String getMimeType();
-
- boolean matches(final byte[] header);
+ private Detector getFlowFileV1Detector() {
+ return new FlowFileV1Detector();
}
- private static class SimpleMagicHeader implements MagicHeader {
-
- private final String mimeType;
- private final int offset;
- private final byte[] byteSequence;
-
- public SimpleMagicHeader(final String mimeType, final byte[]
byteSequence) {
- this(mimeType, byteSequence, 0);
- }
-
- public SimpleMagicHeader(final String mimeType, final byte[]
byteSequence, final int offset) {
- this.mimeType = mimeType;
- this.byteSequence = byteSequence;
- this.offset = offset;
- }
-
- @Override
- public int getRequiredBufferLength() {
- return byteSequence.length + offset;
- }
-
- @Override
- public String getMimeType() {
- return mimeType;
- }
+ private class FlowFileV1Detector implements Detector {
@Override
- public boolean matches(final byte[] header) {
- if (header.length < getRequiredBufferLength()) {
- return false;
+ public MediaType detect(InputStream in, Metadata mtdt) throws
IOException {
+ // Sanity check the stream. This may not be a tarfile at all
+ in.mark(FlowFilePackagerV1.FILENAME_ATTRIBUTES.length());
--- End diff --
Wow, good call on the first 100 bytes being the filename -- I looked up the
tar format to see if that was indeed the case, but found a big, verbose,
confusing explanation of the header that I didn't understand. I should have
tried Wikipedia first. I am sorry that I doubted you :)
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to enable it, or if the feature is enabled but not
working, please contact infrastructure at [email protected] or file a JIRA
ticket with INFRA.
---