commit: f29cd9f1d4337cbf25b2224597c5366f5edbf4ce
Author: Fabian Groffen <grobian <AT> gentoo <DOT> org>
AuthorDate: Wed Dec 31 20:14:34 2025 +0000
Commit: Fabian Groffen <grobian <AT> gentoo <DOT> org>
CommitDate: Wed Dec 31 20:14:34 2025 +0000
URL: https://gitweb.gentoo.org/proj/portage-utils.git/commit/?id=f29cd9f1
libq/file_magic: extract file type guessing from qmerge for reuse
Signed-off-by: Fabian Groffen <grobian <AT> gentoo.org>
libq/Makefile.am | 1 +
libq/Makefile.in | 38 ++++++++++++++-----
libq/file_magic.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
libq/file_magic.h | 25 +++++++++++++
qmerge.c | 72 ++++++++++++++----------------------
5 files changed, 190 insertions(+), 54 deletions(-)
diff --git a/libq/Makefile.am b/libq/Makefile.am
index 2b66f2c..77ab467 100644
--- a/libq/Makefile.am
+++ b/libq/Makefile.am
@@ -8,6 +8,7 @@ QFILES = \
copy_file.c copy_file.h \
dep.c dep.h \
eat_file.c eat_file.h \
+ file_magic.c file_magic.h \
hash.c hash.h \
human_readable.c human_readable.h \
i18n.h \
diff --git a/libq/Makefile.in b/libq/Makefile.in
index 615bf2c..70695ba 100644
--- a/libq/Makefile.in
+++ b/libq/Makefile.in
@@ -300,15 +300,15 @@ libq_a_LIBADD =
am__objects_1 = libq_a-atom.$(OBJEXT) libq_a-basename.$(OBJEXT) \
libq_a-colors.$(OBJEXT) libq_a-contents.$(OBJEXT) \
libq_a-copy_file.$(OBJEXT) libq_a-dep.$(OBJEXT) \
- libq_a-eat_file.$(OBJEXT) libq_a-hash.$(OBJEXT) \
- libq_a-human_readable.$(OBJEXT) libq_a-move_file.$(OBJEXT) \
- libq_a-prelink.$(OBJEXT) libq_a-profile.$(OBJEXT) \
- libq_a-rmspace.$(OBJEXT) libq_a-safe_io.$(OBJEXT) \
- libq_a-scandirat.$(OBJEXT) libq_a-set.$(OBJEXT) \
- libq_a-tree.$(OBJEXT) libq_a-xarray.$(OBJEXT) \
- libq_a-xchdir.$(OBJEXT) libq_a-xmkdir.$(OBJEXT) \
- libq_a-xpak.$(OBJEXT) libq_a-xregex.$(OBJEXT) \
- libq_a-xsystem.$(OBJEXT)
+ libq_a-eat_file.$(OBJEXT) libq_a-file_magic.$(OBJEXT) \
+ libq_a-hash.$(OBJEXT) libq_a-human_readable.$(OBJEXT) \
+ libq_a-move_file.$(OBJEXT) libq_a-prelink.$(OBJEXT) \
+ libq_a-profile.$(OBJEXT) libq_a-rmspace.$(OBJEXT) \
+ libq_a-safe_io.$(OBJEXT) libq_a-scandirat.$(OBJEXT) \
+ libq_a-set.$(OBJEXT) libq_a-tree.$(OBJEXT) \
+ libq_a-xarray.$(OBJEXT) libq_a-xchdir.$(OBJEXT) \
+ libq_a-xmkdir.$(OBJEXT) libq_a-xpak.$(OBJEXT) \
+ libq_a-xregex.$(OBJEXT) libq_a-xsystem.$(OBJEXT)
am_libq_a_OBJECTS = $(am__objects_1)
libq_a_OBJECTS = $(am_libq_a_OBJECTS)
AM_V_P = $(am__v_P_@AM_V@)
@@ -330,7 +330,7 @@ am__depfiles_remade = ./$(DEPDIR)/libq_a-atom.Po \
./$(DEPDIR)/libq_a-basename.Po ./$(DEPDIR)/libq_a-colors.Po \
./$(DEPDIR)/libq_a-contents.Po ./$(DEPDIR)/libq_a-copy_file.Po \
./$(DEPDIR)/libq_a-dep.Po ./$(DEPDIR)/libq_a-eat_file.Po \
- ./$(DEPDIR)/libq_a-hash.Po \
+ ./$(DEPDIR)/libq_a-file_magic.Po ./$(DEPDIR)/libq_a-hash.Po \
./$(DEPDIR)/libq_a-human_readable.Po \
./$(DEPDIR)/libq_a-move_file.Po ./$(DEPDIR)/libq_a-prelink.Po \
./$(DEPDIR)/libq_a-profile.Po ./$(DEPDIR)/libq_a-rmspace.Po \
@@ -2078,6 +2078,7 @@ QFILES = \
copy_file.c copy_file.h \
dep.c dep.h \
eat_file.c eat_file.h \
+ file_magic.c file_magic.h \
hash.c hash.h \
human_readable.c human_readable.h \
i18n.h \
@@ -2162,6 +2163,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@
@am__quote@./$(DEPDIR)/libq_a-copy_file.Po@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libq_a-dep.Po@am__quote@ #
am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libq_a-eat_file.Po@am__quote@
# am--include-marker
+@AMDEP_TRUE@@am__include@
@am__quote@./$(DEPDIR)/libq_a-file_magic.Po@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libq_a-hash.Po@am__quote@ #
am--include-marker
@AMDEP_TRUE@@am__include@
@am__quote@./$(DEPDIR)/libq_a-human_readable.Po@am__quote@ # am--include-marker
@AMDEP_TRUE@@am__include@
@am__quote@./$(DEPDIR)/libq_a-move_file.Po@am__quote@ # am--include-marker
@@ -2297,6 +2299,20 @@ libq_a-eat_file.obj: eat_file.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE)
$(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES)
$(INCLUDES) $(libq_a_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o
libq_a-eat_file.obj `if test -f 'eat_file.c'; then $(CYGPATH_W) 'eat_file.c';
else $(CYGPATH_W) '$(srcdir)/eat_file.c'; fi`
+libq_a-file_magic.o: file_magic.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES)
$(libq_a_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libq_a-file_magic.o
-MD -MP -MF $(DEPDIR)/libq_a-file_magic.Tpo -c -o libq_a-file_magic.o `test -f
'file_magic.c' || echo '$(srcdir)/'`file_magic.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libq_a-file_magic.Tpo
$(DEPDIR)/libq_a-file_magic.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='file_magic.c'
object='libq_a-file_magic.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE)
$(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES)
$(INCLUDES) $(libq_a_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o
libq_a-file_magic.o `test -f 'file_magic.c' || echo '$(srcdir)/'`file_magic.c
+
+libq_a-file_magic.obj: file_magic.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES)
$(libq_a_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libq_a-file_magic.obj
-MD -MP -MF $(DEPDIR)/libq_a-file_magic.Tpo -c -o libq_a-file_magic.obj `if
test -f 'file_magic.c'; then $(CYGPATH_W) 'file_magic.c'; else $(CYGPATH_W)
'$(srcdir)/file_magic.c'; fi`
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libq_a-file_magic.Tpo
$(DEPDIR)/libq_a-file_magic.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='file_magic.c'
object='libq_a-file_magic.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE)
$(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES)
$(INCLUDES) $(libq_a_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o
libq_a-file_magic.obj `if test -f 'file_magic.c'; then $(CYGPATH_W)
'file_magic.c'; else $(CYGPATH_W) '$(srcdir)/file_magic.c'; fi`
+
libq_a-hash.o: hash.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES)
$(libq_a_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libq_a-hash.o -MD -MP
-MF $(DEPDIR)/libq_a-hash.Tpo -c -o libq_a-hash.o `test -f 'hash.c' || echo
'$(srcdir)/'`hash.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libq_a-hash.Tpo
$(DEPDIR)/libq_a-hash.Po
@@ -2652,6 +2668,7 @@ distclean: distclean-am
-rm -f ./$(DEPDIR)/libq_a-copy_file.Po
-rm -f ./$(DEPDIR)/libq_a-dep.Po
-rm -f ./$(DEPDIR)/libq_a-eat_file.Po
+ -rm -f ./$(DEPDIR)/libq_a-file_magic.Po
-rm -f ./$(DEPDIR)/libq_a-hash.Po
-rm -f ./$(DEPDIR)/libq_a-human_readable.Po
-rm -f ./$(DEPDIR)/libq_a-move_file.Po
@@ -2720,6 +2737,7 @@ maintainer-clean: maintainer-clean-am
-rm -f ./$(DEPDIR)/libq_a-copy_file.Po
-rm -f ./$(DEPDIR)/libq_a-dep.Po
-rm -f ./$(DEPDIR)/libq_a-eat_file.Po
+ -rm -f ./$(DEPDIR)/libq_a-file_magic.Po
-rm -f ./$(DEPDIR)/libq_a-hash.Po
-rm -f ./$(DEPDIR)/libq_a-human_readable.Po
-rm -f ./$(DEPDIR)/libq_a-move_file.Po
diff --git a/libq/file_magic.c b/libq/file_magic.c
new file mode 100644
index 0000000..0bffbe0
--- /dev/null
+++ b/libq/file_magic.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright 2025 Gentoo Foundation
+ * Distributed under the terms of the GNU General Public License v2
+ *
+ * Copyright 2025- Fabian Groffen - <grobian@gentoo.org>
+ */
+
+#include "main.h"
+
+#include <unistd.h>
+
+#include "file_magic.h"
+
+file_magic_type file_magic_guess_fd
+(
+	int fd
+)
+{
+	unsigned char magic[257+6];
+	ssize_t mlen;
+	file_magic_type ret = FMAGIC_UNKNOWN;
+
+	/* Guess the file type by probing the first bytes read from fd; we
+	 * avoid libmagic to keep the dependencies minimal.  After probing,
+	 * fd is seeked back by the amount read so the caller can reuse it.
+	 *
+	 * bz2: 3-byte: 'B' 'Z' 'h'                   at byte 0
+	 * gz:  2-byte: 1f 8b                         at byte 0
+	 * xz:  4-byte: '7' 'z' 'X' 'Z'               at byte 1
+	 * tar: 6-byte: 'u' 's' 't' 'a' 'r' \0        at byte 257
+	 * lz4: 4-byte: 4 22 4d 18                    at byte 0
+	 * zst: 4-byte: 22-28 b5 2f fd                at byte 0
+	 * lz:  4-byte: 'L' 'Z' 'I' 'P'               at byte 0
+	 * lzo: 9-byte: 89 'L' 'Z' 'O' 0 d a 1a a     at byte 0
+	 * br:  Brotli is undetectable */
+
+	if (fd < 0 ||
+		(mlen = read(fd, magic, sizeof(magic))) <= 0)
+	{
+		/* invalid fd, read error or EOF: nothing consumed, no rewind */
+		return ret;
+	} else if (mlen >= 3 &&
+			   magic[0] == 'B' &&
+			   magic[1] == 'Z' &&
+			   magic[2] == 'h')
+	{
+		ret = FMAGIC_BZIP2;
+	} else if (mlen >= 2 &&
+			   magic[0] == 037 &&
+			   magic[1] == 0213)
+	{
+		ret = FMAGIC_GZIP;
+	} else if (mlen >= 5 &&
+			   magic[1] == '7' &&
+			   magic[2] == 'z' &&
+			   magic[3] == 'X' &&
+			   magic[4] == 'Z')
+	{
+		ret = FMAGIC_XZ;
+	} else if (mlen == 257+6 &&
+			   magic[257] == 'u' &&
+			   magic[258] == 's' &&
+			   magic[259] == 't' &&
+			   magic[260] == 'a' &&
+			   magic[261] == 'r' &&
+			   (magic[262] == '\0' ||
+				magic[262] == ' '))
+	{
+		ret = FMAGIC_TAR;
+	} else if (mlen >= 4 &&
+			   magic[0] == 0x04 &&
+			   magic[1] == 0x22 &&
+			   magic[2] == 0x4D &&
+			   magic[3] == 0x18)
+	{
+		ret = FMAGIC_LZ4;
+	} else if (mlen >= 4 &&
+			   magic[0] >= 0x22 &&
+			   magic[0] <= 0x28 &&
+			   magic[1] == 0xB5 &&
+			   magic[2] == 0x2F &&
+			   magic[3] == 0xFD)
+	{
+		ret = FMAGIC_ZSTD;
+	} else if (mlen >= 4 &&
+			   magic[0] == 'L' &&
+			   magic[1] == 'Z' &&
+			   magic[2] == 'I' &&
+			   magic[3] == 'P')
+	{
+		ret = FMAGIC_LZIP;
+	} else if (mlen >= 9 &&
+			   magic[0] == 0x89 &&
+			   magic[1] == 'L' &&
+			   magic[2] == 'Z' &&
+			   magic[3] == 'O' &&
+			   magic[4] == 0x00 &&
+			   magic[5] == 0x0D &&
+			   magic[6] == 0x0A &&
+			   magic[7] == 0x1A &&
+			   magic[8] == 0x0A)
+	{
+		ret = FMAGIC_LZO;
+	}
+
+	lseek(fd, (off_t)-mlen, SEEK_CUR);  /* undo our read; best effort */
+	return ret;
+}
diff --git a/libq/file_magic.h b/libq/file_magic.h
new file mode 100644
index 0000000..8fdecf1
--- /dev/null
+++ b/libq/file_magic.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2025 Gentoo Foundation
+ * Distributed under the terms of the GNU General Public License v2
+ *
+ * Copyright 2025- Fabian Groffen - <grobian@gentoo.org>
+ */
+
+#ifndef _FILE_MAGIC_H
+#define _FILE_MAGIC_H 1
+
+typedef enum _file_magic_type {
+ FMAGIC_UNKNOWN = 0, /* no signature matched, or fd could not be read */
+ FMAGIC_BZIP2, /* 'B' 'Z' 'h' at byte 0 */
+ FMAGIC_GZIP, /* 1f 8b at byte 0 */
+ FMAGIC_XZ, /* '7' 'z' 'X' 'Z' at byte 1 */
+ FMAGIC_LZ4, /* 04 22 4d 18 at byte 0 */
+ FMAGIC_ZSTD, /* 22..28 b5 2f fd at byte 0 */
+ FMAGIC_LZIP, /* 'L' 'Z' 'I' 'P' at byte 0 */
+ FMAGIC_LZO, /* 89 'L' 'Z' 'O' 00 0d 0a 1a 0a at byte 0 */
+ FMAGIC_TAR /* "ustar" followed by NUL or space at byte 257 */
+} file_magic_type;
+
+file_magic_type file_magic_guess_fd(int fd); /* guess type from the first bytes read from fd */
+
+#endif
diff --git a/qmerge.c b/qmerge.c
index 12614ac..9c57897 100644
--- a/qmerge.c
+++ b/qmerge.c
@@ -31,6 +31,7 @@
#include "move_file.h"
#include "contents.h"
#include "eat_file.h"
+#include "file_magic.h"
#include "hash.h"
#include "human_readable.h"
#include "profile.h"
@@ -1373,8 +1374,7 @@ pkg_merge(int level, const depend_atom *qatom, const
tree_match_ctx *mpkg)
#endif
} else {
int vdbfd;
- unsigned char magic[257+6];
- FILE *mfd;
+ int mfd;
FILE *tarpipe;
FILE *tbz2f;
unsigned char iobuf[8192];
@@ -1383,6 +1383,7 @@ pkg_merge(int level, const depend_atom *qatom, const
tree_match_ctx *mpkg)
size_t n;
size_t rd;
size_t wr;
+ file_magic_type fmt;
tbz2size = 0;
if ((vdbfd = open("vdb", O_RDONLY)) == -1)
@@ -1407,42 +1408,29 @@ pkg_merge(int level, const depend_atom *qatom, const
tree_match_ctx *mpkg)
* lzo: 9-byte: 89 'L' 'Z' 'O' 0 d a 1a a at byte 0
* br: anything else */
- compr = "brotli -dc"; /* default: brotli; has no magic header */
- mfd = fopen(mpkg->path, "r");
- if (mfd != NULL) {
- size_t mlen = fread(magic, 1, sizeof(magic), mfd);
- fclose(mfd);
+ mfd = open(mpkg->path, O_RDONLY);
+ fmt = file_magic_guess_fd(mfd);
+ if (mfd >= 0)
+ close(mfd);
- if (mlen >= 3 && magic[0] == 'B' && magic[1] == 'Z' &&
- magic[2] == 'h')
- {
+ compr = "brotli -dc"; /* default: brotli; has no magic header */
+ switch (fmt) {
+ case FMAGIC_BZIP2:
compr = "bzip2 -dc";
- } else if (mlen >= 2 &&
- magic[0] == 037 && magic[1] == 0213)
- {
+ break;
+ case FMAGIC_GZIP:
compr = "gzip -dc";
- } else if (mlen >= 5 &&
- magic[1] == '7' && magic[2] == 'z' &&
- magic[3] == 'X' && magic[4] == 'Z')
- {
+ break;
+ case FMAGIC_XZ:
compr = "xz -dc";
- } else if (mlen == 257+6 &&
- magic[257] == 'u' && magic[258] == 's'
&&
- magic[259] == 't' && magic[260] == 'a'
&&
- magic[261] == 'r' &&
- (magic[262] == '\0' || magic[262] == '
'))
- {
+ break;
+ case FMAGIC_TAR:
compr = "";
- } else if (mlen >= 4 &&
- magic[0] == 0x04 && magic[1] == 0x22 &&
- magic[2] == 0x4D && magic[3] == 0x18)
- {
+ break;
+ case FMAGIC_LZ4:
compr = "lz4 -dc";
- } else if (mlen >= 4 &&
- magic[0] >= 0x22 && magic[0] <= 0x28 &&
- magic[1] == 0xB5 && magic[2] == 0x2F &&
- magic[3] == 0xFD)
- {
+ break;
+ case FMAGIC_ZSTD:
/*
* --long=31 is needed to uncompress files
compressed with
* --long=xx where xx>27. The option is "safe"
in the sense
@@ -1462,20 +1450,16 @@ pkg_merge(int level, const depend_atom *qatom, const
tree_match_ctx *mpkg)
/* If really tar -I would be used we would have
to quote:
* compr = "I \"zstd --long=31\"";
* But actually we use a pipe (see below) */
- } else if (mlen >= 4 &&
- magic[0] == 'L' && magic[1] == 'Z' &&
- magic[2] == 'I' && magic[3] == 'P')
- {
+ break;
+ case FMAGIC_LZIP:
compr = "lzip -dc";
- } else if (mlen >= 9 &&
- magic[0] == 0x89 && magic[1] == 'L' &&
- magic[2] == 'Z' && magic[3] == 'O' &&
- magic[4] == 0x00 && magic[5] == 0x0D &&
- magic[6] == 0x0A && magic[7] == 0x1A &&
- magic[8] == 0x0A)
- {
+ break;
+ case FMAGIC_LZO:
compr = "lzop -dc";
- }
+ break;
+ default:
+ warn("unhandled compression type, please file a
bug");
+ break;
}
/* extract the binary package data */