commit:     f29cd9f1d4337cbf25b2224597c5366f5edbf4ce
Author:     Fabian Groffen <grobian <AT> gentoo <DOT> org>
AuthorDate: Wed Dec 31 20:14:34 2025 +0000
Commit:     Fabian Groffen <grobian <AT> gentoo <DOT> org>
CommitDate: Wed Dec 31 20:14:34 2025 +0000
URL:        https://gitweb.gentoo.org/proj/portage-utils.git/commit/?id=f29cd9f1

libq/file_magic: extract file type guessing from qmerge for reuse

Signed-off-by: Fabian Groffen <grobian <AT> gentoo.org>

 libq/Makefile.am  |   1 +
 libq/Makefile.in  |  38 ++++++++++++++-----
 libq/file_magic.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 libq/file_magic.h |  25 +++++++++++++
 qmerge.c          |  72 ++++++++++++++----------------------
 5 files changed, 190 insertions(+), 54 deletions(-)

diff --git a/libq/Makefile.am b/libq/Makefile.am
index 2b66f2c..77ab467 100644
--- a/libq/Makefile.am
+++ b/libq/Makefile.am
@@ -8,6 +8,7 @@ QFILES = \
        copy_file.c copy_file.h \
        dep.c dep.h \
        eat_file.c eat_file.h \
+       file_magic.c file_magic.h \
        hash.c hash.h \
        human_readable.c human_readable.h \
        i18n.h \

diff --git a/libq/Makefile.in b/libq/Makefile.in
index 615bf2c..70695ba 100644
--- a/libq/Makefile.in
+++ b/libq/Makefile.in
@@ -300,15 +300,15 @@ libq_a_LIBADD =
 am__objects_1 = libq_a-atom.$(OBJEXT) libq_a-basename.$(OBJEXT) \
        libq_a-colors.$(OBJEXT) libq_a-contents.$(OBJEXT) \
        libq_a-copy_file.$(OBJEXT) libq_a-dep.$(OBJEXT) \
-       libq_a-eat_file.$(OBJEXT) libq_a-hash.$(OBJEXT) \
-       libq_a-human_readable.$(OBJEXT) libq_a-move_file.$(OBJEXT) \
-       libq_a-prelink.$(OBJEXT) libq_a-profile.$(OBJEXT) \
-       libq_a-rmspace.$(OBJEXT) libq_a-safe_io.$(OBJEXT) \
-       libq_a-scandirat.$(OBJEXT) libq_a-set.$(OBJEXT) \
-       libq_a-tree.$(OBJEXT) libq_a-xarray.$(OBJEXT) \
-       libq_a-xchdir.$(OBJEXT) libq_a-xmkdir.$(OBJEXT) \
-       libq_a-xpak.$(OBJEXT) libq_a-xregex.$(OBJEXT) \
-       libq_a-xsystem.$(OBJEXT)
+       libq_a-eat_file.$(OBJEXT) libq_a-file_magic.$(OBJEXT) \
+       libq_a-hash.$(OBJEXT) libq_a-human_readable.$(OBJEXT) \
+       libq_a-move_file.$(OBJEXT) libq_a-prelink.$(OBJEXT) \
+       libq_a-profile.$(OBJEXT) libq_a-rmspace.$(OBJEXT) \
+       libq_a-safe_io.$(OBJEXT) libq_a-scandirat.$(OBJEXT) \
+       libq_a-set.$(OBJEXT) libq_a-tree.$(OBJEXT) \
+       libq_a-xarray.$(OBJEXT) libq_a-xchdir.$(OBJEXT) \
+       libq_a-xmkdir.$(OBJEXT) libq_a-xpak.$(OBJEXT) \
+       libq_a-xregex.$(OBJEXT) libq_a-xsystem.$(OBJEXT)
 am_libq_a_OBJECTS = $(am__objects_1)
 libq_a_OBJECTS = $(am_libq_a_OBJECTS)
 AM_V_P = $(am__v_P_@AM_V@)
@@ -330,7 +330,7 @@ am__depfiles_remade = ./$(DEPDIR)/libq_a-atom.Po \
        ./$(DEPDIR)/libq_a-basename.Po ./$(DEPDIR)/libq_a-colors.Po \
        ./$(DEPDIR)/libq_a-contents.Po ./$(DEPDIR)/libq_a-copy_file.Po \
        ./$(DEPDIR)/libq_a-dep.Po ./$(DEPDIR)/libq_a-eat_file.Po \
-       ./$(DEPDIR)/libq_a-hash.Po \
+       ./$(DEPDIR)/libq_a-file_magic.Po ./$(DEPDIR)/libq_a-hash.Po \
        ./$(DEPDIR)/libq_a-human_readable.Po \
        ./$(DEPDIR)/libq_a-move_file.Po ./$(DEPDIR)/libq_a-prelink.Po \
        ./$(DEPDIR)/libq_a-profile.Po ./$(DEPDIR)/libq_a-rmspace.Po \
@@ -2078,6 +2078,7 @@ QFILES = \
        copy_file.c copy_file.h \
        dep.c dep.h \
        eat_file.c eat_file.h \
+       file_magic.c file_magic.h \
        hash.c hash.h \
        human_readable.c human_readable.h \
        i18n.h \
@@ -2162,6 +2163,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/libq_a-copy_file.Po@am__quote@ # am--include-marker
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libq_a-dep.Po@am__quote@ # 
am--include-marker
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libq_a-eat_file.Po@am__quote@ 
# am--include-marker
+@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/libq_a-file_magic.Po@am__quote@ # am--include-marker
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libq_a-hash.Po@am__quote@ # 
am--include-marker
 @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/libq_a-human_readable.Po@am__quote@ # am--include-marker
 @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/libq_a-move_file.Po@am__quote@ # am--include-marker
@@ -2297,6 +2299,20 @@ libq_a-eat_file.obj: eat_file.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@      DEPDIR=$(DEPDIR) $(CCDEPMODE) 
$(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@  $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(libq_a_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o 
libq_a-eat_file.obj `if test -f 'eat_file.c'; then $(CYGPATH_W) 'eat_file.c'; 
else $(CYGPATH_W) '$(srcdir)/eat_file.c'; fi`
 
+libq_a-file_magic.o: file_magic.c
+@am__fastdepCC_TRUE@   $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(libq_a_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libq_a-file_magic.o 
-MD -MP -MF $(DEPDIR)/libq_a-file_magic.Tpo -c -o libq_a-file_magic.o `test -f 
'file_magic.c' || echo '$(srcdir)/'`file_magic.c
+@am__fastdepCC_TRUE@   $(AM_V_at)$(am__mv) $(DEPDIR)/libq_a-file_magic.Tpo 
$(DEPDIR)/libq_a-file_magic.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@      $(AM_V_CC)source='file_magic.c' 
object='libq_a-file_magic.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@      DEPDIR=$(DEPDIR) $(CCDEPMODE) 
$(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@  $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(libq_a_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o 
libq_a-file_magic.o `test -f 'file_magic.c' || echo '$(srcdir)/'`file_magic.c
+
+libq_a-file_magic.obj: file_magic.c
+@am__fastdepCC_TRUE@   $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(libq_a_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libq_a-file_magic.obj 
-MD -MP -MF $(DEPDIR)/libq_a-file_magic.Tpo -c -o libq_a-file_magic.obj `if 
test -f 'file_magic.c'; then $(CYGPATH_W) 'file_magic.c'; else $(CYGPATH_W) 
'$(srcdir)/file_magic.c'; fi`
+@am__fastdepCC_TRUE@   $(AM_V_at)$(am__mv) $(DEPDIR)/libq_a-file_magic.Tpo 
$(DEPDIR)/libq_a-file_magic.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@      $(AM_V_CC)source='file_magic.c' 
object='libq_a-file_magic.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@      DEPDIR=$(DEPDIR) $(CCDEPMODE) 
$(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@  $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(libq_a_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o 
libq_a-file_magic.obj `if test -f 'file_magic.c'; then $(CYGPATH_W) 
'file_magic.c'; else $(CYGPATH_W) '$(srcdir)/file_magic.c'; fi`
+
 libq_a-hash.o: hash.c
 @am__fastdepCC_TRUE@   $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(libq_a_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libq_a-hash.o -MD -MP 
-MF $(DEPDIR)/libq_a-hash.Tpo -c -o libq_a-hash.o `test -f 'hash.c' || echo 
'$(srcdir)/'`hash.c
 @am__fastdepCC_TRUE@   $(AM_V_at)$(am__mv) $(DEPDIR)/libq_a-hash.Tpo 
$(DEPDIR)/libq_a-hash.Po
@@ -2652,6 +2668,7 @@ distclean: distclean-am
        -rm -f ./$(DEPDIR)/libq_a-copy_file.Po
        -rm -f ./$(DEPDIR)/libq_a-dep.Po
        -rm -f ./$(DEPDIR)/libq_a-eat_file.Po
+       -rm -f ./$(DEPDIR)/libq_a-file_magic.Po
        -rm -f ./$(DEPDIR)/libq_a-hash.Po
        -rm -f ./$(DEPDIR)/libq_a-human_readable.Po
        -rm -f ./$(DEPDIR)/libq_a-move_file.Po
@@ -2720,6 +2737,7 @@ maintainer-clean: maintainer-clean-am
        -rm -f ./$(DEPDIR)/libq_a-copy_file.Po
        -rm -f ./$(DEPDIR)/libq_a-dep.Po
        -rm -f ./$(DEPDIR)/libq_a-eat_file.Po
+       -rm -f ./$(DEPDIR)/libq_a-file_magic.Po
        -rm -f ./$(DEPDIR)/libq_a-hash.Po
        -rm -f ./$(DEPDIR)/libq_a-human_readable.Po
        -rm -f ./$(DEPDIR)/libq_a-move_file.Po

diff --git a/libq/file_magic.c b/libq/file_magic.c
new file mode 100644
index 0000000..0bffbe0
--- /dev/null
+++ b/libq/file_magic.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright 2025 Gentoo Foundation
+ * Distributed under the terms of the GNU General Public License v2
+ *
+ * Copyright 2025-     Fabian Groffen  - <grobian@gentoo.org>
+ */
+
+#include "main.h"
+
+#include <unistd.h>
+
+#include "file_magic.h"
+
+file_magic_type file_magic_guess_fd
+(
+       int fd
+)
+{
+       unsigned char   magic[257+6];
+       ssize_t         mlen;
+       file_magic_type ret   = FMAGIC_UNKNOWN;
+
+       /* guess the file type from the leading bytes available on fd and
+        * restore the file offset afterwards; libmagic would be much more
+        * complete, but we probe ourselves to keep dependencies minimal */
+
+       /* bz2: 3-byte: 'B' 'Z' 'h'              at byte 0
+        * gz:  2-byte:  1f  8b                  at byte 0
+        * xz:  4-byte: '7' 'z' 'X' 'Z'          at byte 1
+        * tar: 6-byte: 'u' 's' 't' 'a' 'r' \0   at byte 257
+        * lz4: 4-byte:   4  22  4d  18          at byte 0
+        * zst: 4-byte: 22-28 b5 2f  fd          at byte 0
+        * lz:  4-byte: 'L' 'Z' 'I' 'P'          at byte 0
+        * lzo: 9-byte:  89 'L' 'Z' 'O' 0 d a 1a a at byte 0
+        * br:  Brotli is undetectable */
+
+       if (fd < 0 ||
+               (mlen = read(fd, magic, sizeof(magic))) <= 0)
+       {
+               /* bad fd, read error or EOF: nothing to inspect or rewind */
+               return ret;
+       } else if (mlen >= 3 &&
+                          magic[0] == 'B' &&
+                          magic[1] == 'Z' &&
+                          magic[2] == 'h')
+       {
+               ret = FMAGIC_BZIP2;
+       } else if (mlen >= 2 &&
+                          magic[0] == 037 &&
+                          magic[1] == 0213)
+       {
+               ret = FMAGIC_GZIP;
+       } else if (mlen >= 5 &&
+                          magic[1] == '7' &&
+                          magic[2] == 'z' &&
+                          magic[3] == 'X' &&
+                          magic[4] == 'Z')
+       {
+               ret = FMAGIC_XZ;
+       } else if (mlen == 257+6 &&
+                          magic[257] == 'u' &&
+                          magic[258] == 's' &&
+                          magic[259] == 't' &&
+                          magic[260] == 'a' &&
+                          magic[261] == 'r' &&
+                          (magic[262] == '\0' ||
+                               magic[262] == ' '))
+       {
+               ret = FMAGIC_TAR;
+       } else if (mlen >= 4 &&
+                          magic[0] == 0x04 &&
+                          magic[1] == 0x22 &&
+                          magic[2] == 0x4D &&
+                          magic[3] == 0x18)
+       {
+               ret = FMAGIC_LZ4;
+       } else if (mlen >= 4 &&
+                          magic[0] >= 0x22 &&
+                          magic[0] <= 0x28 &&
+                          magic[1] == 0xB5 &&
+                          magic[2] == 0x2F &&
+                          magic[3] == 0xFD)
+       {
+               ret = FMAGIC_ZSTD;
+       } else if (mlen >= 4 &&
+                          magic[0] == 'L' &&
+                          magic[1] == 'Z' &&
+                          magic[2] == 'I' &&
+                          magic[3] == 'P')
+       {
+               ret = FMAGIC_LZIP;
+       } else if (mlen >= 9 &&
+                          magic[0] == 0x89 &&
+                          magic[1] == 'L' &&
+                          magic[2] == 'Z' &&
+                          magic[3] == 'O' &&
+                          magic[4] == 0x00 &&
+                          magic[5] == 0x0D &&
+                          magic[6] == 0x0A &&
+                          magic[7] == 0x1A &&
+                          magic[8] == 0x0A)
+       {
+               ret = FMAGIC_LZO;
+       }
+
+       lseek(fd, -(off_t)mlen, SEEK_CUR);  /* undo our read, best effort */
+       return ret;
+}

diff --git a/libq/file_magic.h b/libq/file_magic.h
new file mode 100644
index 0000000..8fdecf1
--- /dev/null
+++ b/libq/file_magic.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2025 Gentoo Foundation
+ * Distributed under the terms of the GNU General Public License v2
+ *
+ * Copyright 2025-     Fabian Groffen  - <grobian@gentoo.org>
+ */
+
+#ifndef _FILE_MAGIC_H
+#define _FILE_MAGIC_H 1
+
+typedef enum _file_magic_type {  /* result of file_magic_guess_fd() */
+       FMAGIC_UNKNOWN = 0,  /* no known signature matched, or nothing read */
+       FMAGIC_BZIP2,        /* 'B' 'Z' 'h' at byte 0 */
+       FMAGIC_GZIP,         /* 1f 8b at byte 0 */
+       FMAGIC_XZ,           /* '7' 'z' 'X' 'Z' at byte 1 */
+       FMAGIC_LZ4,          /* 04 22 4d 18 at byte 0 */
+       FMAGIC_ZSTD,         /* 22-28 b5 2f fd at byte 0 */
+       FMAGIC_LZIP,         /* 'L' 'Z' 'I' 'P' at byte 0 */
+       FMAGIC_LZO,          /* 89 'L' 'Z' 'O' 00 0d 0a 1a 0a at byte 0 */
+       FMAGIC_TAR           /* "ustar" plus NUL or space at byte 257 */
+} file_magic_type;
+
+file_magic_type file_magic_guess_fd(int fd);  /* probe leading bytes of fd */
+
+#endif

diff --git a/qmerge.c b/qmerge.c
index 12614ac..9c57897 100644
--- a/qmerge.c
+++ b/qmerge.c
@@ -31,6 +31,7 @@
 #include "move_file.h"
 #include "contents.h"
 #include "eat_file.h"
+#include "file_magic.h"
 #include "hash.h"
 #include "human_readable.h"
 #include "profile.h"
@@ -1373,8 +1374,7 @@ pkg_merge(int level, const depend_atom *qatom, const 
tree_match_ctx *mpkg)
 #endif
        } else {
                int vdbfd;
-               unsigned char magic[257+6];
-               FILE *mfd;
+               int mfd;
                FILE *tarpipe;
                FILE *tbz2f;
                unsigned char iobuf[8192];
@@ -1383,6 +1383,7 @@ pkg_merge(int level, const depend_atom *qatom, const 
tree_match_ctx *mpkg)
                size_t n;
                size_t rd;
                size_t wr;
+               file_magic_type fmt;
 
                tbz2size = 0;
                if ((vdbfd = open("vdb", O_RDONLY)) == -1)
@@ -1407,42 +1408,29 @@ pkg_merge(int level, const depend_atom *qatom, const 
tree_match_ctx *mpkg)
                 * lzo: 9-byte:  89 'L' 'Z' 'O' 0 d a 1a a at byte 0
                 * br:  anything else */
 
-               compr = "brotli -dc"; /* default: brotli; has no magic header */
-               mfd = fopen(mpkg->path, "r");
-               if (mfd != NULL) {
-                       size_t mlen = fread(magic, 1, sizeof(magic), mfd);
-                       fclose(mfd);
+               mfd = open(mpkg->path, O_RDONLY);
+               fmt = file_magic_guess_fd(mfd);
+               if (mfd >= 0)
+                       close(mfd);
 
-                       if (mlen >= 3 && magic[0] == 'B' && magic[1] == 'Z' &&
-                                       magic[2] == 'h')
-                       {
+               compr = "brotli -dc"; /* default: brotli; has no magic header */
+               switch (fmt) {
+                       case FMAGIC_BZIP2:
                                compr = "bzip2 -dc";
-                       } else if (mlen >= 2 &&
-                                       magic[0] == 037 && magic[1] == 0213)
-                       {
+                               break;
+                       case FMAGIC_GZIP:
                                compr = "gzip -dc";
-                       } else if (mlen >= 5 &&
-                                       magic[1] == '7' && magic[2] == 'z' &&
-                                       magic[3] == 'X' && magic[4] == 'Z')
-                       {
+                               break;
+                       case FMAGIC_XZ:
                                compr = "xz -dc";
-                       } else if (mlen == 257+6 &&
-                                       magic[257] == 'u' && magic[258] == 's' 
&&
-                                       magic[259] == 't' && magic[260] == 'a' 
&&
-                                       magic[261] == 'r' &&
-                                       (magic[262] == '\0' || magic[262] == ' 
'))
-                       {
+                               break;
+                       case FMAGIC_TAR:
                                compr = "";
-                       } else if (mlen >= 4 &&
-                                       magic[0] == 0x04 && magic[1] == 0x22 &&
-                                       magic[2] == 0x4D && magic[3] == 0x18)
-                       {
+                               break;
+                       case FMAGIC_LZ4:
                                compr = "lz4 -dc";
-                       } else if (mlen >= 4 &&
-                                       magic[0] >= 0x22 && magic[0] <= 0x28 &&
-                                       magic[1] == 0xB5 && magic[2] == 0x2F &&
-                                       magic[3] == 0xFD)
-                       {
+                               break;
+                       case FMAGIC_ZSTD:
                                /*
                                 * --long=31 is needed to uncompress files 
compressed with
                                 * --long=xx where xx>27. The option is "safe" 
in the sense
@@ -1462,20 +1450,16 @@ pkg_merge(int level, const depend_atom *qatom, const 
tree_match_ctx *mpkg)
                                /* If really tar -I would be used we would have 
to quote:
                                 * compr = "I \"zstd --long=31\"";
                                 * But actually we use a pipe (see below) */
-                       } else if (mlen >= 4 &&
-                                       magic[0] == 'L' && magic[1] == 'Z' &&
-                                       magic[2] == 'I' && magic[3] == 'P')
-                       {
+                               break;
+                       case FMAGIC_LZIP:
                                compr = "lzip -dc";
-                       } else if (mlen >= 9 &&
-                                       magic[0] == 0x89 && magic[1] == 'L' &&
-                                       magic[2] == 'Z' && magic[3] == 'O' &&
-                                       magic[4] == 0x00 && magic[5] == 0x0D &&
-                                       magic[6] == 0x0A && magic[7] == 0x1A &&
-                                       magic[8] == 0x0A)
-                       {
+                               break;
+                       case FMAGIC_LZO:
                                compr = "lzop -dc";
-                       }
+                               break;
+                       default:
+                               warn("unhandled compression type, please file a 
bug");
+                               break;
                }
 
                /* extract the binary package data */

Reply via email to