I've been sitting on this for a few months expecting to finish it, but
still haven't found the time, so I'm just putting it out there now so
it isn't lost entirely. It's originally from Chandrakumar Muthaiah:
http://article.gmane.org/gmane.comp.gnu.coreutils.bugs/15684
I applied it to the latest git, added some tests, and cleaned it up a bit.
diff --git a/src/split.c b/src/split.c
index 3a630a0..8dd1571 100644
--- a/src/split.c
+++ b/src/split.c
@@ -13,7 +13,7 @@
 
    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
-
+
 /* By tege@sics.se, with rms.
 
    To do:
@@ -26,6 +26,7 @@
 #include <stdio.h>
 #include <getopt.h>
 #include <sys/types.h>
+#include <sys/wait.h>
 
 #include "system.h"
 #include "error.h"
@@ -84,6 +85,10 @@ enum Split_type
   type_undef, type_bytes, type_byteslines, type_lines, type_digits,
   type_chunk_bytes, type_chunk_lines, type_rr
 };
+ 
+/* Output hook: shell command (e.g. gzip) and output file name suffix.  */
+static char *outhook = NULL;
+static char *outhooksuf = NULL;
 
 /* For long options that have no equivalent short option, use a
    non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
@@ -106,6 +111,8 @@ static struct option const longopts[] =
   {"verbose", no_argument, NULL, VERBOSE_OPTION},
   {"-io-blksize", required_argument, NULL,
     IO_BLKSIZE_OPTION}, /* do not document */
+  {"on-output-hook", required_argument, NULL, 'k'},
+  {"on-output-hook-suffix", required_argument, NULL, 's'},
   {GETOPT_HELP_OPTION_DECL},
   {GETOPT_VERSION_OPTION_DECL},
   {NULL, 0, NULL, 0}
@@ -173,6 +180,8 @@ Mandatory arguments to long options are mandatory for short options too.\n\
   -l, --lines=NUMBER      put NUMBER lines per output file\n\
   -n, --number=CHUNKS     generate CHUNKS output files.  See below\n\
   -u, --unbuffered        immediately copy input to output with `-n r/...'\n\
+  -k, --on-output-hook=COMMAND  pipe each output file through shell COMMAND\n\
+  -s, --on-output-hook-suffix=SUFFIX  with -k, append SUFFIX to file names\n\
 "), DEFAULT_SUFFIX_LENGTH);
       fputs (_("\
       --verbose           print a diagnostic just before each\n\
@@ -262,6 +271,93 @@ create (const char* name)
                (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH));
 }
 
+/* Run COMMAND with "/bin/sh -c", its standard input connected to a new
+   pipe, and return the write end of that pipe, or -1 with errno set on
+   failure.  MODE must contain 'w' (only write pipes are implemented)
+   and may contain 'e' to make the returned descriptor close-on-exec.  */
+
+static int
+fdpopen (const char *command, const char *mode)
+{
+  int pipe_fds[2];
+  pid_t child_pid;
+  bool do_write = false, do_cloexec = false;
+
+  for (; *mode != '\0'; mode++)
+    {
+      if (*mode == 'w')
+        do_write = true;
+      else if (*mode == 'e')
+        do_cloexec = true;
+      else
+        break;
+    }
+
+  /* Reject unknown mode letters, and modes that do not ask to write.  */
+  if (*mode != '\0' || !do_write)
+    {
+      errno = EINVAL;
+      return -1;
+    }
+
+  if (pipe (pipe_fds) < 0)
+    return -1;
+
+  /* Set close-on-exec before forking so a failure leaves no child behind;
+     the child closes its copy of the write end explicitly anyway.  */
+  child_pid = (do_cloexec && fcntl (pipe_fds[1], F_SETFD, FD_CLOEXEC) < 0
+               ? -1 : fork ());
+  if (child_pid < 0)
+    {
+      int saved_errno = errno;  /* close may overwrite errno */
+      close (pipe_fds[0]);
+      close (pipe_fds[1]);
+      errno = saved_errno;
+      return -1;
+    }
+
+  if (child_pid == 0)
+    {
+      /* Child: the read end of the pipe becomes standard input.  */
+      close (pipe_fds[1]);
+      if (pipe_fds[0] != STDIN_FILENO)
+        {
+          if (dup2 (pipe_fds[0], STDIN_FILENO) < 0)
+            _exit (127);
+          close (pipe_fds[0]);
+        }
+      /* TODO: get rid of this sh, pipe to the output in C */
+      execl ("/bin/sh", "sh", "-c", command, (char *) NULL);
+      _exit (127);
+    }
+
+  close (pipe_fds[0]);
+  return pipe_fds[1];
+}
+
+/* Start the output hook for the current output file: run the shell command
+   "OUTHOOK > OUTFILEOUTHOOKSUF" and make OUTPUT_DESC the write end of a pipe
+   to its standard input.  Exit with a diagnostic if that fails.
+   FIXME: OUTFILE and OUTHOOKSUF are not quoted for the shell.  */
+
+static void
+new_fd_pipe (void)
+{
+  /* Room for the three strings, " > " and the terminating NUL.  */
+  size_t size = strlen (outhook) + strlen (outfile) + strlen (outhooksuf) + 4;
+  char *command = xmalloc (size);
+
+  snprintf (command, size, "%s > %s%s", outhook, outfile, outhooksuf);
+
+  if (verbose)
+    fprintf (stdout, _("opening command %s\n"), quote (command));
+
+  output_desc = fdpopen (command, "we");
+  if (output_desc < 0)
+    error (EXIT_FAILURE, errno, "%s", command);
+  free (command);  /* Allocated anew for every output file; do not leak.  */
+}
+
 /* Write BYTES bytes at BP to an output file.
    If NEW_FILE_FLAG is true, open the next output file.
    Otherwise add to the same output file already in use.  */
@@ -275,10 +371,22 @@ cwrite (bool new_file_flag, const char *bp, size_t bytes)
         return;
       if (output_desc >= 0 && close (output_desc) < 0)
         error (EXIT_FAILURE, errno, "%s", outfile);
+
       next_file_name ();
-      if ((output_desc = create (outfile)) < 0)
-        error (EXIT_FAILURE, errno, "%s", outfile);
+
+      if (outhook)
+        {
+          /* Reap the previous hook (if any) and fail if it failed.  */
+          int status;
+          if (0 < waitpid (-1, &status, 0)
+              && ! (WIFEXITED (status) && WEXITSTATUS (status) == 0))
+            error (EXIT_FAILURE, 0, _("%s: output hook failed"), outhook);
+          new_fd_pipe ();
+        }
+      else if ((output_desc = create (outfile)) < 0)
+        error (EXIT_FAILURE, errno, "%s", outfile);
     }
+
   if (full_write (output_desc, bp, bytes) != bytes)
     error (EXIT_FAILURE, errno, "%s", outfile);
 }
@@ -824,7 +932,7 @@ main (int argc, char **argv)
       int this_optind = optind ? optind : 1;
       char *slash;
 
-      c = getopt_long (argc, argv, "0123456789C:a:b:del:n:u", longopts, NULL);
+      c = getopt_long (argc, argv, "0123456789C:a:b:dek:l:n:s:u", longopts, NULL);
       if (c == -1)
         break;
 
@@ -965,6 +1073,16 @@ main (int argc, char **argv)
             else
               in_blk_size = tmp_blk_size;
           }
+          break;
+
+        case 'k':
+          /* Shell command that each output file is piped through.  */
+          outhook = optarg;
+          break;
+
+        case 's':
+          /* Suffix appended to the name of each hook output file.  */
+          outhooksuf = optarg;
           break;
 
         case VERBOSE_OPTION:
@@ -980,6 +1095,12 @@ main (int argc, char **argv)
         }
     }
 
+  if (outhook && !outhooksuf)
+  {
+      error (0, 0, _("need a suffix for the split output files when used with on-output-hook"));
+      usage (EXIT_FAILURE);
+  }
+
   /* Handle default case.  */
   if (split_type == type_undef)
     {
@@ -1088,6 +1209,15 @@ main (int argc, char **argv)
     error (EXIT_FAILURE, errno, "%s", infile);
   if (output_desc >= 0 && close (output_desc) < 0)
     error (EXIT_FAILURE, errno, "%s", outfile);
+
+  /* Wait for the last output hook, and fail if it did not succeed.  */
+  if (outhook)
+    {
+      int status;
+      if (0 < waitpid (-1, &status, 0)
+          && ! (WIFEXITED (status) && WEXITSTATUS (status) == 0))
+        error (EXIT_FAILURE, 0, _("%s: output hook failed"), outhook);
+    }
 
   exit (EXIT_SUCCESS);
 }
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 2f4a561..6135dd2 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -250,6 +250,7 @@ TESTS =						\
   misc/split-a					\
   misc/split-bchunk				\
   misc/split-fail				\
+  misc/split-k					\
   misc/split-l					\
   misc/split-lchunk				\
   misc/split-rchunk				\
diff --git a/tests/misc/split-k b/tests/misc/split-k
new file mode 100755
index 0000000..0c517ad
--- /dev/null
+++ b/tests/misc/split-k
@@ -0,0 +1,43 @@
+#!/bin/sh
+# Show that split -k works.
+
+# Copyright (C) 2002-2010 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+print_ver_ split
+
+printf '1\n2\n3\n4\n5\n' > in || framework_failure
+
+split --lines=2 --on-output-hook gzip --on-output-hook-suffix .gz in > out || fail=1
+
+# Compare decompressed data, not raw .gz bytes: a gzip header may embed a
+# time stamp, so two compressions of the same input need not be identical.
+printf '1\n2\n' > exp-1 || framework_failure
+printf '3\n4\n' > exp-2 || framework_failure
+printf '5\n' > exp-3 || framework_failure
+
+gzip -dc xaa.gz > out-1 || fail=1
+gzip -dc xab.gz > out-2 || fail=1
+gzip -dc xac.gz > out-3 || fail=1
+
+compare out-1 exp-1 || fail=1
+compare out-2 exp-2 || fail=1
+compare out-3 exp-3 || fail=1
+
+test -f xaa && fail=1
+test -f xad.gz && fail=1
+
+Exit $fail

Reply via email to