One of the worries in the back of my mind with findutils has always been
about whether all the fields in `struct predicate` actually get initialised
correctly by the parser functions.

I should explain that recently I've been using other languages which make
it possible to ensure at compile time that things are correctly initialised
and consistently used, and to be direct about it, I miss these things in C.

In object-oriented languages a "natural" way to address this problem is by
using a builder object which keeps track of which fields we already set
values for (and can signal an error if a mandatory field isn't set).  You
can achieve a similar effect in C (see attached path which changes "-a" and
"-atime" in this way).  But the method is quite cumbersome (at  least as
I've implemented it).

Another possible approach is to reduce the complexity of struct predicate
itself in order for there to be fewer opportunities to get things wrong.
For example, to remove the p_prec member (and compute it instead when we
need it).   Combine the need_* members into a bitmask (which perhaps could
then help us eliminate p_cost too).  The bug remaining area of consistency
management would then be between the pred_func and args members.  IOW the
part of struct predicate where we're faking dynamic dispatch.

My instinct at this point would be to begin by adopting the second approach
(eliminating some of the fields of struct predicate) because even if we do
end up using a builder, it would make the builder simpler, too.

However, perhaps there's another way to help us be confident that the
implementation initialises everything in all code paths and is consistent.
 Any ideas, folks?
From d1c51fc73f6a4ac3b1b7c5ca01348f4a16ebfbbd Mon Sep 17 00:00:00 2001
From: James Youngman <ja...@youngman.org>
Date: Tue, 14 May 2024 12:22:17 +0100
Subject: [PATCH] Initial implementation of a builder for struct predicate.
To: findutils-patc...@gnu.org

The idea behind this is that it should catch cases where the parser
fails to initialise all the fields of struct predicate.
---
 find/defs.h   |  46 ++++----
 find/parser.c | 290 +++++++++++++++++++++++++++++++++++++++++++++++---
 find/tree.c   | 151 ++++++++++++++++++++++----
 3 files changed, 434 insertions(+), 53 deletions(-)

diff --git a/find/defs.h b/find/defs.h
index 453dc272..7b162267 100644
--- a/find/defs.h
+++ b/find/defs.h
@@ -256,6 +256,25 @@ enum EvaluationCost
   NumEvaluationCosts
 };
 
+/* Information needed by the predicate processor.
+   Next to each member are listed the predicates that use it. */
+union pred_args
+{
+  const char *str;		/* fstype [i]lname [i]name [i]path */
+  struct re_pattern_buffer *regex; /* regex */
+  struct exec_val exec_vec;	/* exec ok */
+  struct long_val numinfo;	/* gid inum links  uid */
+  struct size_val size;	/* size */
+  uid_t uid;			/* user */
+  gid_t gid;			/* group */
+  struct time_val reftime;	/* newer newerXY anewer cnewer mtime atime ctime mmin amin cmin */
+  struct perm_val perm;	/* perm */
+  struct samefile_file_id samefileid; /* samefile */
+  bool types[FTYPE_COUNT];	/* file type(s) */
+  struct format_val printf_vec; /* printf fprintf fprint ls fls print0 fprint0 print */
+  char *scontext; /* security context */
+};
+
 struct predicate
 {
   /* Pointer to the function that implements this predicate.  */
@@ -294,30 +313,17 @@ struct predicate
   /* est_success_rate is a number between 0.0 and 1.0 */
   float est_success_rate;
 
-  /* True if this predicate didn't originate from the user. */
+  /* True if this predicate didn't originate from the user. Some
+   * "artificial" predicates are added to convert "-type f -name foo"
+   * to "\( -type f -name foo \) -print".
+   */
   bool artificial;
 
   /* The raw text of the argument of this predicate. */
   const char *arg_text;
 
-  /* Information needed by the predicate processor.
-     Next to each member are listed the predicates that use it. */
-  union
-  {
-    const char *str;		/* fstype [i]lname [i]name [i]path */
-    struct re_pattern_buffer *regex; /* regex */
-    struct exec_val exec_vec;	/* exec ok */
-    struct long_val numinfo;	/* gid inum links  uid */
-    struct size_val size;	/* size */
-    uid_t uid;			/* user */
-    gid_t gid;			/* group */
-    struct time_val reftime;	/* newer newerXY anewer cnewer mtime atime ctime mmin amin cmin */
-    struct perm_val perm;	/* perm */
-    struct samefile_file_id samefileid; /* samefile */
-    bool types[FTYPE_COUNT];	/* file type(s) */
-    struct format_val printf_vec; /* printf fprintf fprint ls fls print0 fprint0 print */
-    char *scontext; /* security context */
-  } args;
+  /* Information needed by the predicate processor. */
+  union pred_args args;
 
   /* The next predicate in the user input sequence,
      which represents the order in which the user supplied the
@@ -484,6 +490,8 @@ struct predicate *get_new_pred (const struct parser_table *entry);
 struct predicate *get_new_pred_chk_op (const struct parser_table *entry,
 					      const char *arg);
 float  calculate_derived_rates (struct predicate *p);
+void init_pred_perf(struct predicate *p);
+void insert_new_pred(struct predicate *p);
 
 /* util.c */
 struct predicate *insert_primary (const struct parser_table *entry, const char *arg);
diff --git a/find/parser.c b/find/parser.c
index f95adfc7..54e3a6d9 100644
--- a/find/parser.c
+++ b/find/parser.c
@@ -337,6 +337,259 @@ static struct parser_table const parse_table[] =
 static const char *first_nonoption_arg = NULL;
 static const struct parser_table *noop = NULL;
 
+
+/* Signals which fields of struct predicate are initialised in an
+   instance of predicate_builder. */
+struct predicate_builder_presence
+{
+  /* pred_func is always set. */
+  /* p_name is always set. */
+  /* p_type is always set. */
+  unsigned p_prec_set : 1;
+  /* side_effects is always set. */
+  unsigned no_default_print_set: 1;
+  unsigned need_stat_set : 1;
+  unsigned need_type_set : 1;
+  unsigned need_inum_set: 1;
+  unsigned p_cost_set: 1;
+  unsigned est_success_rate_set: 1;
+  unsigned arg_text_set: 1;
+  unsigned args_set: 1;
+};
+
+static void predicate_builder_presence_unset_all(struct predicate_builder_presence *p)
+{
+  memset(p, 0xFF, sizeof *p);
+  p->p_prec_set = 0;
+  p->no_default_print_set = 0;
+  p->need_stat_set = 0;
+  p->need_type_set = 0;
+  p->need_inum_set = 0;
+  p->p_cost_set = 0;
+  p->est_success_rate_set = 0;
+  p->arg_text_set = 0;
+  p->args_set = 0;
+}
+
+
+/*
+ * predicate_builder supports a builder pattern for construction of
+ * new instances of struct predicate.  It ensures that all mandatory
+ * fields have been initialised.
+ */
+struct predicate_builder
+{
+  /* bitfields signalling that members of `partial` are set. */
+  struct predicate_builder_presence present;
+
+  /* a partially-constructed struct predicate (this may not yet be in
+   * a valid state).  We deliberately don't put this at the front of
+   * the struct predicate_builder, so that if struct predicate_builder
+   * is accidentally cast to struct predicate, we will probably see
+   * symptoms.
+   */
+  struct predicate partial;
+};
+
+static void predicate_builder_assert_ready(const struct predicate_builder* builder)
+{
+  const struct predicate_builder_presence *p = &builder->present;
+  assert(p->p_prec_set);
+  assert(p->no_default_print_set);
+  assert(p->need_stat_set);
+  assert(p->need_type_set);
+  assert(p->need_inum_set);
+  assert(p->p_cost_set);
+  assert(p->est_success_rate_set);
+  assert(p->arg_text_set);
+  assert(p->args_set);
+}
+
+static void
+predicate_build_and_insert(struct predicate_builder* p)
+{
+  struct predicate *newpred = xmalloc(sizeof *newpred);
+  predicate_builder_assert_ready(p);
+  *newpred = p->partial;
+  insert_new_pred(newpred);
+}
+
+static struct predicate_builder*
+predicate_set_precedence(struct predicate_builder* p, enum predicate_precedence prec)
+{
+  assert(!p->present.p_prec_set);
+  p->present.p_prec_set = 1;
+  p->partial.p_prec = prec;
+  return p;
+}
+
+static struct predicate_builder*
+predicate_set_no_default_print(struct predicate_builder* p, bool no_default_print)
+{
+  assert(!p->present.no_default_print_set);
+  p->present.no_default_print_set = 1;
+  p->partial.no_default_print = no_default_print;
+  return p;
+}
+
+static struct predicate_builder*
+predicate_set_need_stat(struct predicate_builder* p, bool need_stat)
+{
+  assert(!p->present.need_stat_set);
+  p->present.need_stat_set = 1;
+  p->partial.need_stat = need_stat;
+
+  if (need_stat)
+    {
+      if (!p->present.p_cost_set || p->partial.p_cost < NeedsStatInfo)
+	{
+	  p->partial.p_cost = NeedsStatInfo;
+	  p->present.p_cost_set = 1;
+	}
+    }
+  return p;
+}
+
+static struct predicate_builder*
+predicate_set_need_filetype(struct predicate_builder* p, bool need_filetype)
+{
+  assert(!p->present.need_type_set);
+  p->present.need_type_set = 1;
+  p->partial.need_type = need_filetype;
+  return p;
+}
+
+static struct predicate_builder*
+predicate_set_needs_inode_number(struct predicate_builder* p, bool need_inode_num)
+{
+  assert(!p->present.need_inum_set);
+  p->present.need_inum_set = 1;
+  p->partial.need_inum = need_inode_num;
+  return p;
+}
+
+static struct predicate_builder*
+predicate_set_est_success_rate(struct predicate_builder* p, float rate)
+{
+  assert(!p->present.est_success_rate_set);
+  p->present.est_success_rate_set = 1;
+  p->partial.est_success_rate = rate;
+  return p;
+}
+
+static struct predicate_builder*
+predicate_set_arg_text(struct predicate_builder* p, const char *arg_text)
+{
+  assert(!p->present.arg_text_set);
+  p->present.arg_text_set = 1;
+  p->partial.arg_text = arg_text;
+  return p;
+}
+
+static struct predicate_builder*
+predicate_set_args_none(struct predicate_builder* p)
+{
+  memset(&p->partial.args, 0xFE, sizeof p->partial.args);
+  p->present.args_set = 1;
+  return p;
+}
+
+static struct predicate_builder*
+predicate_set_args_reftime(struct predicate_builder* p, struct time_val tv)
+{
+  p->partial.args.reftime = tv;
+  p->present.args_set = 1;
+  return p;
+}
+
+static struct predicate_builder*
+new_predicate_builder(PRED_FUNC func,
+		      enum predicate_type predtype,
+		      const struct parser_table *parse_entry,
+		      bool side_effects)
+{
+  struct predicate_builder *builder = xmalloc(sizeof *builder);
+  predicate_builder_presence_unset_all(&builder->present);
+
+  builder->partial.pred_func = func;
+  builder->partial.p_name = parse_entry->parser_name;
+  builder->partial.p_type = predtype;
+  builder->partial.parser_entry = parse_entry;
+  builder->partial.side_effects = side_effects;
+  /* artificial=true is so rare that it seems overly burdensome to
+   * require every caller to specify it.
+   */
+  builder->partial.artificial = false;
+  builder->partial.p_cost = NeedsUnknown;
+
+  switch (predtype)
+    {
+    case OPEN_PAREN:
+    case CLOSE_PAREN:
+      /* parens will eventually disappear from the predicate tree,
+       * so their est_success_rate value should not be interesting.
+       */
+      builder->partial.est_success_rate = NAN;
+      builder->partial.p_cost = NeedsNothing;
+      builder->present.est_success_rate_set = 1;
+      builder->present.p_cost_set = 1;
+      break;
+
+    case NO_TYPE:
+      /* Use a distinctive value so it's obvious when one of these
+       * turns up where it should not.
+       */
+      builder->partial.est_success_rate = NAN;
+      builder->partial.p_cost = NeedsUnknown;
+      builder->present.est_success_rate_set = 0;
+      builder->present.p_cost_set = 0;
+      break;
+
+    case PRIMARY_TYPE:
+      /* The caller should specify the expected success rate and cost,
+       * so require them to do that before building the predicate.
+       */
+      builder->partial.est_success_rate = NAN;
+      builder->partial.p_cost = NeedsUnknown;
+      builder->present.est_success_rate_set = 0;
+      builder->present.p_cost_set = 0;
+      break;
+
+    case UNI_OP:
+    case BI_OP:
+      /* Estimated success rates for these are generated from the
+       * success rates of the expressions on either side, so the
+       * parser doesn't need to pre-set them; they are set in
+       * calculate_derived_rates(). */
+      builder->partial.est_success_rate = NAN;
+      builder->partial.p_cost = NeedsUnknown;
+      builder->present.est_success_rate_set = 1;
+      builder->present.p_cost_set = 1;
+      break;
+    }
+
+  if (predtype == UNI_OP || predtype == BI_OP)
+    {
+      /* The caller should set the precedence of this predicate */
+      builder->present.p_prec_set = 0;
+    }
+  else
+    {
+      /* This is not an operator so the precedence does not matter. */
+      builder->partial.p_prec = NO_PREC;
+      builder->present.p_prec_set = 1;
+    }
+
+  init_pred_perf(&builder->partial);
+  builder->partial.pred_next = NULL;
+  builder->partial.pred_left = NULL;
+  builder->partial.pred_right = NULL;
+  return builder;
+}
+
+
+
+
 static int
 fallback_getfilecon (int fd, const char *name, char **p, int prev_rv)
 {
@@ -767,18 +1020,20 @@ collect_arg_stat_info (char **argv, int *arg_ptr, struct stat *p,
 
 
 static bool
-parse_and (const struct parser_table* entry, char **argv, int *arg_ptr)
+parse_and(const struct parser_table* entry, char **argv, int *arg_ptr)
 {
-  struct predicate *our_pred;
-
   (void) argv;
   (void) arg_ptr;
 
-  our_pred = get_new_pred_noarg (entry);
-  our_pred->pred_func = pred_and;
-  our_pred->p_type = BI_OP;
-  our_pred->p_prec = AND_PREC;
-  our_pred->need_stat = our_pred->need_type = false;
+  struct predicate_builder* builder = new_predicate_builder(pred_and, BI_OP, entry, false);
+  builder = predicate_set_precedence(builder, AND_PREC);
+  builder = predicate_set_need_stat(builder, false);
+  builder = predicate_set_needs_inode_number(builder, false);
+  builder = predicate_set_need_filetype(builder, false);
+  builder = predicate_set_arg_text(builder, NULL);
+  builder = predicate_set_no_default_print(builder, false);
+  builder = predicate_set_args_none(builder);
+  predicate_build_and_insert(builder);
   return true;
 }
 
@@ -791,11 +1046,20 @@ parse_anewer (const struct parser_table* entry, char **argv, int *arg_ptr)
   set_stat_placeholders (&stat_newer);
   if (collect_arg_stat_info (argv, arg_ptr, &stat_newer, &arg))
     {
-      struct predicate *our_pred = insert_primary (entry, arg);
-      our_pred->args.reftime.xval = XVAL_ATIME;
-      our_pred->args.reftime.ts = get_stat_mtime (&stat_newer);
-      our_pred->args.reftime.kind = COMP_GT;
-      our_pred->est_success_rate = estimate_timestamp_success_rate (stat_newer.st_mtime);
+      struct time_val reftime;
+      reftime.xval = XVAL_ATIME;
+      reftime.ts = get_stat_mtime (&stat_newer);
+      reftime.kind = COMP_GT;
+
+      struct predicate_builder* builder = new_predicate_builder(pred_anewer, PRIMARY_TYPE, entry, false);
+      builder = predicate_set_est_success_rate(builder, estimate_timestamp_success_rate (stat_newer.st_mtime));
+      builder = predicate_set_args_reftime(builder, reftime);
+      builder = predicate_set_need_stat(builder, true);
+      builder = predicate_set_needs_inode_number(builder, false);
+      builder = predicate_set_no_default_print(builder, false);
+      builder = predicate_set_need_filetype(builder, false);
+      builder = predicate_set_arg_text(builder, arg);
+      predicate_build_and_insert(builder);
       return true;
     }
   return false;
diff --git a/find/tree.c b/find/tree.c
index 1c6b4588..a9ef557d 100644
--- a/find/tree.c
+++ b/find/tree.c
@@ -20,6 +20,7 @@
 
 /* system headers. */
 #include <assert.h>
+#include <math.h>		/* isnan */
 #include <stdlib.h>
 
 /* gnulib headers. */
@@ -1239,6 +1240,102 @@ check_normalization (struct predicate *p, bool at_root)
     }
 }
 
+static void
+check_predicate_correctly_initialised(struct predicate* p)
+{
+  const char* problem = NULL;
+  if (!p->p_name)
+    problem = "p_name unset";
+
+  switch (p->p_type)
+    {
+    case NO_TYPE:
+      problem = "p_type is NO_TYPE";
+      break;
+
+    case PRIMARY_TYPE:
+      break;
+
+    case BI_OP:
+      switch (p->p_prec)
+	{
+	case NO_PREC:
+	case MAX_PREC:
+	  problem = "binary operators must set p_prec";
+	  break;
+	case COMMA_PREC:
+	case OR_PREC:
+	case AND_PREC:
+	  break;
+	case NEGATE_PREC:
+	  problem = "negation is not a binary operator";
+	  break;
+	}
+      break;
+
+    case UNI_OP:
+      switch (p->p_prec)
+	{
+	case NO_PREC:
+	case MAX_PREC:
+	  problem = "unary operators must set p_prec";
+	  break;
+	case COMMA_PREC:
+	  problem = "unary operators may not have comma precedence";
+	  break;
+	case OR_PREC:
+	  problem = "unary operators may not have OR precedence";
+	  break;
+	case AND_PREC:
+	  problem = "unary operators may not have AND precedence";
+	  break;
+	case NEGATE_PREC:
+	  break;
+	}
+      break;
+
+    case OPEN_PAREN:
+    case CLOSE_PAREN:
+      if (p->p_prec != NO_PREC)
+	{
+	  problem = "open and close paren must have precendence NO_PREC";
+	}
+      break;
+    }
+
+  if (NeedsUnknown == p->p_cost)
+    {
+      problem = "p_cost is uninitialized (its value is NeedsUnknown)";
+    }
+
+  if (isnan(p->est_success_rate))
+    {
+      problem = "est_success_rate is Not-a-Number";
+    }
+
+  if (problem)
+    {
+      fprintf(stderr, "error: predicate ");
+      print_predicate(stderr, p);
+      fprintf(stderr, " is not correctly initialized: %s", problem);
+      abort();
+    }
+}
+
+
+static void
+check_predicates_correctly_initialised(struct predicate* root)
+{
+  if (root)
+    {
+      check_predicate_correctly_initialised(root);
+      check_predicates_correctly_initialised(root->pred_left);
+      check_predicates_correctly_initialised(root->pred_right);
+    }
+}
+
+
+
 struct predicate*
 build_expression_tree (int argc, char *argv[], int end_of_leading_options)
 {
@@ -1390,6 +1487,7 @@ build_expression_tree (int argc, char *argv[], int end_of_leading_options)
   cur_pred = predicates;
   eval_tree = get_expr (&cur_pred, NO_PREC, NULL);
   calculate_derived_rates (eval_tree);
+  /* XXX: should we propagate predicate p_cost information up the tree? */
 
   /* Check if we have any left-over predicates (this fixes
    * Debian bug #185202).
@@ -1420,6 +1518,11 @@ build_expression_tree (int argc, char *argv[], int end_of_leading_options)
 
   estimate_costs (eval_tree);
 
+  /* Check that fields which should be initialised in the predicates in fact were
+   * (where it's possible to tell).
+   */
+  check_predicates_correctly_initialised(eval_tree);
+
   /* Rearrange the eval tree in optimal-predicate order. */
   opt_expr (&eval_tree);
 
@@ -1445,7 +1548,7 @@ build_expression_tree (int argc, char *argv[], int end_of_leading_options)
 
 /* Initialize the performance data for a predicate.
  */
-static void
+void
 init_pred_perf (struct predicate *pred)
 {
   struct predicate_performance_info *p = &pred->perf;
@@ -1464,6 +1567,20 @@ get_new_pred_noarg (const struct parser_table *entry)
   return p;
 }
 
+void
+insert_new_pred(struct predicate* p)
+{
+  if (predicates == NULL)
+    {
+      last_pred = predicates = p;
+    }
+  else
+    {
+      last_pred->pred_next = p;
+      last_pred = p;
+    }
+}
+
 
 /* Return a pointer to a new predicate structure, which has been
    linked in as the last one in the predicates list.
@@ -1472,7 +1589,6 @@ get_new_pred_noarg (const struct parser_table *entry)
    Set `last_pred' to point to the new last predicate in the list.
 
    Set all cells in the new structure to the default values. */
-
 struct predicate *
 get_new_pred (const struct parser_table *entry)
 {
@@ -1485,25 +1601,18 @@ get_new_pred (const struct parser_table *entry)
 
   /* Allocate + initialize a new predicate.  */
   new_pred = xzalloc (sizeof (struct predicate));
-  if (predicates == NULL)
-    {
-      last_pred = predicates = new_pred;
-    }
-  else
-    {
-      last_pred->pred_next = new_pred;
-      last_pred = new_pred;
-    }
-  last_pred->parser_entry = entry;
-  last_pred->p_type = NO_TYPE;
-  last_pred->p_prec = NO_PREC;
-  last_pred->need_stat = true;
-  last_pred->need_type = true;
-  last_pred->p_cost = NeedsUnknown;
-  last_pred->arg_text = "ThisShouldBeSetToSomethingElse";
-  last_pred->est_success_rate = 1.0;
-  init_pred_perf (last_pred);
-  return last_pred;
+  new_pred->parser_entry = entry;
+  new_pred->p_type = NO_TYPE;
+  new_pred->p_prec = NO_PREC;
+  new_pred->need_stat = true;
+  new_pred->need_type = true;
+  new_pred->p_cost = NeedsUnknown;
+  new_pred->arg_text = "ThisShouldBeSetToSomethingElse";
+  new_pred->est_success_rate = 1.0;
+  init_pred_perf (new_pred);
+  insert_new_pred(new_pred);
+  assert(last_pred == new_pred);
+  return new_pred;
 }
 
 /* Return a pointer to a new predicate, with operator check.
-- 
2.39.2

Reply via email to