As found by jsg the hard way, the first version of the EC constant time
crypto overhaul broke ssh on sparc64 and made several regress tests
fail.

The issue (as tracked down by Nicola Tuveri) is that on sparc64 some asm
bits aren't enabled in libcrypto, so EC_GFp_nist_method() are used as
default method. Unfortunately, the pull request didn't include the part
of the patch that explicitly sets .mul_generator_ct, etc. in this method
(ecp_nist.c below), which led to EC_POINT_mul() erroring out.

Only sparc64 should be affected by this mistake. I don't have access to
this architecture, so I would appreciate if someone could try it.
Any other tests are of course also welcome.

To test this, build and install libcrypto and run the libcrypto
regression tests in /usr/src/regress/lib/libcrypto (make obj && make).
If these pass, this should be good enough to know that this particular
problem is fixed.

Thanks.

Index: lib/libcrypto/ec/ec2_smpl.c
===================================================================
RCS file: /cvs/src/lib/libcrypto/ec/ec2_smpl.c,v
retrieving revision 1.19
diff -u -p -r1.19 ec2_smpl.c
--- lib/libcrypto/ec/ec2_smpl.c 15 Jul 2018 16:27:39 -0000      1.19
+++ lib/libcrypto/ec/ec2_smpl.c 15 Jul 2018 17:57:15 -0000
@@ -107,15 +107,11 @@ EC_GF2m_simple_method(void)
                .point_cmp = ec_GF2m_simple_cmp,
                .make_affine = ec_GF2m_simple_make_affine,
                .points_make_affine = ec_GF2m_simple_points_make_affine,
-
-               /*
-                * the following three method functions are defined in
-                * ec2_mult.c
-                */
-               .mul = ec_GF2m_simple_mul,
+               .mul_generator_ct = ec_GFp_simple_mul_generator_ct,
+               .mul_single_ct = ec_GFp_simple_mul_single_ct,
+               .mul_double_nonct = ec_GFp_simple_mul_double_nonct,
                .precompute_mult = ec_GF2m_precompute_mult,
                .have_precompute_mult = ec_GF2m_have_precompute_mult,
-
                .field_mul = ec_GF2m_simple_field_mul,
                .field_sqr = ec_GF2m_simple_field_sqr,
                .field_div = ec_GF2m_simple_field_div,
Index: lib/libcrypto/ec/ec_lcl.h
===================================================================
RCS file: /cvs/src/lib/libcrypto/ec/ec_lcl.h,v
retrieving revision 1.9
diff -u -p -r1.9 ec_lcl.h
--- lib/libcrypto/ec/ec_lcl.h   15 Jul 2018 05:38:48 -0000      1.9
+++ lib/libcrypto/ec/ec_lcl.h   15 Jul 2018 17:57:16 -0000
@@ -160,10 +160,12 @@ struct ec_method_st {
        int (*make_affine)(const EC_GROUP *, EC_POINT *, BN_CTX *);
        int (*points_make_affine)(const EC_GROUP *, size_t num, EC_POINT *[], 
BN_CTX *);
 
-       /* used by EC_POINTs_mul, EC_POINT_mul, EC_POINT_precompute_mult, 
EC_POINT_have_precompute_mult
-        * (default implementations are used if the 'mul' pointer is 0): */
-       int (*mul)(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar,
-               size_t num, const EC_POINT *points[], const BIGNUM *scalars[], 
BN_CTX *);
+       /* used by EC_POINTs_mul, EC_POINT_mul, EC_POINT_precompute_mult, 
EC_POINT_have_precompute_mult */
+       int (*mul_generator_ct)(const EC_GROUP *, EC_POINT *r, const BIGNUM 
*scalar, BN_CTX *);
+       int (*mul_single_ct)(const EC_GROUP *group, EC_POINT *r, const BIGNUM 
*scalar,
+               const EC_POINT *point, BN_CTX *);
+       int (*mul_double_nonct)(const EC_GROUP *group, EC_POINT *r, const 
BIGNUM *g_scalar,
+               const BIGNUM *p_scalar, const EC_POINT *point, BN_CTX *);
        int (*precompute_mult)(EC_GROUP *group, BN_CTX *);
        int (*have_precompute_mult)(const EC_GROUP *group);
 
@@ -337,6 +339,11 @@ int ec_GFp_simple_make_affine(const EC_G
 int ec_GFp_simple_points_make_affine(const EC_GROUP *, size_t num, EC_POINT 
*[], BN_CTX *);
 int ec_GFp_simple_field_mul(const EC_GROUP *, BIGNUM *r, const BIGNUM *a, 
const BIGNUM *b, BN_CTX *);
 int ec_GFp_simple_field_sqr(const EC_GROUP *, BIGNUM *r, const BIGNUM *a, 
BN_CTX *);
+int ec_GFp_simple_mul_generator_ct(const EC_GROUP *, EC_POINT *r, const BIGNUM 
*scalar, BN_CTX *);
+int ec_GFp_simple_mul_single_ct(const EC_GROUP *, EC_POINT *r, const BIGNUM 
*scalar,
+       const EC_POINT *point, BN_CTX *);
+int ec_GFp_simple_mul_double_nonct(const EC_GROUP *, EC_POINT *r, const BIGNUM 
*g_scalar,
+       const BIGNUM *p_scalar, const EC_POINT *point, BN_CTX *);
 
 
 /* method functions in ecp_mont.c */
Index: lib/libcrypto/ec/ec_lib.c
===================================================================
RCS file: /cvs/src/lib/libcrypto/ec/ec_lib.c,v
retrieving revision 1.28
diff -u -p -r1.28 ec_lib.c
--- lib/libcrypto/ec/ec_lib.c   15 Jul 2018 16:27:39 -0000      1.28
+++ lib/libcrypto/ec/ec_lib.c   15 Jul 2018 17:57:16 -0000
@@ -1026,47 +1026,88 @@ EC_POINTs_make_affine(const EC_GROUP *gr
 }
 
 
-/* Functions for point multiplication.
- *
- * If group->meth->mul is 0, we use the wNAF-based implementations in 
ec_mult.c;
- * otherwise we dispatch through methods.
- */
-
+/* Functions for point multiplication */
 int 
 EC_POINTs_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar,
     size_t num, const EC_POINT *points[], const BIGNUM *scalars[], BN_CTX *ctx)
 {
-       if (group->meth->mul == 0)
-               /* use default */
-               return ec_wNAF_mul(group, r, scalar, num, points, scalars, ctx);
-
-       return group->meth->mul(group, r, scalar, num, points, scalars, ctx);
+       /*
+        * The function pointers must be set, and only support num == 0 and
+        * num == 1.
+        */
+       if (group->meth->mul_generator_ct == NULL ||
+           group->meth->mul_single_ct == NULL ||
+           group->meth->mul_double_nonct == NULL ||
+           num > 1) {
+               ECerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
+               return 0;
+       }
+       
+       /* Either bP or aG + bP, this is sane. */
+       if (num == 1 && points != NULL && scalars != NULL)
+               return EC_POINT_mul(group, r, scalar, points[0], scalars[0],
+                   ctx);
+       
+       /* aG, this is sane */
+       if (scalar != NULL && points == NULL && scalars == NULL)
+               return EC_POINT_mul(group, r, scalar, NULL, NULL, ctx);
+       
+       /* anything else is an error */
+       ECerror(ERR_R_EC_LIB);
+       return 0;
 }
 
 int 
 EC_POINT_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *g_scalar,
     const EC_POINT *point, const BIGNUM *p_scalar, BN_CTX *ctx)
 {
-       /* just a convenient interface to EC_POINTs_mul() */
-
-       const EC_POINT *points[1];
-       const BIGNUM *scalars[1];
-
-       points[0] = point;
-       scalars[0] = p_scalar;
-
-       return EC_POINTs_mul(group, r, g_scalar,
-           (point != NULL && p_scalar != NULL),
-           points, scalars, ctx);
+       if (group->meth->mul_generator_ct == NULL ||
+           group->meth->mul_single_ct == NULL ||
+           group->meth->mul_double_nonct == NULL) {
+               ECerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
+               return 0;
+       }
+       if (g_scalar != NULL && point == NULL && p_scalar == NULL) {
+               /*
+                * In this case we want to compute g_scalar * GeneratorPoint:
+                * this codepath is reached most prominently by (ephemeral) key
+                * generation of EC cryptosystems (i.e. ECDSA keygen and sign
+                * setup, ECDH keygen/first half), where the scalar is always
+                * secret. This is why we ignore if BN_FLG_CONSTTIME is actually
+                * set and we always call the constant time version.
+                */
+               return group->meth->mul_generator_ct(group, r, g_scalar, ctx);
+       }
+       if (g_scalar == NULL && point != NULL && p_scalar != NULL) {
+               /* In this case we want to compute p_scalar * GenericPoint:
+                * this codepath is reached most prominently by the second half
+                * of ECDH, where the secret scalar is multiplied by the peer's
+                * public point. To protect the secret scalar, we ignore if
+                * BN_FLG_CONSTTIME is actually set and we always call the
+                * constant time version.
+                */
+               return group->meth->mul_single_ct(group, r, p_scalar, point,
+                   ctx);
+       }
+       if (g_scalar != NULL && point != NULL && p_scalar != NULL) {
+               /*
+                * In this case we want to compute
+                *   g_scalar * GeneratorPoint + p_scalar * GenericPoint:
+                * this codepath is reached most prominently by ECDSA signature
+                * verification. So we call the non-ct version.
+                */
+               return group->meth->mul_double_nonct(group, r, g_scalar,
+                   p_scalar, point, ctx);
+       }
+               
+       /* Anything else is an error. */
+       ECerror(ERR_R_EC_LIB);
+       return 0;
 }
 
 int 
 EC_GROUP_precompute_mult(EC_GROUP * group, BN_CTX * ctx)
 {
-       if (group->meth->mul == 0)
-               /* use default */
-               return ec_wNAF_precompute_mult(group, ctx);
-
        if (group->meth->precompute_mult != 0)
                return group->meth->precompute_mult(group, ctx);
        else
@@ -1076,10 +1117,6 @@ EC_GROUP_precompute_mult(EC_GROUP * grou
 int 
 EC_GROUP_have_precompute_mult(const EC_GROUP * group)
 {
-       if (group->meth->mul == 0)
-               /* use default */
-               return ec_wNAF_have_precompute_mult(group);
-
        if (group->meth->have_precompute_mult != 0)
                return group->meth->have_precompute_mult(group);
        else
Index: lib/libcrypto/ec/ecp_mont.c
===================================================================
RCS file: /cvs/src/lib/libcrypto/ec/ecp_mont.c,v
retrieving revision 1.15
diff -u -p -r1.15 ecp_mont.c
--- lib/libcrypto/ec/ecp_mont.c 15 Jul 2018 16:27:39 -0000      1.15
+++ lib/libcrypto/ec/ecp_mont.c 15 Jul 2018 17:57:16 -0000
@@ -102,6 +102,9 @@ EC_GFp_mont_method(void)
                .point_cmp = ec_GFp_simple_cmp,
                .make_affine = ec_GFp_simple_make_affine,
                .points_make_affine = ec_GFp_simple_points_make_affine,
+               .mul_generator_ct = ec_GFp_simple_mul_generator_ct,
+               .mul_single_ct = ec_GFp_simple_mul_single_ct,
+               .mul_double_nonct = ec_GFp_simple_mul_double_nonct,
                .field_mul = ec_GFp_mont_field_mul,
                .field_sqr = ec_GFp_mont_field_sqr,
                .field_encode = ec_GFp_mont_field_encode,
Index: lib/libcrypto/ec/ecp_nist.c
===================================================================
RCS file: /cvs/src/lib/libcrypto/ec/ecp_nist.c,v
retrieving revision 1.13
diff -u -p -r1.13 ecp_nist.c
--- lib/libcrypto/ec/ecp_nist.c 15 Jul 2018 16:27:39 -0000      1.13
+++ lib/libcrypto/ec/ecp_nist.c 15 Jul 2018 17:57:16 -0000
@@ -103,6 +103,9 @@ EC_GFp_nist_method(void)
                .point_cmp = ec_GFp_simple_cmp,
                .make_affine = ec_GFp_simple_make_affine,
                .points_make_affine = ec_GFp_simple_points_make_affine,
+               .mul_generator_ct = ec_GFp_simple_mul_generator_ct,
+               .mul_single_ct = ec_GFp_simple_mul_single_ct,
+               .mul_double_nonct = ec_GFp_simple_mul_double_nonct,
                .field_mul = ec_GFp_nist_field_mul,
                .field_sqr = ec_GFp_nist_field_sqr
        };
Index: lib/libcrypto/ec/ecp_smpl.c
===================================================================
RCS file: /cvs/src/lib/libcrypto/ec/ecp_smpl.c,v
retrieving revision 1.21
diff -u -p -r1.21 ecp_smpl.c
--- lib/libcrypto/ec/ecp_smpl.c 15 Jul 2018 16:27:39 -0000      1.21
+++ lib/libcrypto/ec/ecp_smpl.c 15 Jul 2018 17:57:17 -0000
@@ -103,6 +103,9 @@ EC_GFp_simple_method(void)
                .point_cmp = ec_GFp_simple_cmp,
                .make_affine = ec_GFp_simple_make_affine,
                .points_make_affine = ec_GFp_simple_points_make_affine,
+               .mul_generator_ct = ec_GFp_simple_mul_generator_ct,
+               .mul_single_ct = ec_GFp_simple_mul_single_ct,
+               .mul_double_nonct = ec_GFp_simple_mul_double_nonct,
                .field_mul = ec_GFp_simple_field_mul,
                .field_sqr = ec_GFp_simple_field_sqr
        };
@@ -1408,4 +1411,249 @@ int 
 ec_GFp_simple_field_sqr(const EC_GROUP * group, BIGNUM * r, const BIGNUM * a, 
BN_CTX * ctx)
 {
        return BN_mod_sqr(r, a, &group->field, ctx);
+}
+
+#define EC_POINT_BN_set_flags(P, flags) do {                           \
+       BN_set_flags(&(P)->X, (flags));                                 \
+       BN_set_flags(&(P)->Y, (flags));                                 \
+       BN_set_flags(&(P)->Z, (flags));                                 \
+} while(0)
+
+#define EC_POINT_CSWAP(c, a, b, w, t) do {                             \
+       if (!BN_swap_ct(c, &(a)->X, &(b)->X, w) ||                      \
+           !BN_swap_ct(c, &(a)->Y, &(b)->Y, w) ||                      \
+           !BN_swap_ct(c, &(a)->Z, &(b)->Z, w))                        \
+               goto err;                                               \
+       t = ((a)->Z_is_one ^ (b)->Z_is_one) & (c);                      \
+       (a)->Z_is_one ^= (t);                                           \
+       (b)->Z_is_one ^= (t);                                           \
+} while(0)
+
+/*
+ * This function computes (in constant time) a point multiplication over the
+ * EC group.
+ *
+ * At a high level, it is Montgomery ladder with conditional swaps.
+ *
+ * It performs either a fixed point multiplication
+ *          (scalar * generator)
+ * when point is NULL, or a variable point multiplication
+ *          (scalar * point)
+ * when point is not NULL.
+ *
+ * scalar should be in the range [0,n) otherwise all constant time bets are 
off.
+ *
+ * NB: This says nothing about EC_POINT_add and EC_POINT_dbl,
+ * which of course are not constant time themselves.
+ *
+ * The product is stored in r.
+ *
+ * Returns 1 on success, 0 otherwise.
+ */
+static int
+ec_GFp_simple_mul_ct(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar,
+    const EC_POINT *point, BN_CTX *ctx)
+{
+       int i, cardinality_bits, group_top, kbit, pbit, Z_is_one;
+       EC_POINT *s = NULL;
+       BIGNUM *k = NULL;
+       BIGNUM *lambda = NULL;
+       BIGNUM *cardinality = NULL;
+       BN_CTX *new_ctx = NULL;
+       int ret = 0;
+
+       if (ctx == NULL && (ctx = new_ctx = BN_CTX_new()) == NULL)
+               return 0;
+
+       BN_CTX_start(ctx);
+
+       if ((s = EC_POINT_new(group)) == NULL)
+               goto err;
+
+       if (point == NULL) {
+               if (!EC_POINT_copy(s, group->generator))
+                       goto err;
+       } else {
+               if (!EC_POINT_copy(s, point))
+                       goto err;
+       }
+
+       EC_POINT_BN_set_flags(s, BN_FLG_CONSTTIME);
+
+       if ((cardinality = BN_CTX_get(ctx)) == NULL)
+               goto err;
+       if ((lambda = BN_CTX_get(ctx)) == NULL)
+               goto err;
+       if ((k = BN_CTX_get(ctx)) == NULL)
+               goto err;
+       if (!BN_mul(cardinality, &group->order, &group->cofactor, ctx))
+               goto err;
+
+       /*
+        * Group cardinalities are often on a word boundary.
+        * So when we pad the scalar, some timing diff might
+        * pop if it needs to be expanded due to carries.
+        * So expand ahead of time.
+        */
+       cardinality_bits = BN_num_bits(cardinality);
+       group_top = cardinality->top;
+       if ((bn_wexpand(k, group_top + 1) == NULL) ||
+           (bn_wexpand(lambda, group_top + 1) == NULL))
+               goto err;
+
+       if (!BN_copy(k, scalar))
+               goto err;
+
+       BN_set_flags(k, BN_FLG_CONSTTIME);
+
+       if (BN_num_bits(k) > cardinality_bits || BN_is_negative(k)) {
+               /*
+                * This is an unusual input, and we don't guarantee
+                * constant-timeness
+                */
+               if (!BN_nnmod(k, k, cardinality, ctx))
+                       goto err;
+       }
+
+       if (!BN_add(lambda, k, cardinality))
+               goto err;
+       BN_set_flags(lambda, BN_FLG_CONSTTIME);
+       if (!BN_add(k, lambda, cardinality))
+               goto err;
+       /*
+        * lambda := scalar + cardinality
+        * k := scalar + 2*cardinality
+        */
+       kbit = BN_is_bit_set(lambda, cardinality_bits);
+       if (!BN_swap_ct(kbit, k, lambda, group_top + 1))
+               goto err;
+
+       group_top = group->field.top;
+       if ((bn_wexpand(&s->X, group_top) == NULL) ||
+           (bn_wexpand(&s->Y, group_top) == NULL) ||
+           (bn_wexpand(&s->Z, group_top) == NULL) ||
+           (bn_wexpand(&r->X, group_top) == NULL) ||
+           (bn_wexpand(&r->Y, group_top) == NULL) ||
+           (bn_wexpand(&r->Z, group_top) == NULL))
+               goto err;
+
+       /* top bit is a 1, in a fixed pos */
+       if (!EC_POINT_copy(r, s))
+               goto err;
+
+       EC_POINT_BN_set_flags(r, BN_FLG_CONSTTIME);
+
+       if (!EC_POINT_dbl(group, s, s, ctx))
+               goto err;
+
+       pbit = 0;
+
+       /*
+        * The ladder step, with branches, is
+        *
+        * k[i] == 0: S = add(R, S), R = dbl(R)
+        * k[i] == 1: R = add(S, R), S = dbl(S)
+        *
+        * Swapping R, S conditionally on k[i] leaves you with state
+        *
+        * k[i] == 0: T, U = R, S
+        * k[i] == 1: T, U = S, R
+        *
+        * Then perform the ECC ops.
+        *
+        * U = add(T, U)
+        * T = dbl(T)
+        *
+        * Which leaves you with state
+        *
+        * k[i] == 0: U = add(R, S), T = dbl(R)
+        * k[i] == 1: U = add(S, R), T = dbl(S)
+        *
+        * Swapping T, U conditionally on k[i] leaves you with state
+        *
+        * k[i] == 0: R, S = T, U
+        * k[i] == 1: R, S = U, T
+        *
+        * Which leaves you with state
+        *
+        * k[i] == 0: S = add(R, S), R = dbl(R)
+        * k[i] == 1: R = add(S, R), S = dbl(S)
+        *
+        * So we get the same logic, but instead of a branch it's a
+        * conditional swap, followed by ECC ops, then another conditional swap.
+        *
+        * Optimization: The end of iteration i and start of i-1 looks like
+        *
+        * ...
+        * CSWAP(k[i], R, S)
+        * ECC
+        * CSWAP(k[i], R, S)
+        * (next iteration)
+        * CSWAP(k[i-1], R, S)
+        * ECC
+        * CSWAP(k[i-1], R, S)
+        * ...
+        *
+        * So instead of two contiguous swaps, you can merge the condition
+        * bits and do a single swap.
+        *
+        * k[i]   k[i-1]    Outcome
+        * 0      0         No Swap
+        * 0      1         Swap
+        * 1      0         Swap
+        * 1      1         No Swap
+        *
+        * This is XOR. pbit tracks the previous bit of k.
+        */
+
+       for (i = cardinality_bits - 1; i >= 0; i--) {
+               kbit = BN_is_bit_set(k, i) ^ pbit;
+               EC_POINT_CSWAP(kbit, r, s, group_top, Z_is_one);
+               if (!EC_POINT_add(group, s, r, s, ctx))
+                       goto err;
+               if (!EC_POINT_dbl(group, r, r, ctx))
+                       goto err;
+               /*
+                * pbit logic merges this cswap with that of the
+                * next iteration
+                */
+               pbit ^= kbit;
+       }
+       /* one final cswap to move the right value into r */
+       EC_POINT_CSWAP(pbit, r, s, group_top, Z_is_one);
+       
+       ret = 1;
+
+ err:
+       EC_POINT_free(s);
+       if (ctx != NULL)
+               BN_CTX_end(ctx);
+       BN_CTX_free(new_ctx);
+
+       return ret;
+}
+
+#undef EC_POINT_BN_set_flags
+#undef EC_POINT_CSWAP
+
+int
+ec_GFp_simple_mul_generator_ct(const EC_GROUP *group, EC_POINT *r,
+    const BIGNUM *scalar, BN_CTX *ctx)
+{
+       return ec_GFp_simple_mul_ct(group, r, scalar, NULL, ctx);
+}
+
+int
+ec_GFp_simple_mul_single_ct(const EC_GROUP *group, EC_POINT *r,
+    const BIGNUM *scalar, const EC_POINT *point, BN_CTX *ctx)
+{
+       return ec_GFp_simple_mul_ct(group, r, scalar, point, ctx);
+}
+
+int
+ec_GFp_simple_mul_double_nonct(const EC_GROUP *group, EC_POINT *r,
+    const BIGNUM *g_scalar, const BIGNUM *p_scalar, const EC_POINT *point,
+    BN_CTX *ctx)
+{
+       return ec_wNAF_mul(group, r, g_scalar, 1, &point, &p_scalar, ctx);
 }

Reply via email to