Richard Henderson <r...@redhat.com> writes: > +/* Recognize broadcast patterns for the Loongson. */ > + > +static bool > +mips_expand_vpc_loongson_bcast (struct expand_vec_perm_d *d) > +{ > + unsigned i, elt; > + rtx t0, t1; > + > + if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS)) > + return false; > + /* Note that we've already matched V2SI via punpck and V4HI via pshufh. */ > + if (d->vmode != V8QImode) > + return false; > + if (!d->one_vector_p) > + return false; > + > + elt = d->perm[0]; > + for (i = 1; i < 8; ++i) > + if (d->perm[i] != elt) > + return false; > + > + if (d->testing_p) > + return true; > + > + /* With one interleave we put two of the desired element adjacent. */ > + t0 = gen_reg_rtx (V8QImode); > + if (elt < 4) > + emit_insn (gen_loongson_punpcklbh (t0, d->op0, d->op0)); > + else > + emit_insn (gen_loongson_punpckhbh (t0, d->op0, d->op0)); > + > + /* Shuffle that one HImode element into all locations. */ > + elt &= 3; > + elt *= 0x55; > + t1 = gen_reg_rtx (V4HImode); > + emit_insn (gen_loongson_pshufh (t1, gen_lowpart (V4HImode, t0), > + force_reg (SImode, GEN_INT (elt)))); > + > + emit_move_insn (d->target, gen_lowpart (V8QImode, t1)); > + return true;
Probably one of those where you had to stop following the rathole, but could this be generalised to handle pairs in which perm[1] == perm[0] + 8? Something like: > + if (!(TARGET_HARD_FLOAT && TARGET_LOONGSON_VECTORS)) > + return false; > + /* Note that we've already matched V2SI via punpck and V4HI via pshufh. */ > + if (d->vmode != V8QImode) > + return false; > + if (d->perm[1] != (d->one_vector_p ? d->perm[0] : d->perm[0] + 8)) > + return false; > + > + for (i = 2; i < 8; ++i) > + if (d->perm[i] != d->perm[i & 1]) > + return false; then use both d->op0 and d->op1 in the pack? > + memset (&d, 0, sizeof(d)); missing space before the parenthesis: this should be "sizeof (d)". Richard