BTW, you have a bug in your movqi pattern:
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index b4c9ac5..d8401b5 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -2298,7 +2298,7 @@
> ;; partial register stall can be caused there. Then we use movzx.
> (define_insn "*movqi_internal"
> [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m
> ,Yk,Yk,r
> - (match_operand:QI 1 "general_operand" "q
> ,qn,qm,q,rn,qm,qn,rm,Yk,Yk
> + (match_operand:QI 1 "general_operand" "q
> ,qn,qm,q,rn,qm,qn,r,Yk,Yk"
> "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
> {
> switch (get_attr_type (insn))
You can't read from memory into a mask register in QImode.
The load would access more than the single byte, so it will fault
if the memory is the last byte of a page and the following page
is not mapped.
I expected you to need the following patch, to help spill
and fill QImode values, but I haven't found a test case that
actually needs it. Perhaps LRA is better than old reload
at guessing things like this?
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index fa79441..0e74ec6 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -33926,10 +33926,11 @@ ix86_secondary_reload (bool in_p, rtx x,
> reg_class_t r
>
> /* QImode spills from non-QI registers require
> intermediate register on 32bit targets. */
> - if (!TARGET_64BIT
> - && !in_p && mode == QImode
> - && INTEGER_CLASS_P (rclass)
> - && MAYBE_NON_Q_CLASS_P (rclass))
> + if (mode == QImode
> + && (MAYBE_MASK_CLASS_P (rclass)
> + || (!TARGET_64BIT && !in_p
> + && INTEGER_CLASS_P (rclass)
> + && MAYBE_NON_Q_CLASS_P (rclass))))
> {
> int regno;
>
r~