[avr,committed] Use monic denominator polynomials to save a multiplication.
This is a small tweak in LibF7 to save one multiplication in computation of denominator polynomials. The polynomials are monic now, and f7_horner needs one multiplication less. Johann -- LibF7: Use monic denominator polynomials to save a multiplication. libgcc/config/avr/libf7/ * libf7.h (F7_FLAGNO_plusx, F7_FLAG_plusx): New macros. * libf7.c (f7_horner): Handle F7_FLAG_plusx in highest coefficient. * libf7-const.def [F7MOD_atan_]: Denominator: Set F7_FLAG_plusx and omit highest term. [F7MOD_asinacos_]: Use rational function with normalized denominator. diff --git a/libgcc/config/avr/libf7/libf7-const.def b/libgcc/config/avr/libf7/libf7-const.def index 8764c81ffa4..0e4c4d8701e 100644 --- a/libgcc/config/avr/libf7/libf7-const.def +++ b/libgcc/config/avr/libf7/libf7-const.def @@ -121,8 +121,7 @@ F7_CONST_DEF (X, 0, 0xd6,0xa5,0x2d,0x73,0x34,0xd8,0x60, 11) F7_CONST_DEF (X, 0, 0xe5,0x08,0xb8,0x24,0x20,0x81,0xe7, 11) F7_CONST_DEF (X, 0, 0xe3,0xb3,0x35,0xfa,0xbf,0x1f,0x81, 10) F7_CONST_DEF (X, 0, 0xd3,0x89,0x2b,0xb6,0x3e,0x2e,0x05, 8) -F7_CONST_DEF (X, 0, 0x9f,0xab,0xe9,0xd9,0x35,0xed,0x27, 5) -F7_CONST_DEF (X, 0, 0x80,0x00,0x00,0x00,0x00,0x00,0x00, 0) +F7_CONST_DEF (X, 8, 0x9f,0xab,0xe9,0xd9,0x35,0xed,0x27, 5) #endif #elif defined (SWIFT_3_4) @@ -147,24 +146,22 @@ F7_CONST_DEF (pi_6, 0, 0x86,0x0a,0x91,0xc1,0x6b,0x9b,0x2c, -1) #endif // which MiniMax #elif defined (F7MOD_asinacos_) -// Relative error < 5.6E-18, quality = 1.0037 (ideal = 1). +// f(x) = asin(w) / w, w = sqrt(x/2), w in [0, 0.5]. 
+// Relative error < 4.9E-18, Q10 = 21.7 #if defined (FOR_NUMERATOR) -// 0.9442491073135027586203 - 1.035234033892197627842731209x + 0.35290206232981519813422591897720574012x^2 - 0.04333483170641685705612351801x^3 + 0.0012557428614630796315205218507940285622x^4 + 0.084705471128435769021718764878041684288x^5 -// p = Poly ([Decimal('0.9442491073135027586203'), Decimal('-1.0352340338921976278427312087167692142'), Decimal('0.35290206232981519813422591897720574012'), Decimal('-0.043334831706416857056123518013656946650'), Decimal('0.0012557428614630796315205218507940285622'), Decimal('0.084705471128435769021718764878041684288')]) -F7_CONST_DEF (X, 0, 0x80,0x00,0x00,0x00,0x00,0x00,0x00, 0) -F7_CONST_DEF (X, 1, 0x84,0x82,0x8c,0x7f,0xa2,0xf6,0x65, 0) -F7_CONST_DEF (X, 0, 0xb4,0xaf,0x94,0x40,0xcb,0x86,0x69, -2) -F7_CONST_DEF (X, 1, 0xb1,0x7f,0xdd,0x4f,0x4e,0xbe,0x1d, -5) -F7_CONST_DEF (X, 0, 0xa4,0x97,0xbd,0x0b,0x59,0xc9,0x25, -10) -F7_CONST_DEF (X, 0, 0x8e,0x1c,0xb9,0x0b,0x50,0x6c,0xce, -17) +// -41050.4389591195072042579 + 43293.8985171424974364797 x - 15230.0535110759003163511 x^2 + 1996.35047839480810448269 x^3 - 72.2973010025603956782375 x^4 +F7_CONST_DEF (X, 1, 0xa0,0x5a,0x70,0x5f,0x9f,0xf6,0x90, 15) +F7_CONST_DEF (X, 0, 0xa9,0x1d,0xe6,0x05,0x38,0x2d,0xec, 15) +F7_CONST_DEF (X, 1, 0xed,0xf8,0x36,0xcb,0x9b,0x83,0xdd, 13) +F7_CONST_DEF (X, 0, 0xf9,0x8b,0x37,0x1e,0x77,0x74,0xf9, 10) +F7_CONST_DEF (X, 1, 0x90,0x98,0x37,0xd6,0x46,0x21,0x3c, 6) #elif defined (FOR_DENOMINATOR) -// 1 - 1.118567367225532923662371649x + 0.42736600959872448854098334016758333519x^2 - 0.06355588484963171659942148390x^3 + 0.0028820878185134035637440105959294542908x^4 -// q = Poly ([Decimal('1'), Decimal('-1.1185673672255329236623716486696411533'), Decimal('0.42736600959872448854098334016758333519'), Decimal('-0.063555884849631716599421483898013782858'), Decimal('0.0028820878185134035637440105959294542908')]) -F7_CONST_DEF (X, 0, 0x80,0x00,0x00,0x00,0x00,0x00,0x00, 0) -F7_CONST_DEF (X, 1, 
0x8f,0x2d,0x37,0x2a,0x4d,0xa1,0x57, 0) -F7_CONST_DEF (X, 0, 0xda,0xcf,0xb7,0xb5,0x4c,0x0d,0xee, -2) -F7_CONST_DEF (X, 1, 0x82,0x29,0x96,0x77,0x2e,0x19,0xc7, -4) -F7_CONST_DEF (X, 0, 0xbc,0xe1,0x68,0xec,0xba,0x20,0x29, -9) +// -41050.4389591195074048679 + 46714.7684304025268691353 x - 18353.2551497967388796235 x^2 + 2878.9626098308300020834 x^3 - 150.822900775648362380508 x^4 + x^5 +F7_CONST_DEF (X, 1, 0xa0,0x5a,0x70,0x5f,0x9f,0xf6,0x91, 15) +F7_CONST_DEF (X, 0, 0xb6,0x7a,0xc4,0xb7,0xda,0xd8,0x1b, 15) +F7_CONST_DEF (X, 1, 0x8f,0x62,0x82,0xa2,0xfe,0x81,0x26, 14) +F7_CONST_DEF (X, 0, 0xb3,0xef,0x66,0xd9,0x90,0xe3,0x91, 11) +F7_CONST_DEF (X, 9, 0x96,0xd2,0xa9,0xa0,0x0f,0x43,0x44, 7) #endif #elif defined (F7MOD_sincos_) diff --git a/libgcc/config/avr/libf7/libf7.c b/libgcc/config/avr/libf7/libf7.c index 8fb57ef90cc..373a8a55d90 100644 --- a/libgcc/config/avr/libf7/libf7.c +++ b/libgcc/config/avr/libf7/libf7.c @@ -1527,6 +1527,9 @@ void f7_horner (f7_t *cc, const f7_t *xx, uint8_t n_coeff, const f7_t *coeff, f7_copy_flash (yy, pcoeff); + if (yy->flags & F7_FLAG_plusx) +f7_Iadd (yy, xx); + while (1) { --pcoeff; diff --git a/libgcc/config/avr/libf7/libf7.h b/libgcc/config/avr/libf7/libf7.h index 03fe6abe839..3f81b5f1f88 100644 --- a/libgcc/config/avr/libf7/libf7.h +++ b/libgcc/config/avr/libf7/libf7.h @@ -47,6 +47,1
[avr,committed] Remove all uses of attribute pure from LibF7.
Applied the following patch. Johann LibF7: Remove uses of attribute pure. libgcc/config/avr/libf7/ * libf7.h (F7_PURE): Remove all occurrences. * libf7.c: Same. diff --git a/libgcc/config/avr/libf7/libf7.c b/libgcc/config/avr/libf7/libf7.c index 373a8a55d90..0d9e4c325b2 100644 --- a/libgcc/config/avr/libf7/libf7.c +++ b/libgcc/config/avr/libf7/libf7.c @@ -352,7 +352,7 @@ float f7_get_float (const f7_t *aa) return make_float (mant); } -F7_PURE ALIAS (f7_get_float, f7_truncdfsf2) +ALIAS (f7_get_float, f7_truncdfsf2) #endif // F7MOD_get_float_ #define DBL_DIG_EXP 11 @@ -572,7 +572,7 @@ int32_t f7_get_s32 (const f7_t *aa) extern int32_t to_s32 (const f7_t*, uint8_t) F7ASM(f7_to_integer_asm); return to_s32 (aa, 0x1f); } -F7_PURE ALIAS (f7_get_s32, f7_fixdfsi) +ALIAS (f7_get_s32, f7_fixdfsi) #endif // F7MOD_get_s32_ @@ -583,7 +583,7 @@ F7_PURE ALIAS (f7_get_s32, f7_fixdfsi) extern int64_t to_s64 (const f7_t*, uint8_t) F7ASM(f7_to_integer_asm); return to_s64 (aa, 0x3f); } -F7_PURE ALIAS (f7_get_s64, f7_fixdfdi) +ALIAS (f7_get_s64, f7_fixdfdi) #endif // F7MOD_get_s64_ #ifdef F7MOD_get_u16_ @@ -603,7 +603,7 @@ uint32_t f7_get_u32 (const f7_t *aa) extern uint32_t to_u32 (const f7_t*, uint8_t) F7ASM(f7_to_unsigned_asm); return to_u32 (aa, 0x1f); } -F7_PURE ALIAS (f7_get_u32, f7_fixunsdfsi) +ALIAS (f7_get_u32, f7_fixunsdfsi) #endif // F7MOD_get_u32_ @@ -614,7 +614,7 @@ uint64_t f7_get_u64 (const f7_t *aa) extern int64_t to_u64 (const f7_t*, uint8_t) F7ASM(f7_to_unsigned_asm); return to_u64 (aa, 0x3f); } -F7_PURE ALIAS (f7_get_u64, f7_fixunsdfdi) +ALIAS (f7_get_u64, f7_fixunsdfdi) #endif // F7MOD_get_u64_ diff --git a/libgcc/config/avr/libf7/libf7.h b/libgcc/config/avr/libf7/libf7.h index 3f81b5f1f88..f692854dced 100644 --- a/libgcc/config/avr/libf7/libf7.h +++ b/libgcc/config/avr/libf7/libf7.h @@ -36,7 +36,7 @@ -- Inline asm -- Setting assembler names by means of __asm (GNU-C). 
-- Attributes: alias, always_inline, const, noinline, unused, -progmem, pure, weak, warning + progmem, weak, warning -- GCC built-ins: __builtin_abort, __builtin_constant_p -- AVR built-ins: __builtin_avr_bitsr, __builtin_avr_rbits */ @@ -112,7 +112,6 @@ extern "C" { #define F7_INLINE inline __attribute__((__always_inline__)) #define F7_NOINLINE __attribute__((__noinline__)) #define F7_WEAK __attribute__((__weak__)) -#define F7_PURE __attribute__((__pure__)) #define F7_UNUSED __attribute__((__unused__)) #define F7_CONST__attribute__((__const__)) @@ -150,7 +149,7 @@ typedef uint64_t f7_double_t; #define F7_MANT_HI2(X) \ (*(uint16_t*) & (X)->mant[F7_MANT_BYTES - 2]) -static F7_INLINE F7_PURE +static F7_INLINE uint8_t f7_classify (const f7_t *aa) { extern void f7_classify_asm (void); @@ -361,14 +360,14 @@ f7_t* f7_abs (f7_t *cc, const f7_t *aa) } -F7_PURE extern int8_t f7_cmp (const f7_t*, const f7_t*); -F7_PURE extern bool f7_lt_impl (const f7_t*, const f7_t*); -F7_PURE extern bool f7_le_impl (const f7_t*, const f7_t*); -F7_PURE extern bool f7_gt_impl (const f7_t*, const f7_t*); -F7_PURE extern bool f7_ge_impl (const f7_t*, const f7_t*); -F7_PURE extern bool f7_ne_impl (const f7_t*, const f7_t*); -F7_PURE extern bool f7_eq_impl (const f7_t*, const f7_t*); -F7_PURE extern bool f7_unord_impl (const f7_t*, const f7_t*); +extern int8_t f7_cmp (const f7_t*, const f7_t*); +extern bool f7_lt_impl (const f7_t*, const f7_t*); +extern bool f7_le_impl (const f7_t*, const f7_t*); +extern bool f7_gt_impl (const f7_t*, const f7_t*); +extern bool f7_ge_impl (const f7_t*, const f7_t*); +extern bool f7_ne_impl (const f7_t*, const f7_t*); +extern bool f7_eq_impl (const f7_t*, const f7_t*); +extern bool f7_unord_impl (const f7_t*, const f7_t*); static F7_INLINE bool f7_lt (const f7_t *aa, const f7_t *bb) @@ -541,14 +540,14 @@ extern f7_t* f7_set_u32 (f7_t*, uint32_t); extern void f7_set_float (f7_t*, float); extern void f7_set_pdouble (f7_t*, const f7_double_t*); -F7_PURE extern 
int16_t f7_get_s16 (const f7_t*); -F7_PURE extern int32_t f7_get_s32 (const f7_t*); -F7_PURE extern int64_t f7_get_s64 (const f7_t*); -F7_PURE extern uint16_t f7_get_u16 (const f7_t*); -F7_PURE extern uint32_t f7_get_u32 (const f7_t*); -F7_PURE extern uint64_t f7_get_u64 (const f7_t*); -F7_PURE extern float f7_get_float (const f7_t*); -F7_PURE extern f7_double_t f7_get_double (const f7_t*); +extern int16_t f7_get_s16 (const f7_t*); +extern int32_t f7_get_s32 (const f7_t*); +extern int64_t f7_get_s64 (const f7_t*); +extern uint16_t f7_get_u16 (const f7_t*); +extern uint32_t f7_get_u32 (const f7_t*); +extern uint64_t f7_get_u64 (const f7_t*); +extern float f7_get_float (const f7_t*); +extern f7_double_t f7_get_double (const f7_t*); #if USE_LPM == 1 #define F7_PGMSPACE __attribute__((__progmem__)) @@ -639,10 +638,10 @@ extern void f7_horner (f7_t*, const f7_t*, uint8_t, const f7_t *coeff, f7_t*); ex
[avr,committed] Implement atan2
This implements atan2 which was missing from LibF7. Johann -- LibF7: Implement atan2. libgcc/config/avr/libf7/ * libf7.c (F7MOD_atan2_, f7_atan2): New module and function. * libf7.h: Adjust comments. * libf7-common.mk (CALL_PROLOGUES): Add atan2. diff --git a/libgcc/config/avr/libf7/libf7-common.mk b/libgcc/config/avr/libf7/libf7-common.mk index 28663b52e6c..e417715a7e5 100644 --- a/libgcc/config/avr/libf7/libf7-common.mk +++ b/libgcc/config/avr/libf7/libf7-common.mk @@ -43,7 +43,7 @@ m_xd += lrint lround # -mcall-prologues CALL_PROLOGUES += divx sqrt cbrt get_double set_double logx exp exp10 pow10 CALL_PROLOGUES += put_C truncx round minmax sincos tan cotan pow powi fmod -CALL_PROLOGUES += atan asinacos madd_msub hypot init horner sinhcosh tanh +CALL_PROLOGUES += atan atan2 asinacos madd_msub hypot init horner sinhcosh tanh # -mstrict-X STRICT_X += log addsub truncx ldexp exp diff --git a/libgcc/config/avr/libf7/libf7.c b/libgcc/config/avr/libf7/libf7.c index 0d9e4c325b2..49baac73e6d 100644 --- a/libgcc/config/avr/libf7/libf7.c +++ b/libgcc/config/avr/libf7/libf7.c @@ -1099,7 +1099,7 @@ f7_t* f7_ldexp (f7_t *cc, const f7_t *aa, int delta) F7_CONST_ADDR ( CST, f7_t* PTMP) - Return an LD address to for some f7_const_X[_P] constant. + Return an LD address to some f7_const_X[_P] constant. *PTMP might be needed to hold a copy of f7_const_X_P in RAM. 
f7_t* F7_U16_ADDR (uint16_t X, f7_t* PTMP) // USE_LPM @@ -2189,6 +2189,64 @@ void f7_atan (f7_t *cc, const f7_t *aa) #endif // F7MOD_atan_ +#ifdef F7MOD_atan2_ +F7_WEAK +void f7_atan2 (f7_t *cc, const f7_t *yy, const f7_t *xx) +{ + uint8_t y_class = f7_classify (yy); + uint8_t x_class = f7_classify (xx); + + // (NaN, *) -> NaN + // (*, NaN) -> NaN + if (f7_class_nan (y_class | x_class)) +return f7_set_nan (cc); + + // (0, 0) -> 0 + if (f7_class_zero (y_class & x_class)) +return f7_clr (cc); + + f7_t pi7, *pi = &pi7; + f7_const (pi, pi); + + // (Inf, +Inf) -> +pi/4;(-Inf, +Inf) -> +3pi/4 + // (Inf, -Inf) -> -pi/4;(-Inf, -Inf) -> -3pi/4 + if (f7_class_inf (y_class & x_class)) +{ + f7_copy (cc, pi); + if (! f7_class_sign (x_class)) + cc->expo = F7_(const_pi_expo) - 1; // pi / 2 + pi->expo = F7_(const_pi_expo) - 2; // pi / 4 + f7_Isub (cc, pi); + cc->flags = y_class & F7_FLAG_sign; + return; +} + + // sign(pi) := sign(y) + pi->flags = y_class & F7_FLAG_sign; + + // Only use atan(*) with |*| <= 1. 
+ + if (f7_cmp_abs (yy, xx) > 0) +{ + // |y| > |x|: atan2 = sgn(y) * pi/2 - atan (x / y); + pi->expo = F7_(const_pi_expo) - 1; // +- pi / 2 + f7_div (cc, xx, yy); + f7_atan (cc, cc); + f7_IRsub (cc, pi); +} + else +{ + // x > |y|: atan2 = atan (y / x) + // x < -|y|: atan2 = atan (y / x) +- pi + f7_div (cc, yy, xx); + f7_atan (cc, cc); + if (f7_class_sign (x_class)) + f7_Iadd (cc, pi); +} +} +#endif // F7MOD_atan2_ + + #ifdef F7MOD_asinacos_ #define ARRAY_NAME coeff_func_a_zahler diff --git a/libgcc/config/avr/libf7/libf7.h b/libgcc/config/avr/libf7/libf7.h index f692854dced..b50e6e218ba 100644 --- a/libgcc/config/avr/libf7/libf7.h +++ b/libgcc/config/avr/libf7/libf7.h @@ -606,6 +606,7 @@ extern void f7_sin (f7_t*, const f7_t*); extern void f7_cos (f7_t*, const f7_t*); extern void f7_tan (f7_t*, const f7_t*); extern void f7_atan (f7_t*, const f7_t*); +extern void f7_atan2 (f7_t*, const f7_t*, const f7_t*); extern void f7_asin (f7_t*, const f7_t*); extern void f7_acos (f7_t*, const f7_t*); extern void f7_tanh (f7_t*, const f7_t*); @@ -617,7 +618,6 @@ extern void f7_exp10 (f7_t*, const f7_t*); extern void f7_pow10 (f7_t*, const f7_t*); // Just prototypes, not implemented yet. -extern void f7_atan2 (f7_t*, const f7_t*, const f7_t*); extern long f7_lrint (const f7_t*); extern long f7_lround (const f7_t*);
[avr,committed] Implement fma, fmal.
This commit implements fma and fmal which were missing from LibF7. Johann -- LibF7: Implement fma / fmal. libgcc/config/avr/libf7/ * libf7.h (F7_SIZEOF): New macro. * libf7-asm.sx: Use F7_SIZEOF instead of magic number "10". (F7MOD_D_fma_, __fma): New module and function. (fma) [-mdouble=64]: Define as alias for __fma. (fmal) [-mlong-double=64]: Define as alias for __fma. * libf7-common.mk (F7_ASM_PARTS): Add D_fma. diff --git a/libgcc/config/avr/libf7/libf7-asm.sx b/libgcc/config/avr/libf7/libf7-asm.sx index 8fbd66bd290..5df167fe73c 100644 --- a/libgcc/config/avr/libf7/libf7-asm.sx +++ b/libgcc/config/avr/libf7/libf7-asm.sx @@ -283,8 +283,8 @@ DEFUN copy cp XL, ZL cpc XH, ZH breq 9f -adiwXL, 10 -adiwZL, 10 +adiwXL, F7_SIZEOF +adiwZL, F7_SIZEOF set bld ZERO, 1 bld ZERO, 3 ; ZERO = 0b1010 = 10. @@ -312,8 +312,8 @@ DEFUN copy_P st X+, TMP dec ZERO brne .Loop -sbiwX, 10 -sbiwZ, 10 +sbiwX, F7_SIZEOF +sbiwZ, F7_SIZEOF ret ENDF copy_P #endif /* F7MOD_copy_P_ */ @@ -1328,6 +1328,58 @@ DEFUN sqrt_approx #undef Carry +#ifdef F7MOD_D_fma_ +_DEFUN __fma +DALIAS fma +LALIAS fmal + +#define n_pushed4 +#define n_frame (2 * F7_SIZEOF) + +do_prologue_saves n_pushed, n_frame +;; Y = FramePointer + 1 +adiwY, 1 + +;; FP + 1 = (f7_t) arg1 +wmovr16,Y +;; The double argument arg1 is already in R18[]. +XCALL F7_NAME (set_double_impl) + +;; The double argument arg2 is in R10[]. Move it to R18[]. +wmovr18,r10 +wmovr20,r12 +wmovr22,r14 +;; R16, R17 are clobbered. Fetch them from where prologue_saves put them. +ldd r24,Y + n_frame + 3 ; Saved R16 +ldd r25,Y + n_frame + 2 ; Saved R17 +;; FP + 1 + 10 = (f7_t) arg2 +subir16,lo8 (-F7_SIZEOF) +sbcir17,hi8 (-F7_SIZEOF) +XCALL F7_NAME (set_double_impl) + +wmovr24,Y ; &arg1 +wmovr22,r16 ; &arg2 +XCALL F7_NAME (Imul) ; arg1 *= arg2 + +;; The 3rd double argument arg3 was passed on the stack. Move it to R18[], +;; Don't use f7_set_pdouble() because that function is unused (for now). 
+.irp n, 0, 1, 2, 3, 4, 5, 6, 7 +ldd 18+\n, Y + n_frame + n_pushed + PC_SIZE + \n +.endr +XCALL F7_NAME (set_double_impl) + +wmovr24,Y ; &arg1 +wmovr22,r16 ; &arg2 +XCALL F7_NAME (Iadd) ; arg1 += arg2 + +wmovr24,Y ; &arg1 +XCALL F7_NAME (get_double) + +do_epilogue_restores n_pushed, n_frame +_ENDF __fma +#endif /* F7MOD_D_fma_ */ + + #ifdef F7MOD_D_fabs_ _DEFUN __fabs DALIAS fabs @@ -1493,7 +1545,7 @@ DEFUN call_dd ; WHAT = R13 = 3 wmovr14, Z #define n_pushed4 -#define n_frame 10 +#define n_frame F7_SIZEOF do_prologue_saves n_pushed, n_frame ;; Y = FramePointer + 1 @@ -1565,7 +1617,7 @@ DEFUN call_ddd ret #define n_pushed4 -#define n_frame 20 +#define n_frame (2 * F7_SIZEOF) call.2: do_prologue_saves n_pushed, n_frame @@ -1576,9 +1628,8 @@ DEFUN call_ddd ;; First double argument is already in R18[]. XCALL F7_NAME (set_double_impl) ;; FP + 11 = (f7_t) arg2 -wmovr16,Y -subir16,lo8 (-10) -sbcir17,hi8 (-10) +subir16,lo8 (-F7_SIZEOF) +sbcir17,hi8 (-F7_SIZEOF) ;; Move second double argument to R18[]. 
wmovr18,r10 wmovr20,r12 diff --git a/libgcc/config/avr/libf7/libf7-common.mk b/libgcc/config/avr/libf7/libf7-common.mk index e417715a7e5..d541b48ff3c 100644 --- a/libgcc/config/avr/libf7/libf7-common.mk +++ b/libgcc/config/avr/libf7/libf7-common.mk @@ -22,7 +22,7 @@ F7_ASM_PARTS += addsub_mant_scaled store load F7_ASM_PARTS += to_integer to_unsigned clz normalize_with_carry normalize F7_ASM_PARTS += store_expo sqrt16 sqrt_approx div -F7_ASM_PARTS += D_class +F7_ASM_PARTS += D_class D_fma F7_ASM_PARTS += D_isnan D_isinf D_isfinite D_signbit D_copysign D_neg D_fabs F7_ASM_PARTS += call_dd call_ddd diff --git a/libgcc/config/avr/libf7/libf7.h b/libgcc/config/avr/libf7/libf7.h index b50e6e218ba..2b6beac0df8 100644 --- a/libgcc/config/avr/libf7/libf7.h +++ b/libgcc/config/avr/libf7/libf7.h @@ -29,6 +29,7 @@ #define F7_MANT_BYTES 7 #define F7_MANT_BITS (8 * F7_MANT_BYTES) +#define F7_SIZEOF (1 + F7_MANT_BYTES + 2) /* Using the following GCC features: -- Unnamed structs / unions (GNU-C)
[avr,committed] LibF7: Implement a function that was missing for devices without MUL.
This implements the worker function for double multiplication for devices without MUL instruction. Johann -- LibF7: Implement mul_mant for devices without MUL instruction. libgcc/config/avr/libf7/ * libf7-asm.sx (mul_mant): Implement for devices without MUL. * asm-defs.h (wmov) [!HAVE_MUL]: Fix regno computation. * t-libf7 (F7_ASM_FLAGS): Add -g0. diff --git a/libgcc/config/avr/libf7/asm-defs.h b/libgcc/config/avr/libf7/asm-defs.h index 4cfd3e61cbb..a50260a162f 100644 --- a/libgcc/config/avr/libf7/asm-defs.h +++ b/libgcc/config/avr/libf7/asm-defs.h @@ -134,14 +134,14 @@ ..regno = 0 .irpreg,\ -X, x, XL, xl, Xl, xL, x, x \ +X, x, XL, xl, Xl, xL, x, x, \ Y, y, YL, yl, Yl, yL, y, y, \ Z, z, ZL, zl, Zl, zL, z, z .ifc \reg,\dst -..dst = (..regno / 8) + 26 +..dst = 2 * (..regno / 8) + 26 .endif .ifc \reg,\src -..src = (..regno / 8) + 26 +..src = 2 * (..regno / 8) + 26 .endif ..regno = ..regno + 1 .endr diff --git a/libgcc/config/avr/libf7/libf7-asm.sx b/libgcc/config/avr/libf7/libf7-asm.sx index 5df167fe73c..4505764c126 100644 --- a/libgcc/config/avr/libf7/libf7-asm.sx +++ b/libgcc/config/avr/libf7/libf7-asm.sx @@ -1067,6 +1067,100 @@ DEFUN mul_mant ENDF mul_mant #endif /* F7MOD_mul_mant_ && MUL */ +#if defined F7MOD_mul_mant_ && ! defined (__AVR_HAVE_MUL__) +#define AA TMP +#define A0 13 +#define A1 A0+1 +#define A2 A0+2 +#define A3 A0+3 +#define A4 A0+4 +#define A5 r26 +#define A6 r27 +#define BB ZERO +#define Bitsr29 +#define Bytes r28 + +DEFUN mul_mant +do_prologue_saves 7 +bst r18,0 ; T = 1: Don't round. +;; Save result address for later. +pushr25 +pushr24 +;; Load 1st operand mantissa. +wmovr30,r22 +clr AA +LDD A0, Z+0+Off +LDD A1, Z+1+Off +LDD A2, Z+2+Off +LDD A3, Z+3+Off +LDD A4, Z+4+Off +LDD A5, Z+5+Off +LDD A6, Z+6+Off +;; Let Z point one past .mant of the 2nd input operand. +wmovr30,r20 +adiwr30,Expo + +;; Clear the result mantissa. +.global __clr_8 +XCALL __clr_8 + +;; Loop over the bytes of B's mantissa from highest to lowest. 
+;; "+1" because we jump into the loop. +ldi Bytes, 1 + F7_MANT_BYTES + +;; Divide one operand by 2 so that the result mantissa won't overflow. +;; This is accounted for by "Carry = 1" below. +ldi Bits, 1 +rjmp.Loop_entry + +.Loop_bytes: +ld BB, -Z +;; Loop over the bits of B's mantissa from highest to lowest. +ldi Bits, 8 +.Loop_bits: +lsl BB +brcc.Lnext_bit + +ADD CA, AA +adc C0, A0 +adc C1, A1 +adc C2, A2 +adc C3, A3 +adc C4, A4 +adc C5, A5 +adc C6, A6 + +.Lnext_bit: +.Loop_entry: +LSR A6 +ror A5 +ror A4 +ror A3 +ror A2 +ror A1 +ror A0 +ror AA + +dec Bits +brne.Loop_bits + +dec Bytes +brne.Loop_bytes + +;; Finally... + +pop ZL +pop ZH + +;; The result has to be left-shifted by one (multiplied by 2) in order +;; to undo the division by 2 of the 1st operand. +ldi Carry, 1 +F7call normalize.maybe_round.store_with_flags + +do_epilogue_restores 7 +ENDF mul_mant +#endif /* F7MOD_mul_mant_ && ! MUL */ + #if defined (F7MOD_div_) diff --git a/libgcc/config/avr/libf7/t-libf7 b/libgcc/config/avr/libf7/t-libf7 index 30aa280d11e..f17e67e8523 100644 --- a/libgcc/config/avr/libf7/t-libf7 +++ b/libgcc/config/avr/libf7/t-libf7 @@ -86,7 +86,7 @@ F7_C_FLAGS += $(F7_FLAGS) \ -fno-tree-loop-optimize \ -fno-tree-loop-im -fno-move-loop-invariants -F7_ASM_FLAGS +=$(F7_FLAGS) +F7_ASM_FLAGS +=$(F7_FLAGS) -g0 $(patsubst %, f7_c_%.o, $(CALL_PROLOGUES)) \ : F7_C_FLAGS += -mcall-prologues
[patch,libgcc,contrib]: Add some auto-generated files deps to gcc_update.
This patch adds two deps to gcc_update files_and_dependencies for two auto-generated headers from avr libgcc. Ok for master? Johann -- Add dependencies for some auto-generated files from avr-libgcc. / * contrib/gcc_update (files_and_dependencies): Add dependencies for: libgcc/config/avr/libf7/f7-renames.h, libgcc/config/avr/libf7/f7-wraps.h. diff --git a/contrib/gcc_update b/contrib/gcc_update index cda2bdb0df9..f9f9aed743e 100755 --- a/contrib/gcc_update +++ b/contrib/gcc_update @@ -183,6 +183,8 @@ libphobos/configure: libphobos/configure.ac libphobos/aclocal.m4 libphobos/src/Makefile.in: libphobos/src/Makefile.am libphobos/aclocal.m4 libphobos/testsuite/Makefile.in: libphobos/testsuite/Makefile.am libphobos/aclocal.m4 libstdc++-v3/include/bits/version.h: libstdc++-v3/include/bits/version.def libstdc++-v3/include/bits/version.tpl +libgcc/config/avr/libf7/f7-renames.h: libgcc/config/avr/libf7/f7renames.sh libgcc/config/avr/libf7/libf7-common.mk +libgcc/config/avr/libf7/f7-wraps.h: libgcc/config/avr/libf7/f7wraps.sh libgcc/config/avr/libf7/libf7-common.mk libgcc/config/avr/libf7/t-libf7-math # Top level Makefile.in: Makefile.tpl Makefile.def configure: configure.ac config/acx.m4
Ping #1: [patch,avr] Fix PR109650 wrong code
Ping #1 for: https://gcc.gnu.org/pipermail/gcc-patches/2023-May/618976.html https://gcc.gnu.org/pipermail/gcc-patches/attachments/20230519/9536bf8c/attachment-0001.bin Johann Am 19.05.23 um 10:49 schrieb Georg-Johann Lay: Here is a revised version of the patch. The difference from the previous one is that it adds some combine patterns for *cbranch insns that were lost in the PR92729 transition. The post-reload parts of the patterns were still there. The new patterns are slightly more general in that they also handle fixed-point modes. Apart from that, the patch behaves the same: Am 15.05.23 um 20:05 schrieb Georg-Johann Lay: This patch fixes a wrong-code bug in the wake of PR92729, the transition that turned the AVR backend from cc0 to CCmode. In cc0, the insn that uses cc0 like a conditional branch always follows the cc0 setter, which is no longer the case with CCmode where set and use of REG_CC might be in different basic blocks. This patch removes the machine-dependent reorg pass in avr_reorg entirely. It is replaced by a new, AVR-specific mini-pass that runs prior to split2. Canonicalization of comparisons away from the "difficult" codes GT[U] and LE[U] is now mostly performed by implementing TARGET_CANONICALIZE_COMPARISON. Moreover: * Text peephole conditions get "dead_or_set_regno_p (*, REG_CC)" as needed. * RTL peephole conditions get "peep2_regno_dead_p (*, REG_CC)" as needed. * Conditional branches no longer clobber REG_CC. * insn output for compares looks ahead to determine the branch mode in use. This also needs "dead_or_set_regno_p (*, REG_CC)". * Add RTL peepholes for decrement-and-branch detection. Finally, it fixes some of the many indentation glitches left over from PR92729. Ok? I'd also backport this one because all of v12+ is affected by the wrong code. Johann -- gcc/ PR target/109650 PR target/92729 * config/avr/avr-passes.def (avr_pass_ifelse): Insert new pass. * config/avr/avr.cc (avr_pass_ifelse): New RTL pass. 
(avr_pass_data_ifelse): New pass_data for it. (make_avr_pass_ifelse, avr_redundant_compare, avr_cbranch_cost) (avr_canonicalize_comparison, avr_out_plus_set_ZN) (avr_out_cmp_ext): New functions. (compare_condition): Make sure REG_CC dies in the branch insn. (avr_rtx_costs_1): Add computation of cbranch costs. (avr_adjust_insn_length) [ADJUST_LEN_ADD_SET_ZN, ADJUST_LEN_CMP_ZEXT] [ADJUST_LEN_CMP_SEXT]: Handle them. (TARGET_CANONICALIZE_COMPARISON): New define. (avr_simplify_comparison_p, compare_diff_p, avr_compare_pattern) (avr_reorg_remove_redundant_compare, avr_reorg): Remove functions. (TARGET_MACHINE_DEPENDENT_REORG): Remove define. * config/avr/avr-protos.h (avr_simplify_comparison_p): Remove proto. (make_avr_pass_ifelse, avr_out_plus_set_ZN, cc_reg_rtx) (avr_out_cmp_zext): New protos. * config/avr/avr.md (branch, difficult_branch): Don't split insns. (*cbranchhi.zero-extend.0, *cbranchhi.zero-extend.1) (*swapped_tst, *add.for.eqne.): New insns. (*cbranch4): Rename to cbranch4_insn. (define_peephole): Add dead_or_set_regno_p(insn,REG_CC) as needed. (define_peephole2): Add peep2_regno_dead_p(*,REG_CC) as needed. Add new RTL peepholes for decrement-and-branch and *swapped_tst. Rework signtest-and-branch peepholes for *sbrx_branch. (adjust_len) [add_set_ZN, cmp_zext]: New. (QIPSI): New mode iterator. (ALLs1, ALLs2, ALLs4, ALLs234): New mode iterators. (gelt): New code iterator. (gelt_eqne): New code attribute. (rvbranch, *rvbranch, difficult_rvbranch, *difficult_rvbranch) (branch_unspec, *negated_tst, *reversed_tst) (*cmpqi_sign_extend): Remove insns. (define_c_enum "unspec") [UNSPEC_IDENTITY]: Remove. * config/avr/avr-dimode.md (cbranch4): Canonicalize comparisons. * config/avr/predicates.md (scratch_or_d_register_operand): New. * config/avr/constraints.md (Yxx): New constraint. gcc/testsuite/ PR target/109650 * config/avr/torture/pr109650-1.c: New test. * config/avr/torture/pr109650-2.c: New test.
[testsuite,applied] PR52641: Fix more implicit int=32 fallout.
Committed to undo implicit assumptions. Johann testsuite/52641: Fix more of implicit int=32 assumption fallout. gcc/testsuite/ PR testsuite/52641 * gcc.dg/torture/pr107451.c: Require int32plus. * gcc.dg/torture/pr108574-3.c: Use __INT32_TYPE__ instead of int. * gcc.dg/torture/pr109940.c: Use __INTPTR_TYPE__ instead of long. * gcc.dg/torture/pr95248.c: Require size24plus. * gcc.dg/torture/pr95295-3.c: Use var_* with at least 32 bits int. * gcc.dg/torture/pr98640.c: Cast to __INT32_TYPE__ instead of int. * gcc.dg/tree-ssa/pr103771.c: Use int with at least 32 bits. diff --git a/gcc/testsuite/gcc.dg/torture/pr107451.c b/gcc/testsuite/gcc.dg/torture/pr107451.c index a17574c6896..fee010ac40a 100644 --- a/gcc/testsuite/gcc.dg/torture/pr107451.c +++ b/gcc/testsuite/gcc.dg/torture/pr107451.c @@ -1,4 +1,5 @@ /* { dg-do run } */ +/* { dg-require-effective-target int32plus } */ /* { dg-additional-options "-ftree-vectorize -fno-vect-cost-model" } */ /* { dg-additional-options "-mavx2" { target avx2_runtime } } */ diff --git a/gcc/testsuite/gcc.dg/torture/pr108574-3.c b/gcc/testsuite/gcc.dg/torture/pr108574-3.c index b4d5dae9f80..b5c85d1261a 100644 --- a/gcc/testsuite/gcc.dg/torture/pr108574-3.c +++ b/gcc/testsuite/gcc.dg/torture/pr108574-3.c @@ -1,6 +1,6 @@ /* { dg-do run } */ -int a = 3557301289, d; +__INT32_TYPE__ a = 3557301289, d; signed char b, f; unsigned short c = 241; short e, g; diff --git a/gcc/testsuite/gcc.dg/torture/pr109940.c b/gcc/testsuite/gcc.dg/torture/pr109940.c index 23364708e86..55082d0e312 100644 --- a/gcc/testsuite/gcc.dg/torture/pr109940.c +++ b/gcc/testsuite/gcc.dg/torture/pr109940.c @@ -11,7 +11,7 @@ e(int d, int f) { return 1; int g = d / 2; for (int h = 0; h < g; h++) -if (f == (long int)b > b[h]) +if (f == (__INTPTR_TYPE__)b > b[h]) c(&b[h]); e(g, f); e(g, f); diff --git a/gcc/testsuite/gcc.dg/torture/pr95248.c b/gcc/testsuite/gcc.dg/torture/pr95248.c index f0efcc12b51..e39eb22d04b 100644 --- a/gcc/testsuite/gcc.dg/torture/pr95248.c +++ 
b/gcc/testsuite/gcc.dg/torture/pr95248.c @@ -1,5 +1,6 @@ /* { dg-do run } */ /* { dg-require-effective-target int32plus } */ +/* { dg-require-effective-target size24plus } */ int var_2 = -2013646301; int var_3 = -1126567434; diff --git a/gcc/testsuite/gcc.dg/torture/pr95295-3.c b/gcc/testsuite/gcc.dg/torture/pr95295-3.c index a506af9a63f..f723020c0b3 100644 --- a/gcc/testsuite/gcc.dg/torture/pr95295-3.c +++ b/gcc/testsuite/gcc.dg/torture/pr95295-3.c @@ -1,7 +1,11 @@ /* { dg-do compile } */ extern short var_15, var_20; +#if __SIZEOF_INT__ >= 4 extern int var_18, var_21, var_23; +#else +extern __INT32_TYPE__ var_18, var_21, var_23; +#endif extern _Bool arr_2[]; extern long arr_3[]; void test() diff --git a/gcc/testsuite/gcc.dg/torture/pr98640.c b/gcc/testsuite/gcc.dg/torture/pr98640.c index b187781d614..426be66ec80 100644 --- a/gcc/testsuite/gcc.dg/torture/pr98640.c +++ b/gcc/testsuite/gcc.dg/torture/pr98640.c @@ -10,7 +10,7 @@ uint64_t var_83 = 10966786425750692026ULL; void test() { var_14 = var_0 + (_Bool)7; - var_83 = 1 + (int)var_0; // 1 + 888395530 + var_83 = 1 + (int32_t)var_0; // 1 + 888395530 } int main() diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr103771.c b/gcc/testsuite/gcc.dg/tree-ssa/pr103771.c index 8faa45a8222..8061e2df79e 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr103771.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr103771.c @@ -4,6 +4,10 @@ typedef unsigned char uint8_t; +#if __SIZEOF_INT__ < 4 +#define int __INT32_TYPE__ +#endif + static uint8_t x264_clip_uint8 (int x) { return x & (~255) ? (-x) >> 31 : x;
[patch] Fix PR101188 wrong code from postreload
There is the following bug in postreload that can be traced back to v5 at least: In postreload.cc::reload_cse_move2add() there is a loop over all insns. If it encounters a SET, the next insn is analyzed if it is a single_set. After next has been analyzed, it continues with if (success) delete_insn (insn); changed |= success; insn = next; // This effectively skips analysis of next. move2add_record_mode (reg); reg_offset[regno] = trunc_int_for_mode (added_offset + base_offset, mode); continue; // for continues with insn = NEXT_INSN (insn). So it records the effect of next, but not the clobbers that next might have. This is a problem if next clobbers a GPR like it can happen for avr. What then can happen is that in a later round, it may use a value from a (partially) clobbered reg. The patch records the effects of potential clobbers. Bootstrapped and reg-tested on x86_64. Also tested on avr where the bug popped up. The testcase discriminates on avr, and for now I am not aware of any other target that's affected by the bug. The change is not intrusive and fixes wrong code, so I'd like to backport it. Ok to apply? Johann rtl-optimization/101188: Don't bypass clobbers of some insns that are optimized or are optimization candidates. gcc/ PR rtl-optimization/101188 * postreload.cc (reload_cse_move2add): Record clobbers of next insn using move2add_note_store. gcc/testsuite/ PR rtl-optimization/101188 * gcc.c-torture/execute/pr101188.c: New test. diff --git a/gcc/postreload.cc b/gcc/postreload.cc index fb392651e1b..2de3e2ea780 100644 --- a/gcc/postreload.cc +++ b/gcc/postreload.cc @@ -2033,6 +2033,14 @@ reload_cse_move2add (rtx_insn *first) if (success) delete_insn (insn); changed |= success; + // By setting "insn = next" below, we are bypassing the + // side-effects of next, see PR101188. 
Do them by hand + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, PATTERN (next), NONCONST) + { + if (GET_CODE (*iter) == CLOBBER) + move2add_note_store (XEXP (*iter, 0), *iter, next); + } insn = next; move2add_record_mode (reg); reg_offset[regno] diff --git a/gcc/testsuite/gcc.c-torture/execute/pr101188.c b/gcc/testsuite/gcc.c-torture/execute/pr101188.c new file mode 100644 index 000..4817c69347c --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/pr101188.c @@ -0,0 +1,59 @@ +typedef __UINT8_TYPE__ uint8_t; +typedef __UINT16_TYPE__ uint16_t; + +typedef uint8_t (*fn1)(void *a); +typedef void (*fn2)(void *a, int *arg); + +struct S +{ +uint8_t buffer[64]; +uint16_t n; +fn2 f2; +void *a; +fn1 f1; +}; + +volatile uint16_t x; + +void __attribute__((__noinline__,__noclone__)) +foo (uint16_t n) +{ + x = n; +} + +void __attribute__((__noinline__,__noclone__)) +testfn (struct S *self) +{ +int arg; + +foo (self->n); +self->n++; +self->f2 (self->a, &arg); +self->buffer[0] = self->f1 (self->a); +} + +static unsigned char myfn2_called = 0; + +static void +myfn2 (void *a, int *arg) +{ + myfn2_called = 1; +} + +static uint8_t +myfn1 (void *a) +{ + return 0; +} + +int main (void) +{ + struct S s; + s.n = 0; + s.f2 = myfn2; + s.f1 = myfn1; + testfn (&s); + if (myfn2_called != 1) +__builtin_abort(); + return 0; +}
[avr, committed] Improve operations on non-LD_REGS when the operation follows a move from LD_REGS.
Applied the following patch to improve operations on non-LD_REGS when the operation follows a move from LD_REGS. Johann target/110088: Improve operation of l-reg with const after move from d-reg. After reload, there may be sequences like lreg = dreg lreg = lreg <op> const with an LD_REGS dreg, non-LD_REGS lreg, and <op> in PLUS, IOR, AND. If dreg dies after the first insn, it is possible to use dreg = dreg <op> const lreg = dreg instead which is more efficient. gcc/ PR target/110088 * config/avr/avr.md: Add an RTL peephole to optimize operations on non-LD_REGS after a move from LD_REGS. (piaop): New code iterator. diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md index 371965938a6..9f5fabc861f 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -279,6 +279,7 @@ (define_code_iterator any_extend2 [sign_extend zero_extend]) (define_code_iterator any_extract [sign_extract zero_extract]) (define_code_iterator any_shiftrt [lshiftrt ashiftrt]) +(define_code_iterator piaop [plus ior and]) (define_code_iterator bitop [xor ior and]) (define_code_iterator xior [xor ior]) (define_code_iterator eqne [eq ne]) @@ -4727,6 +4729,43 @@ (define_split DONE; }) +;; If $0 = $0 <op> const requires a QI scratch, and d-reg $1 dies after +;; the first insn, then we can replace +;;$0 = $1 +;;$0 = $0 <op> const +;; by +;;$1 = $1 <op> const +;;$0 = $1 +;; This transforms constraint alternative "r,0,n,&d" of the first operation +;; to alternative "d,0,n,X".
+;; "*addhi3_clobber" "*addpsi3" "*addsi3" +;; "*addhq3" "*adduhq3" "*addha3" "*adduha3" +;; "*addsq3" "*addusq3" "*addsa3" "*addusa3" +;; "*iorhi3" "*iorpsi3" "*iorsi3" +;; "*andhi3" "*andpsi3" "*andsi3" +(define_peephole2 + [(parallel [(set (match_operand:ORDERED234 0 "register_operand") + (match_operand:ORDERED234 1 "d_register_operand")) + (clobber (reg:CC REG_CC))]) + (parallel [(set (match_dup 0) + (piaop:ORDERED234 (match_dup 0) + (match_operand:ORDERED234 2 "const_operand"))) + ; A d-reg as scratch tells that this insn is expensive, and + ; that $0 is not a d-register: l-reg or something like SI:14 etc. + (clobber (match_operand:QI 3 "d_register_operand")) + (clobber (reg:CC REG_CC))])] + "peep2_reg_dead_p (1, operands[1])" + [(parallel [(set (match_dup 1) + (piaop:ORDERED234 (match_dup 1) + (match_dup 2))) + (clobber (scratch:QI)) + (clobber (reg:CC REG_CC))]) + ; Unfortunately, the following insn misses a REG_DEAD note for $1, + ; so this peep2 works only once. + (parallel [(set (match_dup 0) + (match_dup 1)) + (clobber (reg:CC REG_CC))])]) + ;; swap swap swap swap swap swap swap swap swap swap swap swap swap swap swap ;; swap
Re: [patch] Fix PR101188 wrong code from postreload
Am 03.06.23 um 17:53 schrieb Jeff Law: On 6/2/23 02:46, Georg-Johann Lay wrote: There is the following bug in postreload that can be traced back to v5 at least: In postreload.cc::reload_cse_move2add() there is a loop over all insns. If it encounters a SET, the next insn is analyzed if it is a single_set. After next has been analyzed, it continues with if (success) delete_insn (insn); changed |= success; insn = next; // This effectively skips analysis of next. move2add_record_mode (reg); reg_offset[regno] = trunc_int_for_mode (added_offset + base_offset, mode); continue; // for continues with insn = NEXT_INSN (insn). So it records the effect of next, but not the clobbers that next might have. This is a problem if next clobbers a GPR like it can happen for avr. What then can happen is that in a later round, it may use a value from a (partially) clobbered reg. The patch records the effects of potential clobbers. Bootstrapped and reg-tested on x86_64. Also tested on avr where the bug popped up. The testcase discriminates on avr, and for now I am not aware of any other target that's affected by the bug. The change is not intrusive and fixes wrong code, so I'd like to backport it. Ok to apply? Johann rtl-optimization/101188: Don't bypass clobbers of some insns that are optimized or are optimization candidates. gcc/ PR rtl-optimization/101188 * postreload.cc (reload_cse_move2add): Record clobbers of next insn using move2add_note_store. gcc/testsuite/ PR rtl-optimization/101188 * gcc.c-torture/execute/pr101188.c: New test. If I understand the code correctly, isn't the core of the problem that we "continue" rather than executing the rest of the code in the loop. In particular the continue bypasses this chunk of code: for (note = REG_NOTES (insn); note; note = XEXP (note, 1)) { if (REG_NOTE_KIND (note) == REG_INC && REG_P (XEXP (note, 0))) { /* Reset the information about this register. 
*/ int regno = REGNO (XEXP (note, 0)); if (regno < FIRST_PSEUDO_REGISTER) { move2add_record_mode (XEXP (note, 0)); reg_mode[regno] = VOIDmode; } } } /* There are no REG_INC notes for SP autoinc. */ subrtx_var_iterator::array_type array; FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), NONCONST) { rtx mem = *iter; if (mem && MEM_P (mem) && GET_RTX_CLASS (GET_CODE (XEXP (mem, 0))) == RTX_AUTOINC) { if (XEXP (XEXP (mem, 0), 0) == stack_pointer_rtx) reg_mode[STACK_POINTER_REGNUM] = VOIDmode; } } note_stores (insn, move2add_note_store, insn); The point is that in the continue block, the effect of the insn is recorded even if !success, it's just the computed effect of the code. Moreover, "next" is REG = REG + CONST_INT, so there are no REG_INC notes, no? Also I don't have any testcases that break other than the one that has a clobber of a GPR along with the pointer addition. I tried some "smart" solutions before, but all failed for some reason, so I resorted to something that fixes the bug, and *only* fixes the bug, and which has clearly no other side effects than fixing the bug (I have to do all remote on compile farm). If a more elaborate fix is needed that also catches other PRs, then I would hand this over to a postreload maintainer please. Of particular importance for your case would be the note_stores call. But I could well see other targets needing the search for REG_INC notes as well as stack pushes. If I'm right, then wouldn't it be better to factor that blob of code above into its own function, then use it before the "continue" rather than implementing a custom scan for CLOBBERS? I cannot answer that. Maybe the authors of the code have some ideas. Johann It also begs the question if the other case immediately above the code I quoted needs similar adjustment. It doesn't do the insn = next, but it does bypass the search for autoinc memory references and the note_stores call. Jeff
Re: [patch] Fix PR101188 wrong code from postreload
Am 03.06.23 um 17:53 schrieb Jeff Law: On 6/2/23 02:46, Georg-Johann Lay wrote: There is the following bug in postreload that can be traced back to v5 at least: In postreload.cc::reload_cse_move2add() there is a loop over all insns. If it encounters a SET, the next insn is analyzed if it is a single_set. After next has been analyzed, it continues with if (success) delete_insn (insn); changed |= success; insn = next; // This effectively skips analysis of next. move2add_record_mode (reg); reg_offset[regno] = trunc_int_for_mode (added_offset + base_offset, mode); continue; // for continues with insn = NEXT_INSN (insn). So it records the effect of next, but not the clobbers that next might have. This is a problem if next clobbers a GPR like it can happen for avr. What then can happen is that in a later round, it may use a value from a (partially) clobbered reg. The patch records the effects of potential clobbers. Bootstrapped and reg-tested on x86_64. Also tested on avr where the bug popped up. The testcase discriminates on avr, and for now I am not aware of any other target that's affected by the bug. The change is not intrusive and fixes wrong code, so I'd like to backport it. Ok to apply? Johann rtl-optimization/101188: Don't bypass clobbers of some insns that are optimized or are optimization candidates. gcc/ PR rtl-optimization/101188 * postreload.cc (reload_cse_move2add): Record clobbers of next insn using move2add_note_store. gcc/testsuite/ PR rtl-optimization/101188 * gcc.c-torture/execute/pr101188.c: New test. If I understand the code correctly, isn't the core of the problem that we "continue" rather than executing the rest of the code in the loop. In particular the continue bypasses this chunk of code: for (note = REG_NOTES (insn); note; note = XEXP (note, 1)) { if (REG_NOTE_KIND (note) == REG_INC && REG_P (XEXP (note, 0))) { /* Reset the information about this register. 
*/ int regno = REGNO (XEXP (note, 0)); if (regno < FIRST_PSEUDO_REGISTER) { move2add_record_mode (XEXP (note, 0)); reg_mode[regno] = VOIDmode; } } } /* There are no REG_INC notes for SP autoinc. */ subrtx_var_iterator::array_type array; FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), NONCONST) { rtx mem = *iter; if (mem && MEM_P (mem) && GET_RTX_CLASS (GET_CODE (XEXP (mem, 0))) == RTX_AUTOINC) { if (XEXP (XEXP (mem, 0), 0) == stack_pointer_rtx) reg_mode[STACK_POINTER_REGNUM] = VOIDmode; } } note_stores (insn, move2add_note_store, insn); Of particular importance for your case would be the note_stores call. But I could well see other targets needing the search for REG_INC notes as well as stack pushes. If I'm right, then wouldn't it be better to factor that blob of code above into its own function, then use it before the "continue" rather than implementing a custom scan for CLOBBERS? It also begs the question if the other case immediately above the code I quoted needs similar adjustment. It doesn't do the insn = next, but it does bypass the search for autoinc memory references and the note_stores call. Jeff So if I understand you correctly, this means that my patch is declined? Johann
Ping #2: [patch,avr] Fix PR109650 wrong code
Ping #2 for: https://gcc.gnu.org/pipermail/gcc-patches/2023-May/618976.html https://gcc.gnu.org/pipermail/gcc-patches/attachments/20230519/9536bf8c/attachment-0001.bin Ping #1: https://gcc.gnu.org/pipermail/gcc-patches/2023-May/620098.html Johann Am 19.05.23 um 10:49 schrieb Georg-Johann Lay: Here is a revised version of the patch. The difference to the previous one is that it adds some combine patterns for *cbranch insns that were lost in the PR92729 transition. The post-reload part of the patterns were still there. The new patterns are slightly more general in that they also handle fixed-point modes. Apart from that, the patch behaves the same: Am 15.05.23 um 20:05 schrieb Georg-Johann Lay: This patch fixes a wrong-code bug in the wake of PR92729, the transition that turned the AVR backend from cc0 to CCmode. In cc0, the insn that uses cc0 like a conditional branch always follows the cc0 setter, which is no more the case with CCmode where set and use of REG_CC might be in different basic blocks. This patch removes the machine-dependent reorg pass in avr_reorg entirely. It is replaced by a new, AVR specific mini-pass that runs prior to split2. Canonicalization of comparisons away from the "difficult" codes GT[U] and LE[U] is now mostly performed by implementing TARGET_CANONICALIZE_COMPARISON. Moreover: * Text peephole conditions get "dead_or_set_regno_p (*, REG_CC)" as needed. * RTL peephole conditions get "peep2_regno_dead_p (*, REG_CC)" as needed. * Conditional branches no more clobber REG_CC. * insn output for compares looks ahead to determine the branch mode in use. This needs also "dead_or_set_regno_p (*, REG_CC)". * Add RTL peepholes for decrement-and-branch detection. Finally, it fixes some of the many indentation glitches left over from PR92729. Ok? I'd also backport this one because all of v12+ is affected by the wrong code. Johann -- gcc/ PR target/109650 PR target/92729 * config/avr/avr-passes.def (avr_pass_ifelse): Insert new pass. 
* config/avr/avr.cc (avr_pass_ifelse): New RTL pass. (avr_pass_data_ifelse): New pass_data for it. (make_avr_pass_ifelse, avr_redundant_compare, avr_cbranch_cost) (avr_canonicalize_comparison, avr_out_plus_set_ZN) (avr_out_cmp_ext): New functions. (compare_condtition): Make sure REG_CC dies in the branch insn. (avr_rtx_costs_1): Add computation of cbranch costs. (avr_adjust_insn_length) [ADJUST_LEN_ADD_SET_ZN, ADJUST_LEN_CMP_ZEXT, ADJUST_LEN_CMP_SEXT]: Handle them. (TARGET_CANONICALIZE_COMPARISON): New define. (avr_simplify_comparison_p, compare_diff_p, avr_compare_pattern) (avr_reorg_remove_redundant_compare, avr_reorg): Remove functions. (TARGET_MACHINE_DEPENDENT_REORG): Remove define. * avr-protos.h (avr_simplify_comparison_p): Remove proto. (make_avr_pass_ifelse, avr_out_plus_set_ZN, cc_reg_rtx) (avr_out_cmp_zext): New protos. * config/avr/avr.md (branch, difficult_branch): Don't split insns. (*cbranchhi.zero-extend.0", *cbranchhi.zero-extend.1") (*swapped_tst, *add.for.eqne.): New insns. (*cbranch4): Rename to cbranch4_insn. (define_peephole): Add dead_or_set_regno_p(insn,REG_CC) as needed. (define_peephole2): Add peep2_regno_dead_p(*,REG_CC) as needed. Add new RTL peepholes for decrement-and-branch and *swapped_tst. Rework signtest-and-branch peepholes for *sbrx_branch. (adjust_len) [add_set_ZN, cmp_zext]: New. (QIPSI): New mode iterator. (ALLs1, ALLs2, ALLs4, ALLs234): New mode iterators. (gelt): New code iterator. (gelt_eqne): New code attribute. (rvbranch, *rvbranch, difficult_rvbranch, *difficult_rvbranch) (branch_unspec, *negated_tst, *reversed_tst) (*cmpqi_sign_extend): Remove insns. (define_c_enum "unspec") [UNSPEC_IDENTITY]: Remove. * config/avr/avr-dimode.md (cbranch4): Canonicalize comparisons. * config/avr/predicates.md (scratch_or_d_register_operand): New. * config/avr/constraints.md (Yxx): New constraint. gcc/testsuite/ PR target/109650 * config/avr/torture/pr109650-1.c: New test. * config/avr/torture/pr109650-2.c: New test.
[patch,avr]: Improve bit-extractions as of PR109907.
This patch improves bit-extractions on AVR. Andrew added some patches so that more bit extractions are recognized in the middle-end and rtl optimizers. The patch adds a pattern for "extzv<mode>" and replaces the deprecated "extzv". There are still situations where expensive shifts are passed down to the backend though, and in one situation the backend uses better sequences for right-shift with an offset of MSB: Instead of ROL/CLR/ROL sequence that needs constraint "0" for operand $1, BST/CLR/BLD just requires "r" for $1 thus less register pressure. Moreover, no scratch is required. Asm out for (inverted) bit-extraction was out-sourced to a C function which is more convenient. Ok for master? Johann -- target/109907: Overhaul bit extractions. o Logical right shift that shifts the MSB to position 0 can be performed in such a way that the input operand constraint can be relaxed from "0" to "r". This results in less register pressure. Moreover, no scratch register is required in that case. o The deprecated "extzv" pattern is replaced by "extzv<mode>" that allows inputs of scalar integer modes of different sizes (1 up to 4 bytes). o Existing patterns are adjusted to the more generic "extzv<mode>" pattern. Some patterns are added as the middle-end has been reworked to spot more bit-extraction opportunities. o A C function is used to print the asm for bit extractions, which is more convenient for complex output logic. gcc/ PR target/109907 * config/avr/avr.md (adjust_len) [extr, extr_not]: New elements. (MSB, SIZE): New mode attributes. (any_shift): New code iterator. (*lshr<mode>3_split, *lshr<mode>3, lshr<mode>3) (*lshr<mode>3_const_split): Add constraint alternative for the case of shift-offset = MSB. Ditch "length" attribute. (extzv<mode>, *extzv..subreg, *extzv.xor) (*extzv.ge, *neg.ashiftrt.msb, *extzv.io.lsr7): New.
* config/avr/constraints.md (C15, C23, C31, Yil): New * config/avr/predicates.md (reg_or_low_io_operand) (const7_operand, reg_or_low_io_operand) (const15_operand, const_0_to_15_operand) (const23_operand, const_0_to_23_operand) (const31_operand, const_0_to_31_operand): New. * config/avr/avr-protos.h (avr_out_extr, avr_out_extr_not): New. * config/avr/avr.cc (avr_out_extr, avr_out_extr_not): New funcs. (lshrqi3_out, lshrhi3_out, lshrpsi3_out, lshrsi3_out): Adjust MSB case to new insn constraint "r" for operands[1]. (avr_adjust_insn_length) [ADJUST_LEN_EXTR_NOT, ADJUST_LEN_EXTR]: Handle these cases. (avr_rtx_costs_1): Adjust cost for a new pattern. gcc/testsuite/ * gcc.target/avr/pr109907.c: New test. * gcc.target/avr/torture/pr109907-1.c: New test. * gcc.target/avr/torture/pr109907-2.c: New test.diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h index ec96fd45865..229854a19db 100644 --- a/gcc/config/avr/avr-protos.h +++ b/gcc/config/avr/avr-protos.h @@ -58,6 +58,8 @@ extern const char *ret_cond_branch (rtx x, int len, int reverse); extern const char *avr_out_movpsi (rtx_insn *, rtx*, int*); extern const char *avr_out_sign_extend (rtx_insn *, rtx*, int*); extern const char *avr_out_insert_notbit (rtx_insn *, rtx*, rtx, int*); +extern const char *avr_out_extr (rtx_insn *, rtx*, int*); +extern const char *avr_out_extr_not (rtx_insn *, rtx*, int*); extern const char *ashlqi3_out (rtx_insn *insn, rtx operands[], int *len); extern const char *ashlhi3_out (rtx_insn *insn, rtx operands[], int *len); diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index a90cade35c7..f69d79bf14e 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -7142,9 +7142,9 @@ lshrqi3_out (rtx_insn *insn, rtx operands[], int *len) case 7: *len = 3; - return ("rol %0" CR_TAB - "clr %0" CR_TAB - "rol %0"); + return ("bst %1,7" CR_TAB + "clr %0" CR_TAB + "bld %0,0"); } } else if (CONSTANT_P (operands[2])) @@ -7401,10 +7401,10 @@ lshrhi3_out (rtx_insn *insn, rtx 
operands[], int *len) case 15: *len = 4; - return ("clr %A0" CR_TAB - "lsl %B0" CR_TAB - "rol %A0" CR_TAB - "clr %B0"); + return ("bst %B1,7" CR_TAB + "clr %A0" CR_TAB + "clr %B0" CR_TAB + "bld %A0,0"); } len = t; } @@ -7453,11 +7453,11 @@ avr_out_lshrpsi3 (rtx_insn *insn, rtx *op, int *plen) /* fall through */ case 23: - return avr_asm_len ("clr %A0"CR_TAB - "sbrc %C0,7" CR_TAB - "inc %A0"CR_TAB - "clr %B0"CR_TAB - "clr %C0", op, plen, 5); + return avr_asm_len ("bst %C1,7" CR_TAB + "clr %A0" CR_TAB + "clr %B0" CR_TAB + "clr %C0" CR_TAB + "bld %A0,0", op, plen, 5); } /* swit
[avr,committed] Tidy code for inverted bit insertions
Applied this no-op change that tidies up the code for inverted bit insertions. Johann -- Use canonical form for reversed single-bit insertions after reload. We now split almost all insns after reload in order to add clobber of REG_CC. If insns are coming from insn combiner and there is no canonical form for the respective arithmetic (like for reversed bit insertions), there is no need to keep all these different representations after reload: Instead of splitting such patterns to their clobber-REG_CC-analogon, we can split to a canonical representation, which is insv_notbit for the present case. This is a no-op change. gcc/ * config/avr/avr.md (adjust_len) [insv_notbit_0, insv_notbit_7]: Remove attribute values. (insv_notbit): New post-reload insn. (*insv.not-shiftrt_split, *insv.xor1-bit.0_split) (*insv.not-bit.0_split, *insv.not-bit.7_split) (*insv.xor-extract_split): Split to insv_notbit. (*insv.not-shiftrt, *insv.xor1-bit.0, *insv.not-bit.0, *insv.not-bit.7) (*insv.xor-extract): Remove post-reload insns. * config/avr/avr.cc (avr_out_insert_notbit) [bitno]: Remove parameter. (avr_adjust_insn_length): Adjust call of avr_out_insert_notbit. [ADJUST_LEN_INSV_NOTBIT_0, ADJUST_LEN_INSV_NOTBIT_7]: Remove cases. * config/avr/avr-protos.h (avr_out_insert_notbit): Adjust prototype. 
diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h index a10d91d186f..5c1343f0df8 100644 --- a/gcc/config/avr/avr-protos.h +++ b/gcc/config/avr/avr-protos.h @@ -57,7 +57,7 @@ extern const char *avr_out_compare64 (rtx_insn *, rtx*, int*); extern const char *ret_cond_branch (rtx x, int len, int reverse); extern const char *avr_out_movpsi (rtx_insn *, rtx*, int*); extern const char *avr_out_sign_extend (rtx_insn *, rtx*, int*); -extern const char *avr_out_insert_notbit (rtx_insn *, rtx*, rtx, int*); +extern const char *avr_out_insert_notbit (rtx_insn *, rtx*, int*); extern const char *avr_out_extr (rtx_insn *, rtx*, int*); extern const char *avr_out_extr_not (rtx_insn *, rtx*, int*); extern const char *avr_out_plus_set_ZN (rtx*, int*); diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index b02f5e2..ef6872a3f55 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -8995,20 +8995,15 @@ avr_out_addto_sp (rtx *op, int *plen) } -/* Output instructions to insert an inverted bit into OPERANDS[0]: - $0.$1 = ~$2.$3 if XBITNO = NULL - $0.$1 = ~$2.XBITNO if XBITNO != NULL. +/* Output instructions to insert an inverted bit into OP[0]: $0.$1 = ~$2.$3. If PLEN = NULL then output the respective instruction sequence which is a combination of BST / BLD and some instruction(s) to invert the bit. If PLEN != NULL then store the length of the sequence (in words) in *PLEN. Return "". */ const char* -avr_out_insert_notbit (rtx_insn *insn, rtx operands[], rtx xbitno, int *plen) +avr_out_insert_notbit (rtx_insn *insn, rtx op[], int *plen) { - rtx op[4] = { operands[0], operands[1], operands[2], -xbitno == NULL_RTX ? 
operands [3] : xbitno }; - if (INTVAL (op[1]) == 7 && test_hard_reg_class (LD_REGS, op[0])) { @@ -10038,15 +10033,7 @@ avr_adjust_insn_length (rtx_insn *insn, int len) case ADJUST_LEN_INSERT_BITS: avr_out_insert_bits (op, &len); break; case ADJUST_LEN_ADD_SET_ZN: avr_out_plus_set_ZN (op, &len); break; -case ADJUST_LEN_INSV_NOTBIT: - avr_out_insert_notbit (insn, op, NULL_RTX, &len); - break; -case ADJUST_LEN_INSV_NOTBIT_0: - avr_out_insert_notbit (insn, op, const0_rtx, &len); - break; -case ADJUST_LEN_INSV_NOTBIT_7: - avr_out_insert_notbit (insn, op, GEN_INT (7), &len); - break; +case ADJUST_LEN_INSV_NOTBIT: avr_out_insert_notbit (insn, op, &len); break; default: gcc_unreachable(); diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md index eadc482da15..83dd15040b0 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -163,7 +163,7 @@ (define_attr "adjust_len" ashlhi, ashrhi, lshrhi, ashlsi, ashrsi, lshrsi, ashlpsi, ashrpsi, lshrpsi, - insert_bits, insv_notbit, insv_notbit_0, insv_notbit_7, + insert_bits, insv_notbit, add_set_ZN, cmp_uext, cmp_sext, no" (const_string "no")) @@ -9151,6 +9151,21 @@ (define_insn "*insv.shiftrt" [(set_attr "length" "2")]) ;; Same, but with a NOT inverting the source bit. +;; Insert bit ~$2.$3 into $0.$1 +(define_insn "insv_notbit" + [(set (zero_extract:QI (match_operand:QI 0 "register_operand" "+r") + (const_int 1) + (match_operand:QI 1 "const_0_to_7_operand" "n")) +(not:QI (zero_extract:QI (match_operand:QI 2 "register_operand" "r") + (const_int 1) + (match_operand:QI 3 "const_0_to_7_operand" "n" + (clobber (reg:CC REG_CC))] + "re
[Patch,avr,committed] Fix PR target/110220: Set JUMP_LABEL as required.
Committed as obvious. An insn emitted by avr specific RTL optimization pass missed setting of its JUMP_LABEL. Johann target/110220: Set JUMP_LABEL and LABEL_NUSES of new branch insn generated by target specific RTL optimization pass .avr-casesi. gcc/ PR target/110220 * config/avr/avr.cc (avr_optimize_casesi): Set JUMP_LABEL and LABEL_NUSES of new conditional branch instruction. diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index 0447641a8e9..25f3f4c22e0 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -644,9 +644,11 @@ avr_optimize_casesi (rtx_insn *insns[5], rtx *xop) emit_insn (gen_add (reg, reg, gen_int_mode (-low_idx, mode))); rtx op0 = reg; rtx op1 = gen_int_mode (num_idx, mode); rtx labelref = copy_rtx (xop[4]); - emit_jump_insn (gen_cbranch (gen_rtx_fmt_ee (GTU, VOIDmode, op0, op1), - op0, op1, - labelref)); + rtx xbranch = gen_cbranch (gen_rtx_fmt_ee (GTU, VOIDmode, op0, op1), +op0, op1, labelref); + rtx_insn *cbranch = emit_jump_insn (xbranch); + JUMP_LABEL (cbranch) = xop[4]; + ++LABEL_NUSES (xop[4]); seq1 = get_insns(); last1 = get_last_insn();
[avr,committed] Fix some typos in avr-mcus.def
This fixes some minor typos in avr-mcus.def. Johan gcc/ * config/avr/avr-mcus.def (avr128d*, avr64d*): Fix their FLASH_SIZE and PM_OFFSET entries. diff --git a/gcc/config/avr/avr-mcus.def b/gcc/config/avr/avr-mcus.def index ca99116adab..d0056c960ee 100644 --- a/gcc/config/avr/avr-mcus.def +++ b/gcc/config/avr/avr-mcus.def @@ -291,7 +291,7 @@ AVR_MCU ("atmega2560", ARCH_AVR6, AVR_ISA_NONE, "__AVR_ATmega2560__", AVR_MCU ("atmega2561", ARCH_AVR6, AVR_ISA_NONE, "__AVR_ATmega2561__",0x0200, 0x0, 0x4, 0) AVR_MCU ("atmega256rfr2",ARCH_AVR6, AVR_ISA_NONE, "__AVR_ATmega256RFR2__", 0x0200, 0x0, 0x4, 0) AVR_MCU ("atmega2564rfr2", ARCH_AVR6, AVR_ISA_NONE, "__AVR_ATmega2564RFR2__",0x0200, 0x0, 0x4, 0) -/* Xmega, 16K <= Flash < 64K, RAM <= 64K */ +/* Xmega, 16K <= Flash <= 64K, RAM <= 64K */ AVR_MCU ("avrxmega2",ARCH_AVRXMEGA2, AVR_ISA_NONE, NULL, 0x2000, 0x0, 0x9000, 0) AVR_MCU ("atxmega8e5", ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_ATxmega8E5__", 0x2000, 0x0, 0x2800, 0) AVR_MCU ("atxmega16a4", ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_ATxmega16A4__", 0x2000, 0x0, 0x5000, 0) @@ -306,14 +306,14 @@ AVR_MCU ("atxmega16c4", ARCH_AVRXMEGA2, AVR_ISA_RMW, "__AVR_ATxmega16C4__" AVR_MCU ("atxmega32a4u", ARCH_AVRXMEGA2, AVR_ISA_RMW, "__AVR_ATxmega32A4U__", 0x2000, 0x0, 0x9000, 0) AVR_MCU ("atxmega32c4", ARCH_AVRXMEGA2, AVR_ISA_RMW, "__AVR_ATxmega32C4__", 0x2000, 0x0, 0x9000, 0) AVR_MCU ("atxmega32e5", ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_ATxmega32E5__", 0x2000, 0x0, 0x9000, 0) -AVR_MCU ("avr64da28",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DA28__",0x6000, 0x0, 0x8000, 0x1) -AVR_MCU ("avr64da32",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DA32__",0x6000, 0x0, 0x8000, 0x1) -AVR_MCU ("avr64da48",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DA48__",0x6000, 0x0, 0x8000, 0x1) -AVR_MCU ("avr64da64",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DA64__",0x6000, 0x0, 0x8000, 0x1) -AVR_MCU ("avr64db28",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DB28__",0x6000, 0x0, 0x8000, 0x1) -AVR_MCU ("avr64db32",ARCH_AVRXMEGA2, 
AVR_ISA_NONE, "__AVR_AVR64DB32__",0x6000, 0x0, 0x8000, 0x1) -AVR_MCU ("avr64db48",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DB48__",0x6000, 0x0, 0x8000, 0x1) -AVR_MCU ("avr64db64",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DB64__",0x6000, 0x0, 0x8000, 0x1) +AVR_MCU ("avr64da28",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DA28__",0x6000, 0x0, 0x1, 0) +AVR_MCU ("avr64da32",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DA32__",0x6000, 0x0, 0x1, 0) +AVR_MCU ("avr64da48",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DA48__",0x6000, 0x0, 0x1, 0) +AVR_MCU ("avr64da64",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DA64__",0x6000, 0x0, 0x1, 0) +AVR_MCU ("avr64db28",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DB28__",0x6000, 0x0, 0x1, 0) +AVR_MCU ("avr64db32",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DB32__",0x6000, 0x0, 0x1, 0) +AVR_MCU ("avr64db48",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DB48__",0x6000, 0x0, 0x1, 0) +AVR_MCU ("avr64db64",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DB64__",0x6000, 0x0, 0x1, 0) /* Xmega, Flash + RAM < 64K, flash visible in RAM address space */ AVR_MCU ("avrxmega3",ARCH_AVRXMEGA3, AVR_ISA_NONE, NULL, 0x3f00, 0x0, 0x8000, 0) AVR_MCU ("attiny202",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny202__", 0x3f80, 0x0, 0x800, 0x8000) @@ -366,14 +366,14 @@ AVR_MCU ("atxmega64b1", ARCH_AVRXMEGA4, AVR_ISA_RMW, "__AVR_ATxmega64B1__" AVR_MCU ("atxmega64b3", ARCH_AVRXMEGA4, AVR_ISA_RMW, "__AVR_ATxmega64B3__", 0x2000, 0x0, 0x11000, 0) AVR_MCU ("atxmega64c3", ARCH_AVRXMEGA4, AVR_ISA_RMW, "__AVR_ATxmega64C3__", 0x2000, 0x0, 0x11000, 0) AVR_MCU ("atxmega64d4", ARCH_AVRXMEGA4, AVR_ISA_NONE, "__AVR_ATxmega64D4__", 0x2000, 0x0, 0x11000, 0) -AVR_MCU ("avr128da28", ARCH_AVRXMEGA4, AVR_ISA_NONE, "__AVR_AVR128DA28__", 0x4000, 0x0, 0x8000, 0x2) -AVR_MCU ("avr128da32", ARCH_AVRXMEGA4, AVR_ISA_NONE, "__AVR_AVR128DA32__", 0x4000, 0x0, 0x8000, 0x2) -AVR_MCU ("avr128da48", ARCH_AVRXMEGA4, AVR_ISA_NONE, "__AVR_AVR128DA48__", 0x4000, 0x0, 0x8000, 0x2) -AVR_MCU ("avr128da64", ARCH_AVRXMEGA4, 
AVR_ISA_NONE, "__AVR_AVR128DA64__", 0x4000, 0x0, 0x8000, 0x2) -AVR_MCU ("avr128db28", ARCH_AVRXMEGA4, AVR_ISA_NONE, "__AVR_AVR128DB28__", 0x4000, 0x0, 0x8000, 0x2) -AVR_MCU ("avr128db32", ARCH_AVRXMEGA4, AVR_ISA_NONE, "__AVR_AVR128DB32__", 0x4000, 0x0, 0x8000, 0x2) -AVR_MCU ("avr128db48", ARCH_AVRXMEGA4, AVR_ISA_NONE, "__AVR_AVR128DB48__", 0x4000, 0x0, 0x8000, 0x2) -AVR_MCU ("avr128db64", ARCH_AVRXM
[avr,committed] Add some more devices to avr-mcus.def.
This adds some more Xmega like devices to the avr backend. Johann AVR: Add some more devices: AVR16DD*, AVR32DD*, AVR64DD*, AVR64EA*, ATtiny42*, ATtiny82*, ATtiny162*, ATtiny322*, ATtiny10*. gcc/ * config/avr/avr-mcus.def (avr64dd14, avr64dd20, avr64dd28, avr64dd32) (avr64ea28, avr64ea32, avr64ea48, attiny424, attiny426, attiny427) (attiny824, attiny826, attiny827, attiny1624, attiny1626, attiny1627) (attiny3224, attiny3226, attiny3227, avr16dd14, avr16dd20, avr16dd28) (avr16dd32, avr32dd14, avr32dd20, avr32dd28, avr32dd32) (attiny102, attiny104): New devices. * doc/avr-mmcu.texi: Regenerate.AVR: Add some more devices: AVR16DD*, AVR32DD*, AVR64DD*, AVR64EA*, ATtiny42*, ATtiny82*, ATtiny162*, ATtiny322*, ATtiny10*. gcc/ * config/avr/avr-mcus.def (avr64dd14, avr64dd20, avr64dd28, avr64dd32) (avr64ea28, avr64ea32, avr64ea48, attiny424, attiny426, attiny427) (attiny824, attiny826, attiny827, attiny1624, attiny1626, attiny1627) (attiny3224, attiny3226, attiny3227, avr16dd14, avr16dd20, avr16dd28) (avr16dd32, avr32dd14, avr32dd20, avr32dd28, avr32dd32) (attiny102, attiny104): New devices. * doc/avr-mmcu.texi: Regenerate. 
diff --git a/gcc/config/avr/avr-mcus.def b/gcc/config/avr/avr-mcus.def index d0056c960ee..4c4269cd429 100644 --- a/gcc/config/avr/avr-mcus.def +++ b/gcc/config/avr/avr-mcus.def @@ -314,6 +314,13 @@ AVR_MCU ("avr64db28",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DB28__", AVR_MCU ("avr64db32",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DB32__",0x6000, 0x0, 0x1, 0) AVR_MCU ("avr64db48",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DB48__",0x6000, 0x0, 0x1, 0) AVR_MCU ("avr64db64",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DB64__",0x6000, 0x0, 0x1, 0) +AVR_MCU ("avr64dd14",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DD14__",0x6000, 0x0, 0x1, 0) +AVR_MCU ("avr64dd20",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DD20__",0x6000, 0x0, 0x1, 0) +AVR_MCU ("avr64dd28",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DD28__",0x6000, 0x0, 0x1, 0) +AVR_MCU ("avr64dd32",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DD32__",0x6000, 0x0, 0x1, 0) +AVR_MCU ("avr64ea28",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64EA28__",0x6800, 0x0, 0x1, 0) +AVR_MCU ("avr64ea32",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64EA32__",0x6800, 0x0, 0x1, 0) +AVR_MCU ("avr64ea48",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64EA48__",0x6800, 0x0, 0x1, 0) /* Xmega, Flash + RAM < 64K, flash visible in RAM address space */ AVR_MCU ("avrxmega3",ARCH_AVRXMEGA3, AVR_ISA_NONE, NULL, 0x3f00, 0x0, 0x8000, 0) AVR_MCU ("attiny202",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny202__", 0x3f80, 0x0, 0x800, 0x8000) @@ -342,6 +349,18 @@ AVR_MCU ("attiny1617", ARCH_AVRXMEGA3, AVR_ISA_NONE, "__AVR_ATtiny1617__" AVR_MCU ("attiny3214", ARCH_AVRXMEGA3, AVR_ISA_NONE, "__AVR_ATtiny3214__", 0x3800, 0x0, 0x8000, 0x8000) AVR_MCU ("attiny3216", ARCH_AVRXMEGA3, AVR_ISA_NONE, "__AVR_ATtiny3216__", 0x3800, 0x0, 0x8000, 0x8000) AVR_MCU ("attiny3217", ARCH_AVRXMEGA3, AVR_ISA_NONE, "__AVR_ATtiny3217__", 0x3800, 0x0, 0x8000, 0x8000) +AVR_MCU ("attiny424",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny424__", 0x3e00, 0x0, 0x1000, 0x8000) +AVR_MCU ("attiny426",ARCH_AVRXMEGA3, 
AVR_ISA_RCALL, "__AVR_ATtiny426__", 0x3e00, 0x0, 0x1000, 0x8000) +AVR_MCU ("attiny427",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny427__", 0x3e00, 0x0, 0x1000, 0x8000) +AVR_MCU ("attiny824",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny824__", 0x3c00, 0x0, 0x2000, 0x8000) +AVR_MCU ("attiny826",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny826__", 0x3c00, 0x0, 0x2000, 0x8000) +AVR_MCU ("attiny827",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny827__", 0x3c00, 0x0, 0x2000, 0x8000) +AVR_MCU ("attiny1624", ARCH_AVRXMEGA3, AVR_ISA_NONE, "__AVR_ATtiny1624__", 0x3800, 0x0, 0x4000, 0x8000) +AVR_MCU ("attiny1626", ARCH_AVRXMEGA3, AVR_ISA_NONE, "__AVR_ATtiny1626__", 0x3800, 0x0, 0x4000, 0x8000) +AVR_MCU ("attiny1627", ARCH_AVRXMEGA3, AVR_ISA_NONE, "__AVR_ATtiny1627__", 0x3800, 0x0, 0x4000, 0x8000) +AVR_MCU ("attiny3224", ARCH_AVRXMEGA3, AVR_ISA_NONE, "__AVR_ATtiny3224__", 0x3400, 0x0, 0x8000, 0x8000) +AVR_MCU ("attiny3226", ARCH_AVRXMEGA3, AVR_ISA_NONE, "__AVR_ATtiny3226__", 0x3400, 0x0, 0x8000, 0x8000) +AVR_MCU ("attiny3227", ARCH_AVRXMEGA3, AVR_ISA_NONE, "__AVR_ATtiny3227__", 0x3400, 0x0, 0x8000, 0x8000) AVR_MCU ("atmega808",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATmega808__", 0x3c00, 0x0, 0x2000, 0x4000) AVR_MCU ("atmega809",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATmega809__", 0x3c00, 0x0, 0x2000, 0x4000) AVR_MCU ("atmega1608", ARCH_AVRXMEGA3, AVR_ISA_NONE, "__AVR_ATmega1608__",
Re: [AVR PATCH] Improvements to SImode and PSImode shifts by constants.
Am 02.11.23 um 12:54 schrieb Roger Sayle: This patch provides non-looping implementations for more SImode (32-bit) and PSImode (24-bit) shifts on AVR. For most cases, these are shorter and faster than using a loop, but for a few (controlled by optimize_size) Maybe this should also adjust the insn costs, like in avr_rtx_costs_1? Depending on what you are outputting, avr_asm_len() might be more convenient. What I am not sure about are the test cases that expect exact sequences, which might be annoying in the future. Johann they are a little larger but significantly faster. The approach is to perform byte-based shifts by 1, 2 or 3 bytes, followed by bit-based shifts (effectively in a narrower type) for the remaining bits, beyond 8, 16 or 24. For example, the simple test case below (inspired by PR 112268): unsigned long foo(unsigned long x) { return x >> 26; } gcc -O2 currently generates: foo:ldi r18,26 1: lsr r25 ror r24 ror r23 ror r22 dec r18 brne 1b ret which is 8 instructions, and takes ~158 cycles. With this patch, we now generate: foo:mov r22,r25 clr r23 clr r24 clr r25 lsr r22 lsr r22 ret which is 7 instructions, and takes ~7 cycles. One complication is that the modified functions sometimes use spaces instead of TABs, with occasional mistakes in GNU-style formatting, so I've fixed these indentation/whitespace issues. There's no change in the code for the cases previously handled/special-cased, with the exception of ashrqi3 reg,5 where with -Os a (4-instruction) loop is shorter than the five single-bit shifts of a fully unrolled implementation. This patch has been (partially) tested with a cross-compiler to avr-elf hosted on x86_64, without a simulator, where the compile-only tests in the gcc testsuite show no regressions. If someone could test this more thoroughly that would be great. 2023-11-02 Roger Sayle gcc/ChangeLog * config/avr/avr.cc (ashlqi3_out): Fix indentation whitespace. (ashlhi3_out): Likewise. (avr_out_ashlpsi3): Likewise. 
Handle shifts by 9 and 17-22. (ashlsi3_out): Fix formatting. Handle shifts by 9 and 25-30. (ashrqi3_out): Use loop for shifts by 5 when optimizing for size. Fix indentation whitespace. (ashrhi3_out): Likewise. (avr_out_ashrpsi3): Likewise. Handle shifts by 17. (ashrsi3_out): Fix indentation. Handle shifts by 17 and 25. (lshrqi3_out): Fix whitespace. (lshrhi3_out): Likewise. (avr_out_lshrpsi3): Likewise. Handle shifts by 9 and 17-22. (lshrsi3_out): Fix indentation. Handle shifts by 9, 17, 18 and 25-30. gcc/testsuite/ChangeLog * gcc.target/avr/ashlsi-1.c: New test case. * gcc.target/avr/ashlsi-2.c: Likewise. * gcc.target/avr/ashrsi-1.c: Likewise. * gcc.target/avr/ashrsi-2.c: Likewise. * gcc.target/avr/lshrsi-1.c: Likewise. * gcc.target/avr/lshrsi-2.c: Likewise. Thanks in advance, Roger --
[patch,avr] Fix PR109650 wrong code
This patch fixes a wrong-code bug in the wake of PR92729, the transition that turned the AVR backend from cc0 to CCmode. In cc0, the insn that uses cc0 like a conditional branch always follows the cc0 setter, which is no more the case with CCmode where set and use of REG_CC might be in different basic blocks. This patch removes the machine-dependent reorg pass in avr_reorg entirely. It is replaced by a new, AVR specific mini-pass that runs prior to split2. Canonicalization of comparisons away from the "difficult" codes GT[U] and LE[U] is now mostly performed by implementing TARGET_CANONICALIZE_COMPARISON. Moreover: * Text peephole conditions get "dead_or_set_regno_p (*, REG_CC)" as needed. * RTL peephole conditions get "peep2_regno_dead_p (*, REG_CC)" as needed. * Conditional branches no more clobber REG_CC. * insn output for compares looks ahead to determine the branch mode in use. This needs also "dead_or_set_regno_p (*, REG_CC)". * Add RTL peepholes for decrement-and-branch detection. Finally, it fixes some of the many indentation glitches left over from PR92729. Ok? I'd also backport this one because all of v12+ is affected by the wrong code. Johann -- gcc/ PR target/109650 PR target/92729 * config/avr/avr-passes.def (avr_pass_ifelse): Insert new pass. * config/avr/avr.cc (avr_pass_ifelse): New RTL pass. (avr_pass_data_ifelse): New pass_data for it. (make_avr_pass_ifelse, avr_redundant_compare, avr_cbranch_cost) (avr_canonicalize_comparison, avr_out_plus_set_ZN): New functions. (compare_condition): Make sure REG_CC dies in the branch insn. (avr_rtx_costs_1): Add computation of cbranch costs. (avr_adjust_insn_length) [ADJUST_LEN_ADD_SET_ZN]: Handle case. (TARGET_CANONICALIZE_COMPARISON): New define. (avr_simplify_comparison_p, compare_diff_p, avr_compare_pattern) (avr_reorg_remove_redundant_compare, avr_reorg): Remove functions. (TARGET_MACHINE_DEPENDENT_REORG): Remove define. * avr-protos.h (avr_simplify_comparison_p): Remove proto. 
(make_avr_pass_ifelse, avr_out_plus_set_ZN, cc_reg_rtx): New Protos * config/avr/avr.md (branch, difficult_branch): Don't split insns. (*swapped_tst, *add.for.eqne.): New insns. (*cbranch4): Rename to cbranch4_insn. (cbranch4): Try to canonicalize comparisons at expand. (define_peephole): Add dead_or_set_regno_p(insn,REG_CC) as needed. (define_deephole2): Add peep2_regno_dead_p(*,REG_CC) as needed. Add new RTL peepholes for decrement-and-branch and *swapped_tst. (adjust_len) [add_set_ZN]: New. (rvbranch, *rvbranch, difficult_rvbranch, *difficult_rvbranch) (branch_unspec, *negated_tst, *reversed_tst): Remove insns. (define_c_enum "unspec") [UNSPEC_IDENTITY]: Remove. * config/avr/avr-dimode.md (cbranch4): Canonicalize comparisons. * config/avr/predicates.md (scratch_or_d_register_operand): New. * config/avr/contraints.md (Yxx): New constraint. gcc/testsuite/ PR/target 109650 * config/avr/torture/pr109650-1.c: New test.diff --git a/gcc/config/avr/avr-dimode.md b/gcc/config/avr/avr-dimode.md index c0bb04ff9e0..91f0d395761 100644 --- a/gcc/config/avr/avr-dimode.md +++ b/gcc/config/avr/avr-dimode.md @@ -455,12 +455,18 @@ (define_expand "conditional_jump" (define_expand "cbranch4" [(set (pc) (if_then_else (match_operator 0 "ordered_comparison_operator" -[(match_operand:ALL8 1 "register_operand" "") - (match_operand:ALL8 2 "nonmemory_operand" "")]) - (label_ref (match_operand 3 "" "")) - (pc)))] +[(match_operand:ALL8 1 "register_operand") + (match_operand:ALL8 2 "nonmemory_operand")]) + (label_ref (match_operand 3)) + (pc)))] "avr_have_dimode" { +int icode = (int) GET_CODE (operands[0]); + +targetm.canonicalize_comparison (&icode, &operands[1], &operands[2], false); +operands[0] = gen_rtx_fmt_ee ((enum rtx_code) icode, + VOIDmode, operands[1], operands[2]); + rtx acc_a = gen_rtx_REG (mode, ACC_A); avr_fix_inputs (operands, 1 << 2, regmask (mode, ACC_A)); @@ -490,8 +496,8 @@ (define_insn_and_split "cbranch_2_split" (if_then_else (match_operator 0 
"ordered_comparison_operator" [(reg:ALL8 ACC_A) (reg:ALL8 ACC_B)]) - (label_ref (match_operand 1 "" "")) - (pc)))] + (label_ref (match_operand 1)) + (pc)))] "avr_have_dimode" "#" "&& reload_completed" @@ -544,8 +550,8 @@ (define_insn_and_split "cbranch_const_2_split" (if_then_else (match_operator 0 "ordered_comparison_operator" [(reg:ALL8 ACC_A) (match_op
[patch,avr] PR105753: Fix ICE in add_clobbers.
This patch removes the superfluous parallel in [u]divmod patterns in the AVR backend. Effect of extra parallel is that add_clobbers reaches gcc_unreachable() because the clobbers for [u]divmod are missing. The parallel around the parts of an insn pattern is implicit if it has multiple parts like clobbers, so extra parallel should be removed. Ok to apply? Johann -- gcc/ PR target/105753 * config/avr/avr.md (divmodpsi, udivmodpsi, divmodsi, udivmodsi): Remove superfluous "parallel" in insn pattern. ([u]divmod4): Tidy code. Use gcc_unreachable() instead of printing error text to assembly. gcc/testsuite/ PR target/105753 * gcc.target/avr/torture/pr105753.c: New test.diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md index 43b75046384..a79c6824fad 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -3705,17 +3705,17 @@ (define_insn "*mulohisi3_call" ;;CSE has problems to operate on hard regs. ;; (define_insn_and_split "divmodqi4" - [(set (match_operand:QI 0 "pseudo_register_operand" "") -(div:QI (match_operand:QI 1 "pseudo_register_operand" "") -(match_operand:QI 2 "pseudo_register_operand" ""))) - (set (match_operand:QI 3 "pseudo_register_operand" "") + [(set (match_operand:QI 0 "pseudo_register_operand") +(div:QI (match_operand:QI 1 "pseudo_register_operand") +(match_operand:QI 2 "pseudo_register_operand"))) + (set (match_operand:QI 3 "pseudo_register_operand") (mod:QI (match_dup 1) (match_dup 2))) (clobber (reg:QI 22)) (clobber (reg:QI 23)) (clobber (reg:QI 24)) (clobber (reg:QI 25))] "" - "this divmodqi4 pattern should have been splitted;" + { gcc_unreachable(); } "" [(set (reg:QI 24) (match_dup 1)) (set (reg:QI 22) (match_dup 2)) @@ -3751,17 +3751,17 @@ (define_insn "*divmodqi4_call" [(set_attr "type" "xcall")]) (define_insn_and_split "udivmodqi4" - [(set (match_operand:QI 0 "pseudo_register_operand" "") - (udiv:QI (match_operand:QI 1 "pseudo_register_operand" "") -(match_operand:QI 2 "pseudo_register_operand" ""))) - (set (match_operand:QI 3 
"pseudo_register_operand" "") -(umod:QI (match_dup 1) (match_dup 2))) - (clobber (reg:QI 22)) - (clobber (reg:QI 23)) - (clobber (reg:QI 24)) - (clobber (reg:QI 25))] - "" - "this udivmodqi4 pattern should have been splitted;" + [(set (match_operand:QI 0 "pseudo_register_operand") + (udiv:QI (match_operand:QI 1 "pseudo_register_operand") +(match_operand:QI 2 "pseudo_register_operand"))) + (set (match_operand:QI 3 "pseudo_register_operand") + (umod:QI (match_dup 1) (match_dup 2))) + (clobber (reg:QI 22)) + (clobber (reg:QI 23)) + (clobber (reg:QI 24)) + (clobber (reg:QI 25))] + "" + { gcc_unreachable(); } "" [(set (reg:QI 24) (match_dup 1)) (set (reg:QI 22) (match_dup 2)) @@ -3793,17 +3793,17 @@ (define_insn "*udivmodqi4_call" [(set_attr "type" "xcall")]) (define_insn_and_split "divmodhi4" - [(set (match_operand:HI 0 "pseudo_register_operand" "") -(div:HI (match_operand:HI 1 "pseudo_register_operand" "") -(match_operand:HI 2 "pseudo_register_operand" ""))) - (set (match_operand:HI 3 "pseudo_register_operand" "") + [(set (match_operand:HI 0 "pseudo_register_operand") +(div:HI (match_operand:HI 1 "pseudo_register_operand") +(match_operand:HI 2 "pseudo_register_operand"))) + (set (match_operand:HI 3 "pseudo_register_operand") (mod:HI (match_dup 1) (match_dup 2))) (clobber (reg:QI 21)) (clobber (reg:HI 22)) (clobber (reg:HI 24)) (clobber (reg:HI 26))] "" - "this should have been splitted;" + { gcc_unreachable(); } "" [(set (reg:HI 24) (match_dup 1)) (set (reg:HI 22) (match_dup 2)) @@ -3839,17 +3839,17 @@ (define_insn "*divmodhi4_call" [(set_attr "type" "xcall")]) (define_insn_and_split "udivmodhi4" - [(set (match_operand:HI 0 "pseudo_register_operand" "") -(udiv:HI (match_operand:HI 1 "pseudo_register_operand" "") - (match_operand:HI 2 "pseudo_register_operand" ""))) - (set (match_operand:HI 3 "pseudo_register_operand" "") + [(set (match_operand:HI 0 "pseudo_register_operand") +(udiv:HI (match_operand:HI 1 "pseudo_register_operand") + (match_operand:HI 2 
"pseudo_register_operand"))) + (set (match_operand:HI 3 "pseudo_register_operand") (umod:HI (match_dup 1) (match_dup 2))) (clobber (reg:QI 21)) (clobber (reg:HI 22)) (clobber (reg:HI 24)) (clobber (reg:HI 26))] "" - "this udivmodhi4 pattern should have been splitted.;" + { gcc_unreachable(); } "" [(set (reg:HI 24) (match_dup 1)) (set (reg:HI 22) (match_dup 2)) @@ -4090,14 +4090,14 @@ (define_insn "*mulpsi3.libgcc" ;; implementation works the other way round. (define_insn_and_split "divmodpsi4" - [(parallel [(set (match_ope
[avr,committed] Fix a trivial typo in gen-avr-mmcu-specs.cc.
Applied as obvious, there was a trailing */ in a 1-line // comment. https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=a726d007f197d13ec80b9d625bf8bab97c96384c Johann gcc/ChangeLog * config/avr/gen-avr-mmcu-specs.cc: Remove stale */ after // comment. -- diff --git a/gcc/config/avr/gen-avr-mmcu-specs.cc b/gcc/config/avr/gen-avr-mmcu-specs.cc index 9344246cb7203a665db575a2bf7c0e8a29521963..b9a5ad44e4e5c350fbcc45d468684ff6d873574e 100644 (file) --- a/gcc/config/avr/gen-avr-mmcu-specs.cc +++ b/gcc/config/avr/gen-avr-mmcu-specs.cc @@ -30,7 +30,7 @@ #include "avr-devices.cc" // Get rid of "defaults.h". We just need tm.h for `WITH_AVRLIBC' and -// and `WITH_RTEMS'. */ +// and `WITH_RTEMS'. #define GCC_DEFAULTS_H #include "tm.h"
Re: [patch,avr] Fix PR109650 wrong code
Here is a revised version of the patch. The difference to the previous one is that it adds some combine patterns for *cbranch insns that were lost in the PR92729 transition. The post-reload parts of the patterns were still there. The new patterns are slightly more general in that they also handle fixed-point modes. Apart from that, the patch behaves the same: Am 15.05.23 um 20:05 schrieb Georg-Johann Lay: This patch fixes a wrong-code bug in the wake of PR92729, the transition that turned the AVR backend from cc0 to CCmode. In cc0, the insn that uses cc0 like a conditional branch always follows the cc0 setter, which is no more the case with CCmode where set and use of REG_CC might be in different basic blocks. This patch removes the machine-dependent reorg pass in avr_reorg entirely. It is replaced by a new, AVR specific mini-pass that runs prior to split2. Canonicalization of comparisons away from the "difficult" codes GT[U] and LE[U] is now mostly performed by implementing TARGET_CANONICALIZE_COMPARISON. Moreover: * Text peephole conditions get "dead_or_set_regno_p (*, REG_CC)" as needed. * RTL peephole conditions get "peep2_regno_dead_p (*, REG_CC)" as needed. * Conditional branches no more clobber REG_CC. * insn output for compares looks ahead to determine the branch mode in use. This needs also "dead_or_set_regno_p (*, REG_CC)". * Add RTL peepholes for decrement-and-branch detection. Finally, it fixes some of the many indentation glitches left over from PR92729. Ok? I'd also backport this one because all of v12+ is affected by the wrong code. Johann -- gcc/ PR target/109650 PR target/92729 * config/avr/avr-passes.def (avr_pass_ifelse): Insert new pass. * config/avr/avr.cc (avr_pass_ifelse): New RTL pass. (avr_pass_data_ifelse): New pass_data for it. (make_avr_pass_ifelse, avr_redundant_compare, avr_cbranch_cost) (avr_canonicalize_comparison, avr_out_plus_set_ZN) (avr_out_cmp_ext): New functions. (compare_condition): Make sure REG_CC dies in the branch insn. 
(avr_rtx_costs_1): Add computation of cbranch costs. (avr_adjust_insn_length) [ADJUST_LEN_ADD_SET_ZN, ADJUST_LEN_CMP_ZEXT, ADJUST_LEN_CMP_SEXT]: Handle them. (TARGET_CANONICALIZE_COMPARISON): New define. (avr_simplify_comparison_p, compare_diff_p, avr_compare_pattern) (avr_reorg_remove_redundant_compare, avr_reorg): Remove functions. (TARGET_MACHINE_DEPENDENT_REORG): Remove define. * avr-protos.h (avr_simplify_comparison_p): Remove proto. (make_avr_pass_ifelse, avr_out_plus_set_ZN, cc_reg_rtx) (avr_out_cmp_zext): New protos. * config/avr/avr.md (branch, difficult_branch): Don't split insns. (*cbranchhi.zero-extend.0, *cbranchhi.zero-extend.1) (*swapped_tst, *add.for.eqne.): New insns. (*cbranch4): Rename to cbranch4_insn. (define_peephole): Add dead_or_set_regno_p(insn,REG_CC) as needed. (define_peephole2): Add peep2_regno_dead_p(*,REG_CC) as needed. Add new RTL peepholes for decrement-and-branch and *swapped_tst. Rework signtest-and-branch peepholes for *sbrx_branch. (adjust_len) [add_set_ZN, cmp_zext]: New. (QIPSI): New mode iterator. (ALLs1, ALLs2, ALLs4, ALLs234): New mode iterators. (gelt): New code iterator. (gelt_eqne): New code attribute. (rvbranch, *rvbranch, difficult_rvbranch, *difficult_rvbranch) (branch_unspec, *negated_tst, *reversed_tst) (*cmpqi_sign_extend): Remove insns. (define_c_enum "unspec") [UNSPEC_IDENTITY]: Remove. * config/avr/avr-dimode.md (cbranch4): Canonicalize comparisons. * config/avr/predicates.md (scratch_or_d_register_operand): New. * config/avr/constraints.md (Yxx): New constraint. gcc/testsuite/ PR target/109650 * config/avr/torture/pr109650-1.c: New test. * config/avr/torture/pr109650-2.c: New test.
Re: [patch,avr] Fix PR109650 wrong code
...Ok, and now with the patch attached... Here is a revised version of the patch. The difference to the previous one is that it adds some combine patterns for *cbranch insns that were lost in the PR92729 transition. The post-reload part of the patterns were still there. The new patterns are slightly more general in that they also handle fixed-point modes. Apart from that, the patch behaves the same: Am 15.05.23 um 20:05 schrieb Georg-Johann Lay: This patch fixes a wrong-code bug in the wake of PR92729, the transition that turned the AVR backend from cc0 to CCmode. In cc0, the insn that uses cc0 like a conditional branch always follows the cc0 setter, which is no more the case with CCmode where set and use of REG_CC might be in different basic blocks. This patch removes the machine-dependent reorg pass in avr_reorg entirely. It is replaced by a new, AVR specific mini-pass that runs prior to split2. Canonicalization of comparisons away from the "difficult" codes GT[U] and LE[U] is now mostly performed by implementing TARGET_CANONICALIZE_COMPARISON. Moreover: * Text peephole conditions get "dead_or_set_regno_p (*, REG_CC)" as needed. * RTL peephole conditions get "peep2_regno_dead_p (*, REG_CC)" as needed. * Conditional branches no more clobber REG_CC. * insn output for compares looks ahead to determine the branch mode in use. This needs also "dead_or_set_regno_p (*, REG_CC)". * Add RTL peepholes for decrement-and-branch detection. Finally, it fixes some of the many indentation glitches left over from PR92729. Ok? I'd also backport this one because all of v12+ is affected by the wrong code. Johann -- gcc/ PR target/109650 PR target/92729 * config/avr/avr-passes.def (avr_pass_ifelse): Insert new pass. * config/avr/avr.cc (avr_pass_ifelse): New RTL pass. (avr_pass_data_ifelse): New pass_data for it. (make_avr_pass_ifelse, avr_redundant_compare, avr_cbranch_cost) (avr_canonicalize_comparison, avr_out_plus_set_ZN) (avr_out_cmp_ext): New functions. 
(compare_condition): Make sure REG_CC dies in the branch insn. (avr_rtx_costs_1): Add computation of cbranch costs. (avr_adjust_insn_length) [ADJUST_LEN_ADD_SET_ZN, ADJUST_LEN_CMP_ZEXT, ADJUST_LEN_CMP_SEXT]: Handle them. (TARGET_CANONICALIZE_COMPARISON): New define. (avr_simplify_comparison_p, compare_diff_p, avr_compare_pattern) (avr_reorg_remove_redundant_compare, avr_reorg): Remove functions. (TARGET_MACHINE_DEPENDENT_REORG): Remove define. * avr-protos.h (avr_simplify_comparison_p): Remove proto. (make_avr_pass_ifelse, avr_out_plus_set_ZN, cc_reg_rtx) (avr_out_cmp_zext): New protos. * config/avr/avr.md (branch, difficult_branch): Don't split insns. (*cbranchhi.zero-extend.0, *cbranchhi.zero-extend.1) (*swapped_tst, *add.for.eqne.): New insns. (*cbranch4): Rename to cbranch4_insn. (define_peephole): Add dead_or_set_regno_p(insn,REG_CC) as needed. (define_peephole2): Add peep2_regno_dead_p(*,REG_CC) as needed. Add new RTL peepholes for decrement-and-branch and *swapped_tst. Rework signtest-and-branch peepholes for *sbrx_branch. (adjust_len) [add_set_ZN, cmp_zext]: New. (QIPSI): New mode iterator. (ALLs1, ALLs2, ALLs4, ALLs234): New mode iterators. (gelt): New code iterator. (gelt_eqne): New code attribute. (rvbranch, *rvbranch, difficult_rvbranch, *difficult_rvbranch) (branch_unspec, *negated_tst, *reversed_tst) (*cmpqi_sign_extend): Remove insns. (define_c_enum "unspec") [UNSPEC_IDENTITY]: Remove. * config/avr/avr-dimode.md (cbranch4): Canonicalize comparisons. * config/avr/predicates.md (scratch_or_d_register_operand): New. * config/avr/constraints.md (Yxx): New constraint. gcc/testsuite/ PR target/109650 * config/avr/torture/pr109650-1.c: New test. 
* config/avr/torture/pr109650-2.c: New test.diff --git a/gcc/config/avr/avr-dimode.md b/gcc/config/avr/avr-dimode.md index c0bb04ff9e0..91f0d395761 100644 --- a/gcc/config/avr/avr-dimode.md +++ b/gcc/config/avr/avr-dimode.md @@ -455,12 +455,18 @@ (define_expand "conditional_jump" (define_expand "cbranch4" [(set (pc) (if_then_else (match_operator 0 "ordered_comparison_operator" -[(match_operand:ALL8 1 "register_operand" "") - (match_operand:ALL8 2 "nonmemory_operand" "")]) - (label_ref (match_operand 3 "" "")) - (pc)))] +[(match_operand:ALL8 1 "register_operand") + (match_operand:ALL8 2 "nonmemory_operand")]) + (label_ref (match_operand 3)
[avr,committed] Fix PR90622
This patch fixes a minor optimization issue for an avr specific builtin. Applied as obvious. https://gcc.gnu.org/r14-1025 Johann -- target/90622: __builtin_avr_insert_bits: Use BLD/BST for one bit in place. If just one bit is inserted in the same position like with: __builtin_avr_insert_bits (0xF2FF, src, dst); a BLD/BST sequence is better than XOR/AND/XOR. Thus, don't fold that case to the latter sequence. gcc/ PR target/90622 * config/avr/avr.cc (avr_fold_builtin) [AVR_BUILTIN_INSERT_BITS]: Don't fold to XOR / AND / XOR if just one bit is copied to the same position. diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index d5af40f7091..9fa50ca230d 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -14425,10 +14425,13 @@ avr_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *arg, if (changed) return build_call_expr (fndecl, 3, tmap, tbits, tval); -/* If bits don't change their position we can use vanilla logic - to merge the two arguments. */ +/* If bits don't change their position, we can use vanilla logic + to merge the two arguments... */ - if (avr_map_metric (map, MAP_NONFIXED_0_7) == 0) +if (avr_map_metric (map, MAP_NONFIXED_0_7) == 0 +// ...except when we are copying just one bit. In that +// case, BLD/BST is better than XOR/AND/XOR, see PR90622. +&& avr_map_metric (map, MAP_FIXED_0_7) != 1) { int mask_f = avr_map_metric (map, MAP_MASK_PREIMAGE_F); tree tres, tmask = build_int_cst (val_type, mask_f ^ 0xff);
[avr,testsuite,committed] Skip tests that fail for avr for this or that reason.
This annotates some tests that won't work for AVR like: * asm goto with output reload (AVR is not lra). * Using a program address as a ram address. * Float related stuff: AVR double is 32-bit, and long double is incomplete (some functions missing, no signed zeros, etc.) Applied as obvious. Johann -- Skip some tests that won't work for target AVR. gcc/testsuite/ * lib/target-supports.exp (check_effective_target_lra) [avr]: Return 0. * gcc.dg/pr19402-2.c: Skip for avr. * gcc.dg/pr86124.c: Same. * gcc.dg/pr94291.c: Same. * gcc.dg/torture/builtin-complex-1.c: Same. * gcc.dg/torture/fp-int-convert-float32x-timode.c: Same. * gcc.dg/torture/fp-int-convert-float32x.c: Same. * gcc.dg/torture/fp-int-convert-float64-timode.c: Same. * gcc.dg/torture/fp-int-convert-float64.c: Same. * gcc.dg/torture/fp-int-convert-long-double.c: Same. * gcc.dg/torture/fp-int-convert-timode.c: Same. * c-c++-common/torture/builtin-convertvector-1.c: Same. * c-c++-common/torture/complex-sign-add.c: Same. * c-c++-common/torture/complex-sign-mixed-add.c: Same. * c-c++-common/torture/complex-sign-mixed-div.c: Same. * c-c++-common/torture/complex-sign-mixed-mul.c: Same. * c-c++-common/torture/complex-sign-mixed-sub.c: Same. * c-c++-common/torture/complex-sign-mul-minus-one.c: Same. * c-c++-common/torture/complex-sign-mul-one.c: Same. * c-c++-common/torture/complex-sign-mul.c: Same. * c-c++-common/torture/complex-sign-sub.c: Same. 
diff --git a/gcc/testsuite/c-c++-common/torture/builtin-convertvector-1.c b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-1.c index 347dda7692d..fababf1a9eb 100644 --- a/gcc/testsuite/c-c++-common/torture/builtin-convertvector-1.c +++ b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-1.c @@ -1,3 +1,5 @@ +/* { dg-skip-if "double support is incomplete" { "avr-*-*" } } */ + extern #ifdef __cplusplus "C" diff --git a/gcc/testsuite/c-c++-common/torture/complex-sign-add.c b/gcc/testsuite/c-c++-common/torture/complex-sign-add.c index e81223224dc..c1e7886a0df 100644 --- a/gcc/testsuite/c-c++-common/torture/complex-sign-add.c +++ b/gcc/testsuite/c-c++-common/torture/complex-sign-add.c @@ -2,6 +2,7 @@ addition. */ /* { dg-do run } */ /* { dg-options "-std=gnu99" { target c } } */ +/* { dg-skip-if "double support is incomplete" { "avr-*-*" } } */ #include "complex-sign.h" diff --git a/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-add.c b/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-add.c index a209161e157..36d305baf53 100644 --- a/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-add.c +++ b/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-add.c @@ -3,6 +3,7 @@ /* { dg-do run } */ /* { dg-options "-std=gnu99" { target c } } */ /* { dg-skip-if "ptx can elide zero additions" { "nvptx-*-*" } { "-O0" } { "" } } */ +/* { dg-skip-if "double support is incomplete" { "avr-*-*" } } */ #include "complex-sign.h" diff --git a/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-div.c b/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-div.c index f7ee48341c0..a37074bb3b9 100644 --- a/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-div.c +++ b/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-div.c @@ -2,6 +2,7 @@ division. 
*/ /* { dg-do run } */ /* { dg-options "-std=gnu99" { target c } } */ +/* { dg-skip-if "double support is incomplete" { "avr-*-*" } } */ #include "complex-sign.h" diff --git a/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-mul.c b/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-mul.c index 02f936b75bd..1e528b986c5 100644 --- a/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-mul.c +++ b/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-mul.c @@ -2,6 +2,7 @@ multiplication. */ /* { dg-do run } */ /* { dg-options "-std=gnu99" { target c } } */ +/* { dg-skip-if "double support is incomplete" { "avr-*-*" } } */ #include "complex-sign.h" diff --git a/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-sub.c b/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-sub.c index 02ab4db247c..63c75dfdff2 100644 --- a/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-sub.c +++ b/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-sub.c @@ -3,6 +3,7 @@ /* { dg-do run } */ /* { dg-options "-std=gnu99" { target c } } */ /* { dg-skip-if "ptx can elide zero additions" { "nvptx-*-*" } { "-O0" } { "" } } */ +/* { dg-skip-if "double support is incomplete" { "avr-*-*" } } */ #include "complex-sign.h" diff --git a/gcc/testsuite/c-c++-common/torture/complex-sign-mul-minus-one.c b/gcc/testsuite/c-c++-common/torture/complex-sign-mul-minus-one.c index 05cc4fabea4..f8abdd00e2e 100644 --- a/gcc/testsuite/c-c++-common/torture/co
[testsuite,committed] PR testsuite/52641
Applied more annotations to reduce testsuite fallout for 16-bit int / pointer targets. https://gcc.gnu.org/r14-1074 Most of the affected tests use constants not suitable for 16-bit int, bit-fields wider than 16 bits, etc. Johann -- commit 9f5065094c9632a50bea604d5896a139609e50cf Author: Georg-Johann Lay Date: Mon May 22 16:47:56 2023 +0200 testsuite/52641: Fix tests that fail for 16-bit int / pointer targets. gcc/testsuite/ PR testsuite/52641 * c-c++-common/pr19807-2.c: Use __SIZEOF_INT__ instead of 4. * gcc.c-torture/compile/pr103813.c: Require size32plus. * gcc.c-torture/execute/pr108498-2.c: Same. * gcc.c-torture/compile/pr96426.c: Condition on __SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__. * gcc.c-torture/execute/pr103417.c: Require int32plus. * gcc.dg/pr104198.c: Same. * gcc.dg/pr21137.c: Same. * gcc.dg/pr88905.c: Same. * gcc.dg/pr90838.c: Same. * gcc.dg/pr97317.c: Same. * gcc.dg/pr100292.c: Require int32. * gcc.dg/pr101008.c: Same. * gcc.dg/pr96542.c: Same. * gcc.dg/pr96674.c: Same. * gcc.dg/pr97750.c: Require ptr_eq_long. 
diff --git a/gcc/testsuite/c-c++-common/pr19807-2.c b/gcc/testsuite/c-c++-common/pr19807-2.c index 529b9c97322..29a370304d3 100644 --- a/gcc/testsuite/c-c++-common/pr19807-2.c +++ b/gcc/testsuite/c-c++-common/pr19807-2.c @@ -6,7 +6,7 @@ int i; int main() { int a[4]; - if ((char*)&a[1] + 4*i + 4 != (char*)&a[i+2]) + if ((char*)&a[1] + __SIZEOF_INT__*i + __SIZEOF_INT__ != (char*)&a[i+2]) link_error(); return 0; } diff --git a/gcc/testsuite/gcc.c-torture/compile/pr103813.c b/gcc/testsuite/gcc.c-torture/compile/pr103813.c index b3fc066beed..0aa64fb3152 100644 --- a/gcc/testsuite/gcc.c-torture/compile/pr103813.c +++ b/gcc/testsuite/gcc.c-torture/compile/pr103813.c @@ -1,4 +1,5 @@ /* PR middle-end/103813 */ +/* { dg-require-effective-target size32plus } */ struct A { char b; char c[0x2100]; }; struct A d; diff --git a/gcc/testsuite/gcc.c-torture/compile/pr96426.c b/gcc/testsuite/gcc.c-torture/compile/pr96426.c index bd573fe5366..fdb441efc10 100644 --- a/gcc/testsuite/gcc.c-torture/compile/pr96426.c +++ b/gcc/testsuite/gcc.c-torture/compile/pr96426.c @@ -1,5 +1,7 @@ /* PR middle-end/96426 */ +#if __SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__ + typedef long long V __attribute__((vector_size(16))); typedef double W __attribute__((vector_size(16))); @@ -8,3 +10,5 @@ foo (V *v) { __builtin_convertvector (*v, W); } + +#endif diff --git a/gcc/testsuite/gcc.c-torture/execute/pr103417.c b/gcc/testsuite/gcc.c-torture/execute/pr103417.c index 0fef8908036..ea4b99030a5 100644 --- a/gcc/testsuite/gcc.c-torture/execute/pr103417.c +++ b/gcc/testsuite/gcc.c-torture/execute/pr103417.c @@ -1,4 +1,5 @@ /* PR tree-optimization/103417 */ +/* { dg-require-effective-target int32plus } */ struct { int a : 8; int b : 24; } c = { 0, 1 }; diff --git a/gcc/testsuite/gcc.c-torture/execute/pr108498-2.c b/gcc/testsuite/gcc.c-torture/execute/pr108498-2.c index ad930488c33..fdd628cbc86 100644 --- a/gcc/testsuite/gcc.c-torture/execute/pr108498-2.c +++ b/gcc/testsuite/gcc.c-torture/execute/pr108498-2.c @@ -1,4 
+1,5 @@ /* PR tree-optimization/108498 */ +/* { dg-require-effective-target int32plus } */ struct U { char c[16]; }; struct V { char c[16]; }; diff --git a/gcc/testsuite/gcc.dg/pr100292.c b/gcc/testsuite/gcc.dg/pr100292.c index 675a60c3412..147c9324d81 100644 --- a/gcc/testsuite/gcc.dg/pr100292.c +++ b/gcc/testsuite/gcc.dg/pr100292.c @@ -1,4 +1,5 @@ /* { dg-do compile } */ +/* { dg-require-effective-target int32 } */ typedef unsigned char __attribute__((__vector_size__ (4))) V; diff --git a/gcc/testsuite/gcc.dg/pr101008.c b/gcc/testsuite/gcc.dg/pr101008.c index c06208d3425..8229769c6ac 100644 --- a/gcc/testsuite/gcc.dg/pr101008.c +++ b/gcc/testsuite/gcc.dg/pr101008.c @@ -1,6 +1,7 @@ /* PR rtl-optimization/101008 */ /* { dg-do compile } */ /* { dg-options "-O2 -g" } */ +/* { dg-require-effective-target int32 } */ typedef unsigned __attribute__((__vector_size__(32))) U; typedef unsigned __attribute__((__vector_size__(16))) V; diff --git a/gcc/testsuite/gcc.dg/pr104198.c b/gcc/testsuite/gcc.dg/pr104198.c index bfc7a777184..de86f49c9dc 100644 --- a/gcc/testsuite/gcc.dg/pr104198.c +++ b/gcc/testsuite/gcc.dg/pr104198.c @@ -3,6 +3,7 @@ /* { dg-do run } */ /* { dg-options "-O2 -std=c99" } */ +/* { dg-require-effective-target int32plus } */ #include #include diff --git a/gcc/testsuite/gcc.dg/pr21137.c b/gcc/testsuite/gcc.dg/pr21137.c index 6d73deaee6c..199555a5017 100644 --- a/gcc/testsuite/gcc.dg/pr21137.c +++ b/gcc/testsuite/gcc.dg/pr21137.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -fdump-tree-optimized" } */ +/* { dg-require-effective-target int32plus } */
[testsuite,committed]: PR52641: Fix more of the int=32 assumption fallout.
Applied more fixes for the int=32 assumption fallout. Johann -- testsuite/52641: Fix more of implicit int=32 assumption fallout. gcc/testsuite/ PR testsuite/52641 * gcc.c-torture/compile/pr108892.c: Require int32. * gcc.c-torture/compile/pr98199.c: Require int32plus. * gcc.dg/analyzer/call-summaries-pr107072.c: Same. * gcc.dg/analyzer/null-deref-pr105755.c: Same. * gcc.dg/tree-ssa/pr102232.c: Same. * gcc.dg/tree-ssa/pr105860.c: Same. * gcc.dg/tree-ssa/pr96730.c: Same. * gcc.dg/tree-ssa/pr96779-disabled.c: Same. * gcc.dg/tree-ssa/pr96779.c: Same. * gcc.dg/tree-ssa/pr98513.c: Same. * gcc.dg/tree-ssa/ssa-sink-18.c: Same. * gcc.dg/analyzer/coreutils-cksum-pr108664.c: Require int32plus, size24plus. * gcc.dg/analyzer/doom-s_sound-pr108867.c: Require size32plus. * gcc.dg/analyzer/malloc-CWE-590-examples.c: Same. * gcc.dg/debug/btf/btf-bitfields-4.c: Same. * gcc.dg/tree-ssa/pr93435.c: Same. * gcc.dg/analyzer/null-deref-pr102671-1.c: Require ptr_eq_long. * gcc.dg/analyzer/null-deref-pr102671-2.c: Same. * gcc.dg/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c: Same. * gcc.dg/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early.c: Same. * gcc.dg/tree-ssa/pr103345.c: Use uint32_t. * gcc.dg/tree-ssa/ssa-ccp-41.c [sizeof(int)==2]: Same. * gcc.dg/tree-ssa/pr109031-1.c: Use uint16_t, uint32_t. * gcc.dg/tree-ssa/pr109031-2.c: Same. * gcc.dg/Warray-bounds-49.c (dg-warning): Discriminate int != short. * gcc.dg/Warray-bounds-52.c (dg-warning): Discriminate avr. * gcc.dg/Warray-bounds-33.c: Skip target avr. * gcc.dg/analyzer/fd-access-mode-target-headers.c: Same. * gcc.dg/analyzer/flex-with-call-summaries.c: Same. * gcc.dg/analyzer/isatty-1.c: Same. * gcc.dg/analyzer/pipe-glibc.c: Same. 
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr108892.c b/gcc/testsuite/gcc.c-torture/compile/pr108892.c index d7fecd54ecf..fb0a258cdba 100644 --- a/gcc/testsuite/gcc.c-torture/compile/pr108892.c +++ b/gcc/testsuite/gcc.c-torture/compile/pr108892.c @@ -1,3 +1,5 @@ +/* { dg-require-effective-target int32 } */ + typedef char __attribute__((__vector_size__ (64))) U; typedef int __attribute__((__vector_size__ (64))) V; diff --git a/gcc/testsuite/gcc.c-torture/compile/pr98199.c b/gcc/testsuite/gcc.c-torture/compile/pr98199.c index b5c8d204f0e..6605d38788c 100644 --- a/gcc/testsuite/gcc.c-torture/compile/pr98199.c +++ b/gcc/testsuite/gcc.c-torture/compile/pr98199.c @@ -1,4 +1,5 @@ /* PR tree-optimization/98199 */ +/* { dg-require-effective-target int32plus } */ struct A { long a; short d; int c, f, e, g; }; struct B { int a, i; short j; struct A k; signed : 20; int e, g; } __attribute__((packed)); diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-33.c b/gcc/testsuite/gcc.dg/Warray-bounds-33.c index 28f14b4722c..13efabe33b6 100644 --- a/gcc/testsuite/gcc.dg/Warray-bounds-33.c +++ b/gcc/testsuite/gcc.dg/Warray-bounds-33.c @@ -2,6 +2,7 @@ an object of incomplete type { dg-do compile } { dg-options "-O2 -Wall" } */ +/* { dg-skip-if "acessing data memory with program memory address" { "avr-*-*" } } */ struct S { diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-49.c b/gcc/testsuite/gcc.dg/Warray-bounds-49.c index f271dd526b8..9335f1507e8 100644 --- a/gcc/testsuite/gcc.dg/Warray-bounds-49.c +++ b/gcc/testsuite/gcc.dg/Warray-bounds-49.c @@ -17,7 +17,8 @@ void test_a0 (void) // The first three elements fit in the tail padding. a0.a2[0] = 0; a0.a2[1] = 1; a0.a2[2] = 2; - a0.a2[3] = 3; // { dg-warning "array subscript 3 is above array bounds of 'short int\\\[]'" } + a0.a2[3] = 3; // { dg-warning "array subscript 3 is above array bounds of 'short int\\\[]'" "" { target { ! 
short_eq_int } } } + // { dg-warning "array subscript 3 is above array bounds of 'int\\\[]'" "" { target { short_eq_int } } .-1 } } @@ -27,7 +28,8 @@ void test_a1 (void) { a1.a2[0] = 0; a1.a2[1] = 1; a1.a2[2] = 2; - a1.a2[3] = 3; // { dg-warning "array subscript 3 is above array bounds of 'short int\\\[]'" } + a1.a2[3] = 3; // { dg-warning "array subscript 3 is above array bounds of 'short int\\\[]'" "" { target { ! short_eq_int } } } + // { dg-warning "array subscript 3 is above array bounds of 'int\\\[]'" "" { target { short_eq_int } } .-1 } } @@ -37,7 +39,8 @@ void test_a2 (void) { a2.a2[0] = 0; a2.a2[1] = 1; a2.a2[2] = 2; - a2.a2[3] = 3; // { dg-warning "array subscript 3 is above array bounds of 'short int\\\[]'" } + a2.a2[3] = 3; // { dg-warning "array subscript 3 is above array bounds of 'short int\\\[]'" "" { target { ! short_eq_int } } } + // { dg-warning "array subscript 3 is above array bounds of 'int\\\[]'" "" { target { short_eq_int } } .-1 } } @@ -47,7 +50,8 @@ void test_a3 (void) { a3.a2[
[patch]: Implement PR104327 for avr
PR target/104327 not only affects s390 but also avr: The avr backend pre-sets some options depending on optimization level. The inliner then thinks that always_inline functions are not eligible for inlining and terminates with an error. Proposing the following patch that implements TARGET_CAN_INLINE_P. Ok to apply? Johann -- target/104327: Allow more inlining between different optimization levels. avr-common.cc introduces the following options that are set depending on optimization level: -mgas-isr-prologues, -mmain-is-OS-task and -fsplit-wide-types-early. The inliner thinks that different options disallow cross-optimization inlining, so provide can_inline_p. gcc/ PR target/104327 * config/avr/avr.cc (avr_can_inline_p): New static function. (TARGET_CAN_INLINE_P): Define to that function. diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index 9fa50ca230d..55b48f63865 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -1018,6 +1018,22 @@ avr_no_gccisr_function_p (tree func) return avr_lookup_function_attribute1 (func, "no_gccisr"); } + +/* Implement `TARGET_CAN_INLINE_P'. */ +/* Some options like -mgas_isr_prologues depend on optimization level, + and the inliner might think that due to different options, inlining + is not permitted; see PR104327. */ + +static bool +avr_can_inline_p (tree /* caller */, tree callee) +{ + // For now, dont't allow to inline ISRs. If the user actually wants + // to inline ISR code, they have to turn the body of the ISR into an + // ordinary function. + + return ! avr_interrupt_function_p (callee); +} + /* Implement `TARGET_SET_CURRENT_FUNCTION'. */ /* Sanity cheching for above function attributes. */ @@ -14713,6 +14729,9 @@ avr_float_lib_compare_returns_bool (machine_mode mode, enum rtx_code) #undef TARGET_MD_ASM_ADJUST #define TARGET_MD_ASM_ADJUST avr_md_asm_adjust +#undef TARGET_CAN_INLINE_P +#define TARGET_CAN_INLINE_P avr_can_inline_p + struct gcc_target targetm = TARGET_INITIALIZER;
[avr,committed] Fix cost computation for bit insertions.
Applied this patchlet that implements proper cost computation of (set (zero_extract (...) ...)) kind patterns that do single-bit (inverted) bit insertions. Johann -- Improve cost computation for single-bit bit insertions. Some miscomputation of rtx_costs lead to sub-optimal code for single-bit bit insertions. This patch implements TARGET_INSN_COST, which has a chance to see the whole insn during insn combination; in particular the SET_DEST of (set (zero_extract (...) ...)). gcc/ * config/avr/avr.cc (avr_insn_cost): New static function. (TARGET_INSN_COST): Define to that function. diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index 9fa50ca230d..4fa6f5309b2 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -11514,6 +11514,52 @@ avr_rtx_costs (rtx x, machine_mode mode, int outer_code, } +/* Implement `TARGET_INSN_COST'. */ +/* For some insns, it is not enough to look at the cost of the SET_SRC. + In that case, have a look at the entire insn, e.g. during insn combine. */ + +static int +avr_insn_cost (rtx_insn *insn, bool speed) +{ + const int unknown_cost = -1; + int cost = unknown_cost; + + rtx set = single_set (insn); + + if (set + && ZERO_EXTRACT == GET_CODE (SET_DEST (set))) +{ + // Try find anything that would flip the extracted bit. + bool not_bit_p = false; + + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST) + { + enum rtx_code code = GET_CODE (*iter); + not_bit_p |= code == NOT || code == XOR || code == GE; + } + + // Don't go too deep into the analysis. In almost all cases, + // using BLD/BST is the best we can do for single-bit moves, + // even considering CSE. + cost = COSTS_N_INSNS (2 + not_bit_p); +} + + if (cost != unknown_cost) +{ + if (avr_log.rtx_costs) + avr_edump ("\n%? (%s) insn_cost=%d\n%r\n", + speed ? "speed" : "size", cost, insn); + return cost; +} + + // Resort to what rtlanal.cc::insn_cost() implements as a default + // when targetm.insn_cost() is not implemented. 
+ + return pattern_cost (PATTERN (insn), speed); +} + + /* Implement `TARGET_ADDRESS_COST'. */ static int @@ -14574,6 +14620,8 @@ avr_float_lib_compare_returns_bool (machine_mode mode, enum rtx_code) #undef TARGET_ASM_FINAL_POSTSCAN_INSN #define TARGET_ASM_FINAL_POSTSCAN_INSN avr_asm_final_postscan_insn +#undef TARGET_INSN_COST +#define TARGET_INSN_COST avr_insn_cost #undef TARGET_REGISTER_MOVE_COST #define TARGET_REGISTER_MOVE_COST avr_register_move_cost #undef TARGET_MEMORY_MOVE_COST
Re: [patch]: Implement PR104327 for avr
Am 24.05.23 um 11:38 schrieb Richard Biener: On Tue, May 23, 2023 at 2:56 PM Georg-Johann Lay wrote: PR target/104327 not only affects s390 but also avr: The avr backend pre-sets some options depending on optimization level. The inliner then thinks that always_inline functions are not eligible for inlining and terminates with an error. Proposing the following patch that implements TARGET_CAN_INLINE_P. Ok to apply? Johann -- target/104327: Allow more inlining between different optimization levels. avr-common.cc introduces the following options that are set depending on optimization level: -mgas-isr-prologues, -mmain-is-OS-task and -fsplit-wide-types-early. The inliner thinks that different options disallow cross-optimization inlining, so provide can_inline_p. gcc/ PR target/104327 * config/avr/avr.cc (avr_can_inline_p): New static function. (TARGET_CAN_INLINE_P): Define to that function. diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index 9fa50ca230d..55b48f63865 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -1018,6 +1018,22 @@ avr_no_gccisr_function_p (tree func) return avr_lookup_function_attribute1 (func, "no_gccisr"); } + +/* Implement `TARGET_CAN_INLINE_P'. */ +/* Some options like -mgas_isr_prologues depend on optimization level, + and the inliner might think that due to different options, inlining + is not permitted; see PR104327. */ + +static bool +avr_can_inline_p (tree /* caller */, tree callee) +{ + // For now, dont't allow to inline ISRs. If the user actually wants + // to inline ISR code, they have to turn the body of the ISR into an + // ordinary function. + + return ! 
avr_interrupt_function_p (callee); I'm not sure if AVR has ISA extensions but the above will likely break things like void __attribute__((target("-mX"))) foo () { asm ("isa X opcode"); stmt-that-generates-X-ISA; } This yields warning: target attribute is not supported on this machine [-Wattributes] avr has -mmcu= target options, but switching them in mid-air won't work because the file prologue might already be different and incompatible across different architectures. And I never saw any user requesting such a thing, and I can't imagine any reasonable use case... If the warning is not strong enough, may be it can be turned into an error, but -Wattributes is not specific enough for that. void bar () { if (cpu-has-X) foo (); } if always-inlines are the concern you can use bool always_inline = (DECL_DISREGARD_INLINE_LIMITS (callee) && lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee))); /* Do what the user says. */ if (always_inline) return true; return default_target_can_inline_p (caller, callee); The default implementation of can_inline_p worked fine for avr. As far as I understand, the new behavior is due to clean-up of global states for options? So I need to take into account inlining costs and decide on that whether it's preferred to inline a function or not? Johann +} + /* Implement `TARGET_SET_CURRENT_FUNCTION'. */ /* Sanity cheching for above function attributes. */ @@ -14713,6 +14729,9 @@ avr_float_lib_compare_returns_bool (machine_mode mode, enum rtx_code) #undef TARGET_MD_ASM_ADJUST #define TARGET_MD_ASM_ADJUST avr_md_asm_adjust +#undef TARGET_CAN_INLINE_P +#define TARGET_CAN_INLINE_P avr_can_inline_p + struct gcc_target targetm = TARGET_INITIALIZER;
Re: [patch]: Implement PR104327 for avr
Am 25.05.23 um 08:35 schrieb Richard Biener: On Wed, May 24, 2023 at 5:44 PM Georg-Johann Lay wrote: Am 24.05.23 um 11:38 schrieb Richard Biener: On Tue, May 23, 2023 at 2:56 PM Georg-Johann Lay wrote: PR target/104327 not only affects s390 but also avr: The avr backend pre-sets some options depending on optimization level. The inliner then thinks that always_inline functions are not eligible for inlining and terminates with an error. Proposing the following patch that implements TARGET_CAN_INLINE_P. Ok to apply? Johann target/104327: Allow more inlining between different optimization levels. avr-common.cc introduces the following options that are set depending on optimization level: -mgas-isr-prologues, -mmain-is-OS-task and -fsplit-wide-types-early. The inliner thinks that different options disallow cross-optimization inlining, so provide can_inline_p. gcc/ PR target/104327 * config/avr/avr.cc (avr_can_inline_p): New static function. (TARGET_CAN_INLINE_P): Define to that function. diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index 9fa50ca230d..55b48f63865 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -1018,6 +1018,22 @@ avr_no_gccisr_function_p (tree func) return avr_lookup_function_attribute1 (func, "no_gccisr"); } + +/* Implement `TARGET_CAN_INLINE_P'. */ +/* Some options like -mgas_isr_prologues depend on optimization level, + and the inliner might think that due to different options, inlining + is not permitted; see PR104327. */ + +static bool +avr_can_inline_p (tree /* caller */, tree callee) +{ + // For now, dont't allow to inline ISRs. If the user actually wants + // to inline ISR code, they have to turn the body of the ISR into an + // ordinary function. + + return ! 
avr_interrupt_function_p (callee); I'm not sure if AVR has ISA extensions but the above will likely break things like void __attribute__((target("-mX"))) foo () { asm ("isa X opcode"); stmt-that-generates-X-ISA; } This yields warning: target attribute is not supported on this machine [-Wattributes] Ah, that's an interesting fact. So that indeed leaves __attribute__((optimize(...))) influencing the set of active target attributes via the generic option target hooks like in your case the different defaults. avr has -mmcu= target options, but switching them in mid-air won't work because the file prologue might already be different and incompatible across different architectures. And I never saw any user requesting such a thing, and I can't imagine any reasonable use case... If the warning is not strong enough, may be it can be turned into an error, but -Wattributes is not specific enough for that. Note the target attribute is then simply ignored. void bar () { if (cpu-has-X) foo (); } if always-inlines are the concern you can use bool always_inline = (DECL_DISREGARD_INLINE_LIMITS (callee) && lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee))); /* Do what the user says. */ if (always_inline) return true; return default_target_can_inline_p (caller, callee); The default implementation of can_inline_p worked fine for avr. As far as I understand, the new behavior is due to clean-up of global states for options? I think the last change was r8-2658-g9b25e12d2d940a which for targets without target attribute support made it more likely to run into the default hook actually comparing the options. Previously the "default" was oddly special-cased but you could have still run into compares with two different set of defaults when there's another "default" default. Say, compile with -O2 and have one optimize(0) and one optimize(Os) function it would compare the optimize(0) and optimize(Os) set if they were distinct from the -O2 set. That probably never happened for AVR. 
So I need to take into account inlining costs and decide on that whether it's preferred to inline a function or not? No, the hook isn't about cost, it's about full incompatibility. So if the different -m options that could be in effect for AVR in a single TU for different functions never should prevent inlining then simply make the hook return true. If there's a specific option (that can differ from what specified on the compiler command line!) that should, then you should compare the setting of that option from the DECL_FUNCTION_SPECIFIC_TARGET of the caller and the callee. But as far as I can see simply returning true should be correct for AVR, or like your patch handle interrupts differently (though the -Winline diagnostic will tell the user there's a mismatch in target options which might be confusing). Ok, simply "true" sounds reasonable. Is that change ok then? Johann Richard. Johann
[avr,committed] PR82931: Improve single-bit transfers between registers.
Applied this patch that makes one insn more generic so it can handle more bit positions than just 0. Johann -- target/82931: Make a pattern more generic to match more bit-transfers. There is already a pattern in avr.md that matches single-bit transfers from one register to another one, but it only handled bit 0 of 8-bit registers. This change makes that pattern more generic so it matches more of similar single-bit transfers. gcc/ PR target/82931 * config/avr/avr.md (*movbitqi.0): Rename to *movbit.0-6. Handle any bit position and use mode QISI. * config/avr/avr.cc (avr_rtx_costs_1) [IOR]: Return a cost of 2 insns for bit-transfer of respective style. gcc/testsuite/ PR target/82931 * gcc.target/avr/pr82931.c: New test. diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index 4fa6f5309b2..31706964eb1 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -10843,6 +10843,15 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, *total += COSTS_N_INSNS (1); return true; } + if (IOR == code + && AND == GET_CODE (XEXP (x, 0)) + && AND == GET_CODE (XEXP (x, 1)) + && single_zero_operand (XEXP (XEXP (x, 0), 1), mode)) +{ + // Open-coded bit transfer. + *total = COSTS_N_INSNS (2); + return true; +} *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)); *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed); if (!CONST_INT_P (XEXP (x, 1))) diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md index a79c6824fad..371965938a6 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -9096,16 +9096,20 @@ (define_insn "*movbitqi.1-6.b" "bst %3,0\;bld %0,%4" [(set_attr "length" "2")]) -;; Move bit $3.0 into bit $0.0. -;; For bit 0, combiner generates slightly different pattern. 
-(define_insn "*movbitqi.0" - [(set (match_operand:QI 0 "register_operand" "=r") -(ior:QI (and:QI (match_operand:QI 1 "register_operand" "0") -(match_operand:QI 2 "single_zero_operand" "n")) -(and:QI (match_operand:QI 3 "register_operand" "r") -(const_int 1] - "0 == exact_log2 (~INTVAL(operands[2]) & GET_MODE_MASK (QImode))" - "bst %3,0\;bld %0,0" +;; Move bit $3.x into bit $0.x. +(define_insn "*movbit.0-6" + [(set (match_operand:QISI 0 "register_operand" "=r") +(ior:QISI (and:QISI (match_operand:QISI 1 "register_operand" "0") +(match_operand:QISI 2 "single_zero_operand" "n")) + (and:QISI (match_operand:QISI 3 "register_operand" "r") +(match_operand:QISI 4 "single_one_operand" "n"] + "GET_MODE_MASK(mode) + == (GET_MODE_MASK(mode) & (INTVAL(operands[2]) ^ INTVAL(operands[4])))" + { +auto bitmask = GET_MODE_MASK (mode) & UINTVAL (operands[4]); +operands[4] = GEN_INT (exact_log2 (bitmask)); +return "bst %T3%T4" CR_TAB "bld %T0%T4"; + } [(set_attr "length" "2")]) ;; Move bit $2.0 into bit $0.7. diff --git a/gcc/testsuite/gcc.target/avr/pr82931.c b/gcc/testsuite/gcc.target/avr/pr82931.c new file mode 100644 index 000..477284fa127 --- /dev/null +++ b/gcc/testsuite/gcc.target/avr/pr82931.c @@ -0,0 +1,29 @@ +/* { dg-options "-Os" } */ +/* { dg-final { scan-assembler-times "bst" 4 } } */ +/* { dg-final { scan-assembler-times "bld" 4 } } */ + +typedef __UINT8_TYPE__ uint8_t; +typedef __UINT16_TYPE__ uint16_t; + +#define BitMask (1u << 14) +#define Bit8Mask ((uint8_t) (1u << 4)) + +void merge1_8 (uint8_t *dst, const uint8_t *src) +{ +*dst = (*src & Bit8Mask) | (*dst & ~ Bit8Mask); +} + +void merge2_8 (uint8_t *dst, const uint8_t *src) +{ +*dst ^= (*dst ^ *src) & Bit8Mask; +} + +void merge1_16 (uint16_t *dst, const uint16_t *src) +{ +*dst = (*src & BitMask) | (*dst & ~ BitMask); +} + +void merge2_16 (uint16_t *dst, const uint16_t *src) +{ +*dst ^= (*dst ^ *src) & BitMask; +}
[avr,committed]: Implement PR104327 for avr
Am 25.05.23 um 17:07 schrieb Richard Biener: Am 25.05.2023 um 16:22 schrieb Georg-Johann Lay : Am 25.05.23 um 08:35 schrieb Richard Biener: On Wed, May 24, 2023 at 5:44 PM Georg-Johann Lay wrote: Am 24.05.23 um 11:38 schrieb Richard Biener: On Tue, May 23, 2023 at 2:56 PM Georg-Johann Lay wrote: PR target/104327 not only affects s390 but also avr: The avr backend pre-sets some options depending on optimization level. The inliner then thinks that always_inline functions are not eligible for inlining and terminates with an error. Proposing the following patch that implements TARGET_CAN_INLINE_P. Ok to apply? Johann target/104327: Allow more inlining between different optimization levels. avr-common.cc introduces the following options that are set depending on optimization level: -mgas-isr-prologues, -mmain-is-OS-task and -fsplit-wide-types-early. The inliner thinks that different options disallow cross-optimization inlining, so provide can_inline_p. gcc/ PR target/104327 * config/avr/avr.cc (avr_can_inline_p): New static function. (TARGET_CAN_INLINE_P): Define to that function. diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index 9fa50ca230d..55b48f63865 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -1018,6 +1018,22 @@ avr_no_gccisr_function_p (tree func) return avr_lookup_function_attribute1 (func, "no_gccisr"); } + +/* Implement `TARGET_CAN_INLINE_P'. */ +/* Some options like -mgas_isr_prologues depend on optimization level, + and the inliner might think that due to different options, inlining + is not permitted; see PR104327. */ + +static bool +avr_can_inline_p (tree /* caller */, tree callee) +{ + // For now, dont't allow to inline ISRs. If the user actually wants + // to inline ISR code, they have to turn the body of the ISR into an + // ordinary function. + + return ! 
avr_interrupt_function_p (callee); I'm not sure if AVR has ISA extensions but the above will likely break things like void __attribute__((target("-mX"))) foo () { asm ("isa X opcode"); stmt-that-generates-X-ISA; } This yields warning: target attribute is not supported on this machine [-Wattributes] Ah, that's an interesting fact. So that indeed leaves __attribute__((optimize(...))) influencing the set of active target attributes via the generic option target hooks like in your case the different defaults. avr has -mmcu= target options, but switching them in mid-air won't work because the file prologue might already be different and incompatible across different architectures. And I never saw any user requesting such a thing, and I can't imagine any reasonable use case... If the warning is not strong enough, may be it can be turned into an error, but -Wattributes is not specific enough for that. Note the target attribute is then simply ignored. void bar () { if (cpu-has-X) foo (); } if always-inlines are the concern you can use bool always_inline = (DECL_DISREGARD_INLINE_LIMITS (callee) && lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee))); /* Do what the user says. */ if (always_inline) return true; return default_target_can_inline_p (caller, callee); The default implementation of can_inline_p worked fine for avr. As far as I understand, the new behavior is due to clean-up of global states for options? I think the last change was r8-2658-g9b25e12d2d940a which for targets without target attribute support made it more likely to run into the default hook actually comparing the options. Previously the "default" was oddly special-cased but you could have still run into compares with two different set of defaults when there's another "default" default. Say, compile with -O2 and have one optimize(0) and one optimize(Os) function it would compare the optimize(0) and optimize(Os) set if they were distinct from the -O2 set. That probably never happened for AVR. 
So I need to take into account inlining costs and decide on that whether it's preferred to inline a function or not? No, the hook isn't about cost, it's about full incompatibility. So if the different -m options that could be in effect for AVR in a single TU for different functions never should prevent inlining then simply make the hook return true. If there's a specific option (that can differ from what specified on the compiler command line!) that should, then you should compare the setting of that option from the DECL_FUNCTION_SPECIFIC_TARGET of the caller and the callee. But as far as I can see simply returning true should be correct for AVR, or like your patch handle interrupts differently (though the -Winline diagnostic will tell the user there's a mismatch in target options which might be confusing). Ok, simply "true" sou
[patch, avr] Fix PR target/99184: Wrong cast from double to 16-bit and 32-bit ints.
Hello, this patch fixes PR target/99184, which is about incorrect rounding when converting 64-bit (long) double to 16-bit and 32-bit integers. The patch just removes the respective roundings from libf7-asm.sx::to_integer and ::to_unsigned. Luckily, LibF7 does not use the respective functions anywhere internally; the only user is in libf7.c::f7_exp, which reads f7_round (qq, qq); int16_t q = f7_get_s16 (qq); so that f7_get_s16() operates on an already rounded value, and therefore this code works unaltered with or without rounding in to_integer. The patch applies to directory ./libgcc/config/avr/libf7/ and is the same for all GCC versions v10+. Could someone with write permissions please commit it to trunk and backport it to v12, v11, and v10, as it is a wrong-code issue. The patch will fit without problems (except for ChangeLog) because there is no traffic on that folder. Thanks, Johann libgcc/config/avr/libf7/ PR target/99184 Remove rounding from double to [u]int16 and [u]int32 casts. * libf7-asm.sx (to_integer, to_unsigned): Don't round 16-bit and 32-bit integers. diff --git a/ChangeLog b/ChangeLog index 7e06f52..3ec0082 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ + + PR target/99184 + Remove rounding from double to [u]int16 and [u]int32 casts. + + * libf7-asm.sx (to_integer, to_unsigned): Don't round 16-bit + and 32-bit integers. + 2022-04-21 Release Manager * GCC 11.3.0 released. diff --git a/libf7-asm.sx b/libf7-asm.sx index 7629e23..9d701f2 100644 --- a/libf7-asm.sx +++ b/libf7-asm.sx @@ -601,9 +601,6 @@ DEFUN to_integer tst C6 brmi.Lsaturate.T; > INTxx_MAX => saturate -rcall .Lround -brmi.Lsaturate.T; > INTxx_MAX => saturate - brtc 9f ; >= 0 => return sbrcMask, 5 .global __negdi2 @@ -658,30 +655,6 @@ DEFUN to_integer .global __clr_8 XJMP__clr_8 -.Lround: -;; C6.7 is known to be 0 here. -;; Return N = 1 iff we have to saturate. -cpi Mask, 0xf -breq .Lround16 -cpi Mask, 0x1f -breq .Lround32 - -;; For now, no rounding in the 64-bit case. 
This rounding -;; would have to be integrated into the right-shift. -cln -ret - -.Lround32: -rol C2 -adc C3, ZERO -adc C4, ZERO -rjmp 2f - -.Lround16: -rol C4 -2: adc C5, ZERO -adc C6, ZERO -ret ENDF to_integer #endif /* F7MOD_to_integer_ */ @@ -725,29 +698,6 @@ DEFUN to_unsigned clr CA F7call lshrdi3 POP r16 - -;; Rounding -;; ??? C6.7 is known to be 0 here. -cpi Mask, 0xf -breq .Lround16 -cpi Mask, 0x1f -breq .Lround32 - -;; For now, no rounding in the 64-bit case. This rounding -;; would have to be integrated into the right-shift. -ret - -.Lround32: -rol C2 -adc C3, ZERO -adc C4, ZERO -rjmp 2f - -.Lround16: -rol C4 -2: adc C5, ZERO -adc C6, ZERO -brcs.Lset_0x; Rounding overflow => saturate ret .Lset_0x:
Re: [patch, avr] Fix PR target/99184: Wrong cast from double to 16-bit and 32-bit ints.
Am 19.09.22 um 09:51 schrieb Richard Biener: On Sun, Sep 18, 2022 at 7:40 PM Georg Johann Lay wrote: Hello, this patch fixed PR target/99184 which incorrectly rounded during 64-bit (long) double to 16-bit and 32-bit integers. The patch just removes the respective roundings from libf7-asm.sx::to_integer and ::to_unsigned. Luckily, LibF7 does nowhere use respective functions internally, the only user is in libf7.c::f7_exp which reads f7_round (qq, qq); int16_t q = f7_get_s16 (qq); so that f7_get_s16() operates on an already rounded value, and therefore this code works unaltered with or without rounding in to_integer. The patch applies to directory ./libgcc/config/avr/libf7/ and is the same for all GCC versions v10+. Please someone with write permissions commit it to trunk and backport to v12, v11, and v10 as it is a wrong-code issue. The patch will fit without problems (except for ChangeLog) because there is no traffic on that folder. Thanks, I've pushed the change. Please in future try to send patches that can be applied with git am, thus use git format-patch Richard. Thank you so much. I generated the patch with "git diff > file.diff", so that is not correct? The only change is that I defined extra hunks for asm so that one can see the function like in @@ -601,9 +601,6 @@ DEFUN to_integer So git is not prepared for such hunks? Would you point me to some documentation on how to do it properly? Thanks, Johann
[PATCH] improved const shifts for AVR targets
Hi, recently I used some arduino uno for a project and realized some areas which do not output optimal asm code. Especially around shifts and function calls. With this as motivation and hacktoberfest I started patching things. Since patch files do not provide a good overview and I hope for a "hacktoberfest-accepted" label on the PR on github I also opened it there: https://github.com/gcc-mirror/gcc/pull/73 This patch improves shifts with const right hand operand. While 8bit and 16bit shifts were mostly fine, 24bit and 32bit were not handled well. Testing I checked output with a local installation of compiler explorer in asm and a tiny unit test comparing shifts with mul/div by 2. I however did not write any testcases in gcc for it. Hi, for such large changes, IMO it's a good idea to run the testsuite against the changes and make sure that there are no regressions. Maybe even add new runtime tests in gcc.target/avr/torture to cover a significant amount of the changes? For example a test could go like: __attribute__((__always_inline__)) static inline void shr (long x, int off) { long y = x >> off; __asm ("" : "+r" (x)); if (x >> off != y) __builtin_abort(); } void test_shr (void) { long x = 0x76543215; shr (x, 13); shr (x, 14); shr (x, 15); shr (x, 16); } One shift is folded away by the compiler, and the other one has to be carried out. However, the insn output also depends on available register classes like "ldi_ok" and whether a "d" class scratch is available, so it will be hard to achieve full coverage. As it appears, testing for the lower registers can be forced as follows, though this won't work for AVR_TINY, of course: static inline void shr (long x, int off) { long y = x >> off; __asm ("" : "+l" (x)); x >>= off; __asm ("" : "+l" (x)); if (x != y) __builtin_abort(); } Target This patch is only targeting atmel avr family of chips. Changelog improved const shifts for AVR targets You can have a look at existing ChangeLog files to see the format and style. 
Patch - diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index 4ed390e4cf9..c7b70812d5c 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -6043,9 +6043,6 @@ out_shift_with_cnt (const char *templ, rtx_insn *insn, rtx operands[], op[2] = operands[2]; op[3] = operands[3]; - if (plen) -*plen = 0; - This looks wrong. These functions are used in two different contexts: The first is computing the instruction lengths (in words), which is needed for jump offset computations for relative jumps that cross the insn. This is done for plen != NULL, and the length must be returned in *plen. The second is the actual output of the instruction sequence, or returning the respective string (depending on context), whose length must not exceed the computed one. This is performed if plen == NULL. Not initializing *plen means that you get garbage for instruction lengths. Runtime errors will occur but just not very frequently, e.g. if an instruction sequence is longer than anticipated, a jump target might be out of reach which results in a linker error. if (CONST_INT_P (operands[2])) { /* Operand 3 is a scratch register if this is a @@ -6150,96 +6147,68 @@ out_shift_with_cnt (const char *templ, rtx_insn *insn, rtx operands[], /* 8bit shift left ((char)x << i) */ const char * -ashlqi3_out (rtx_insn *insn, rtx operands[], int *len) +ashlqi3_out (rtx_insn *insn, rtx operands[], int *plen) { if (CONST_INT_P (operands[2])) { - int k; - - if (!len) - len = &k; - switch (INTVAL (operands[2])) { default: if (INTVAL (operands[2]) < 8) break; - *len = 1; - return "clr %0"; - - case 1: - *len = 1; - return "lsl %0"; - - case 2: - *len = 2; - return ("lsl %0" CR_TAB - "lsl %0"); - - case 3: - *len = 3; - return ("lsl %0" CR_TAB - "lsl %0" CR_TAB - "lsl %0"); +return avr_asm_len ("clr %0", operands, plen, 1); I don't get it. This prints *one* CLR instruction for all shift offsets 1...3? 
case 4: if (test_hard_reg_class (LD_REGS, operands[0])) { - *len = 2; - return ("swap %0" CR_TAB - "andi %0,0xf0"); +return avr_asm_len ("swap %0" CR_TAB + "andi %0,0xf0", operands, plen, 2); Glitch of coding-rules (GNU style it is), similar in many placed down the line which seem to have incorrect indentations. It's not always easy to tell this just from looking at a patch, so better double-check your indentations. } - *len = 4; - return ("lsl %0" CR_TAB +return avr_asm_len ("lsl %0" CR_TAB "lsl %0" CR_TAB "lsl %0" CR_TAB - "lsl %0"); + "lsl %0", operands, plen, 4); case 5: if (test_hard_reg_class (LD_REGS, operands[0])) { - *len = 3; - return ("swap %0" CR_TAB +return avr_asm_len ("swap %0" CR_TAB "lsl %0" CR_TAB - "andi %0,0xe0"); + "andi %0,0xe0", operands, plen,
Re: [PATCH v3] c++: parser - Support for target address spaces in C++
[PATCH v3] c++: parser - Support for target address spaces in C++ First of all, it is great news that GCC is going to implement named address spaces for C++. I have some questions: 1. How is name-mangling going to work? == Clang supports address spaces in C++, and for address-space 1 it does generate code like the following: #define __flash __attribute__((__address_space__(1))) char get_p (const __flash char *p) { return *p; } _Z5get_pPU3AS1Kc: ... I.e. address-space 1 is mangled as "AS1". (Notice that Clang's attribute actually works like a qualifier here, one could not get this to work with GCC attributes.) 2. Will it work with compound literals? === Currently, the following C code works for target avr: const __flash char *pHallo = (const __flash char[]) { "Hallo" }; This is a pointer in RAM (AS0) that holds the address of a string in flash (AS1) and is initialized with that address. Unfortunately, this does not work locally: const __flash char* get_hallo (void) { [static] const __flash char *p2 = (const __flash char[]) { "Hallo2" }; return p2; } foo.c: In function 'get_hallo': foo.c: error: compound literal qualified by address-space qualifier Is there any way to make this work now? Would be great! 3. Will TARGET_ADDR_SPACE_DIAGNOSE_USAGE still work? Currently there is target hook TARGET_ADDR_SPACE_DIAGNOSE_USAGE. I did not see it in your patches, so maybe I just missed it? See https://gcc.gnu.org/onlinedocs/gcc-12.2.0/gccint/Named-Address-Spaces.html#index-TARGET_005fADDR_005fSPACE_005fDIAGNOSE_005fUSAGE 4. Will it be possible to put C++ virtual tables in ASs, and how? = One big complaint about avr-g++ is that there is no way to put vtables in flash (address-space 1) and to access them accordingly. How can this be achieved with C++ address spaces? Background: The AVR architecture has non-linear address space, and you cannot tell from the numeric value of an address whether it's in RAM or flash. 
You will have to use different instructions depending on the location. This means that .rodata must be located in RAM, because otherwise one would not know whether const char* pointed to RAM or flash, but to de-reference you'd need different instructions. One way out is named address spaces, so we could finally fix https://gcc.gnu.org/PR43745 Regards, Johann
Re: [PATCH] PR85678: Change default to -fno-common
Wilco Dijkstra schrieb: GCC currently defaults to -fcommon. As discussed in the PR, this is an ancient C feature which is not conforming with the latest C standards. On many targets this means global variable accesses have a codesize and performance penalty. This applies to C code only, C++ code is not affected by -fcommon. It is about time to change the default. OK for commit? IIRC using -fno-common might lead to some testsuit fallout because some optimizations / test cases are sensitive to -f[no-]common. So I wonder that no adjustments to test cases are needed? ChangeLog 2019-10-25 Wilco Dijkstra PR85678 * common.opt (fcommon): Change init to 1. doc/ * invoke.texi (-fcommon): Update documentation. --- diff --git a/gcc/common.opt b/gcc/common.opt index 0195b0cb85a06dd043fd0412b42dfffddfa2495b..b0840f41a5e480f4428bd62724b0dc3d54c68c0b 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1131,7 +1131,7 @@ Common Report Var(flag_combine_stack_adjustments) Optimization Looks for opportunities to reduce stack adjustments and stack references. fcommon -Common Report Var(flag_no_common,0) +Common Report Var(flag_no_common,0) Init(1) Put uninitialized globals in the common section. fcompare-debug diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 857d9692729e503657d0d0f44f1f6252ec90d49a..5b4ff66015f5f94a5bd89e4dc3d2d53553cc091e 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -568,7 +568,7 @@ Objective-C and Objective-C++ Dialects}. -fnon-call-exceptions -fdelete-dead-exceptions -funwind-tables @gol -fasynchronous-unwind-tables @gol -fno-gnu-unique @gol --finhibit-size-directive -fno-common -fno-ident @gol +-finhibit-size-directive -fcommon -fno-ident @gol -fpcc-struct-return -fpic -fPIC -fpie -fPIE -fno-plt @gol -fno-jump-tables @gol -frecord-gcc-switches @gol @@ -14050,35 +14050,27 @@ useful for building programs to run under WINE@. code that is not binary compatible with code generated without that switch. 
Use it to conform to a non-default application binary interface. -@item -fno-common -@opindex fno-common +@item -fcommon @opindex fcommon +@opindex fno-common @cindex tentative definitions -In C code, this option controls the placement of global variables -defined without an initializer, known as @dfn{tentative definitions} -in the C standard. Tentative definitions are distinct from declarations +In C code, this option controls the placement of global variables +defined without an initializer, known as @dfn{tentative definitions} +in the C standard. Tentative definitions are distinct from declarations of a variable with the @code{extern} keyword, which do not allocate storage. -Unix C compilers have traditionally allocated storage for -uninitialized global variables in a common block. This allows the -linker to resolve all tentative definitions of the same variable +The default is @option{-fno-common}, which specifies that the compiler places +uninitialized global variables in the BSS section of the object file. IMO "uninitialized" is confusing because the variables actually *are* initialized: with zero. It's just that the variables don't have explicit initializers. Dito for "uninitialized" in the --help message. Johann +This inhibits the merging of tentative definitions by the linker so you get a +multiple-definition error if the same variable is accidentally defined in more +than one compilation unit. + +The @option{-fcommon} places uninitialized global variables in a common block. +This allows the linker to resolve all tentative definitions of the same variable in different compilation units to the same object, or to a non-tentative -definition. -This is the behavior specified by @option{-fcommon}, and is the default for -GCC on most targets. -On the other hand, this behavior is not required by ISO -C, and on some targets may carry a speed or code size penalty on -variable references. 
- -The @option{-fno-common} option specifies that the compiler should instead -place uninitialized global variables in the BSS section of the object file. -This inhibits the merging of tentative definitions by the linker so -you get a multiple-definition error if the same -variable is defined in more than one compilation unit. -Compiling with @option{-fno-common} is useful on targets for which -it provides better performance, or if you wish to verify that the -program will work on other systems that always treat uninitialized -variable definitions this way. +definition. This behavior does not conform to ISO C, is inconsistent with C++, +and on many targets implies a speed and code size penalty on global variable +references. It is mainly useful to enable legacy code to link without errors. @item -fno-ident @opindex fno-ident
[patch,avr,committed] Remove an unused function (PR85969)
Applied as obvious Johann PR target/85969 * config/avr/gen-avr-mmcu-specs.c (str_prefix_p): Remove unused static function. --- trunk/gcc/config/avr/gen-avr-mmcu-specs.c 2019/10/25 14:39:06 277454 +++ trunk/gcc/config/avr/gen-avr-mmcu-specs.c 2019/10/25 15:13:23 277455 @@ -50,14 +50,6 @@ #define SPECFILE_USAGE_URL \ "https://gcc.gnu.org/gcc-5/changes.html"; -/* Return true iff STR starts with PREFIX. */ - -static bool -str_prefix_p (const char *str, const char *prefix) -{ - return strncmp (str, prefix, strlen (prefix)) == 0; -} - static const char header[] = "#\n"
[patch][avr] PR92055: Add switches to enable 64-bit [long] double.
Hi, this adds the possibility to enable IEEE compatible double and long double support in avr-gcc. It supports 2 configure options --with-double={32|64|32,64|64,32} --with-long-double={32|64|32,64|64,32|double} which select the default layout of these types and also chose which mutlilib variants are built and available. These two config option map to the new compiler options -mdouble= and -mlong-double= which are new multilib options. The patch only deals with option handling and multilib bits, it does not add any double functionality. The double support functions are supposed to be provided by avr-libc which also hosts all the float stuff, including __addsf3 etc. Ok for trunk? Johann gcc/ Support 64-bit double and 64-bit long double configurations. PR target/92055 * config.gcc (tm_defines) [avr]: Set from --with-double=, --with-long-double=. * config/avr/t-multilib: Remove. * config/avr/t-avr: Output of genmultilib.awk is now fully dynamically generated and no more part of the repo. (HAVE_DOUBLE_MULTILIB, HAVE_LONG_DOUBLE_MULTILIB): New variables. Pass them down to... * config/avr/genmultilib.awk: ...here and handle them. * gcc/config/avr/avr.opt (-mdouble=, avr_double). New option and var. (-mlong-double=, avr_long_double). New option and var. * common/config/avr/avr-common.c (opts.h): Include. (diagnostic.h): Include. (TARGET_OPTION_OPTIMIZATION_TABLE) <-mdouble=>: Set default as requested by --with-double=. <-mlong-double=>: Set default as requested by --with-long-double=. (TARGET_OPTION_OPTIMIZATION_TABLE) <-mdouble=, -mlong-double=>: Set default as requested by --with-double= (TARGET_HANDLE_OPTION): Define to this... (avr_handle_option): ...new hook worker. * config/avr/avr.h (DOUBLE_TYPE_SIZE): Define to avr_double. (LONG_DOUBLE_TYPE_SIZE): Define to avr_long_double. (avr_double_lib): New proto for spec function. (EXTRA_SPEC_FUNCTIONS): Add. (DRIVER_SELF_SPECS): Call %:double-lib. 
* config/avr/avr.c (avr_option_override): Assert sizeof(long double) >= sizeof(double) for the target. * config/avr/avr-c.c (avr_cpu_cpp_builtins) [__HAVE_DOUBLE_MULTILIB__, __HAVE_LONG_DOUBLE_MULTILIB__] [__HAVE_DOUBLE64__, __HAVE_DOUBLE32__, __DEFAULT_DOUBLE__=] [__HAVE_LONG_DOUBLE64__, __HAVE_LONG_DOUBLE32__] [__HAVE_LONG_DOUBLE_IS_DOUBLE__, __DEFAULT_LONG_DOUBLE__=]: New built-in defined depending on --with-double=, --with-long-double=. * config/avr/driver-avr.c (avr_double_lib): New spec function. * doc/invoke.tex (AVR Options) <-mdouble=,-mlong-double=>: Doc. libgcc/ Support 64-bit double and 64-bit long double configurations. PR target/92055 * config/avr/t-avr (HOST_LIBGCC2_CFLAGS): Only add -DF=SF if long double is a 32-bit type. * config/avr/t-avrlibc: Copy double64 and long-double64 multilib(s) from the vanilla one. * config/avr/t-copy-libgcc: New Makefile snip. Index: gcc/common/config/avr/avr-common.c === --- gcc/common/config/avr/avr-common.c (revision 277236) +++ gcc/common/config/avr/avr-common.c (working copy) @@ -23,6 +23,8 @@ #include "tm.h" #include "common/common-target.h" #include "common/common-target-def.h" +#include "opts.h" +#include "diagnostic.h" /* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */ static const struct default_options avr_option_optimization_table[] = @@ -43,9 +45,97 @@ static const struct default_options avr_ performance decrease. For the AVR though, disallowing data races introduces additional code in LIM and increases reg pressure. 
*/ { OPT_LEVELS_ALL, OPT_fallow_store_data_races, NULL, 1 }, + +#if defined (WITH_DOUBLE64) +{ OPT_LEVELS_ALL, OPT_mdouble_, NULL, 64 }, +#elif defined (WITH_DOUBLE32) +{ OPT_LEVELS_ALL, OPT_mdouble_, NULL, 32 }, +#else +#error "align this with config.gcc" +#endif + +#if defined (WITH_LONG_DOUBLE64) +{ OPT_LEVELS_ALL, OPT_mlong_double_, NULL, 64 }, +#elif defined (WITH_LONG_DOUBLE32) +{ OPT_LEVELS_ALL, OPT_mlong_double_, NULL, 32 }, +#else +#error "align this with config.gcc" +#endif + { OPT_LEVELS_NONE, 0, NULL, 0 } }; + +/* Implement `TARGET_HANDLE_OPTION'. */ + +static bool +avr_handle_option (struct gcc_options *opts, struct gcc_options*, + const struct cl_decoded_option *decoded, location_t loc) +{ + int value = decoded->value; + + switch (decoded->opt_index) +{ +case OPT_mdouble_: + if (value == 64) +{ +#if !defined (HAVE_DOUBLE64) + error_at (loc, "option %<-mdouble=64%> is only available if " +"configured %<--with-double={64|64,32|32,64}%>"); +#endif + opts->x_avr_long_double = 6
Re: [PATCH] Support multiple registers for the frame pointer
Am 04.11.19 um 16:22 schrieb Vladimir Makarov: On 2019-11-02 1:28 p.m., Kwok Cheung Yeung wrote: The AMD GCN architecture uses 64-bit pointers, but the scalar registers are 32-bit wide, so pointers must reside in a pair of registers. The two hard registers holding the frame pointer are currently fixed, but if they are changed to unfixed (so that the FP can be eliminated), GCC would sometimes allocate the second register to a pseudo while the frame pointer was in use, clobbering the value of the FP and crashing the program. GCC currently does not handle multi-register hard frame pointers properly - no_unit_alloc_regs, regs_ever_live, eliminable_regset and ira_no_alloc_regs (which gets copied to lra_no_alloc_regs) are only set for HARD_FRAME_POINTER_REGNUM and not for any subsequent registers that may be used, which means that the register allocators consider HARD_FRAME_POINTER_REGNUM+1 free. This patch determines the number of registers needed to store the frame pointer using hard_regno_nregs, and sets the required variables for HARD_FRAME_POINTER_REGNUM and however many adjacent registers are needed (which on most architectures should be zero). Bootstrapped on x86_64 and tested with no regressions, which is not surprising as nothing different happens when the FP fits into a single register. I believe this is true for the 64-bit variants of the more popular architectures as well (ARM, RS6000, MIPS, Sparc). Are there any other architectures similar to GCN (i.e. 64-bit pointers with 32-bit GPRs)? I have not included any specific testcases for this issue as it can affect pretty much everything not using -fomit-frame-pointer on AMD GCN. Okay for trunk? Yes. You can commit the patch to the trunk. Thank you. The avr port already uses 2 hard-reg frame pointer ever since... Does this patch has an impact on the avr port and its handling of the frame pointer? Johann
Re: [PATCH] Support multiple registers for the frame pointer
Kwok Cheung Yeung schrieb: The AMD GCN architecture uses 64-bit pointers, but the scalar registers are 32-bit wide, so pointers must reside in a pair of registers. The two hard registers holding the frame pointer are currently fixed, but if they are changed to unfixed (so that the FP can be eliminated), GCC would sometimes allocate the second register to a pseudo while the frame pointer was in use, clobbering the value of the FP and crashing the program. GCC currently does not handle multi-register hard frame pointers properly - no_unit_alloc_regs, regs_ever_live, eliminable_regset and ira_no_alloc_regs (which gets copied to lra_no_alloc_regs) are only set for HARD_FRAME_POINTER_REGNUM and not for any subsequent registers that may be used, which means that the register allocators consider HARD_FRAME_POINTER_REGNUM+1 free. This patch determines the number of registers needed to store the frame pointer using hard_regno_nregs, and sets the required variables for HARD_FRAME_POINTER_REGNUM and however many adjacent registers are needed (which on most architectures should be zero). Bootstrapped on x86_64 and tested with no regressions, which is not surprising as nothing different happens when the FP fits into a single register. I believe this is true for the 64-bit variants of the more popular architectures as well (ARM, RS6000, MIPS, Sparc). Are there any other architectures similar to GCN (i.e. 64-bit pointers with 32-bit GPRs)? If 16-bit pointers with 8-bit GPRs is similar enough: The avr port. Johann I have not included any specific testcases for this issue as it can affect pretty much everything not using -fomit-frame-pointer on AMD GCN. Okay for trunk? Kwok Yeung
Ping^1 [patch][avr] PR92055: Add switches to enable 64-bit [long] double.
Ping #1 Am 31.10.19 um 22:55 schrieb Georg-Johann Lay: Hi, this adds the possibility to enable IEEE compatible double and long double support in avr-gcc. It supports 2 configure options --with-double={32|64|32,64|64,32} --with-long-double={32|64|32,64|64,32|double} which select the default layout of these types and also chose which mutlilib variants are built and available. These two config option map to the new compiler options -mdouble= and -mlong-double= which are new multilib options. The patch only deals with option handling and multilib bits, it does not add any double functionality. The double support functions are supposed to be provided by avr-libc which also hosts all the float stuff, including __addsf3 etc. Ok for trunk? Johann gcc/ Support 64-bit double and 64-bit long double configurations. PR target/92055 * config.gcc (tm_defines) [avr]: Set from --with-double=, --with-long-double=. * config/avr/t-multilib: Remove. * config/avr/t-avr: Output of genmultilib.awk is now fully dynamically generated and no more part of the repo. (HAVE_DOUBLE_MULTILIB, HAVE_LONG_DOUBLE_MULTILIB): New variables. Pass them down to... * config/avr/genmultilib.awk: ...here and handle them. * gcc/config/avr/avr.opt (-mdouble=, avr_double). New option and var. (-mlong-double=, avr_long_double). New option and var. * common/config/avr/avr-common.c (opts.h): Include. (diagnostic.h): Include. (TARGET_OPTION_OPTIMIZATION_TABLE) <-mdouble=>: Set default as requested by --with-double=. <-mlong-double=>: Set default as requested by --with-long-double=. (TARGET_OPTION_OPTIMIZATION_TABLE) <-mdouble=, -mlong-double=>: Set default as requested by --with-double= (TARGET_HANDLE_OPTION): Define to this... (avr_handle_option): ...new hook worker. * config/avr/avr.h (DOUBLE_TYPE_SIZE): Define to avr_double. (LONG_DOUBLE_TYPE_SIZE): Define to avr_long_double. (avr_double_lib): New proto for spec function. (EXTRA_SPEC_FUNCTIONS) <double-lib>: Add. (DRIVER_SELF_SPECS): Call %:double-lib. 
* config/avr/avr.c (avr_option_override): Assert sizeof(long double) >= sizeof(double) for the target. * config/avr/avr-c.c (avr_cpu_cpp_builtins) [__HAVE_DOUBLE_MULTILIB__, __HAVE_LONG_DOUBLE_MULTILIB__] [__HAVE_DOUBLE64__, __HAVE_DOUBLE32__, __DEFAULT_DOUBLE__=] [__HAVE_LONG_DOUBLE64__, __HAVE_LONG_DOUBLE32__] [__HAVE_LONG_DOUBLE_IS_DOUBLE__, __DEFAULT_LONG_DOUBLE__=]: New built-in defined depending on --with-double=, --with-long-double=. * config/avr/driver-avr.c (avr_double_lib): New spec function. * doc/invoke.tex (AVR Options) <-mdouble=,-mlong-double=>: Doc. libgcc/ Support 64-bit double and 64-bit long double configurations. PR target/92055 * config/avr/t-avr (HOST_LIBGCC2_CFLAGS): Only add -DF=SF if long double is a 32-bit type. * config/avr/t-avrlibc: Copy double64 and long-double64 multilib(s) from the vanilla one. * config/avr/t-copy-libgcc: New Makefile snip.
Re: GCC wwwdocs move to git done
Am 09.10.19 um 02:27 schrieb Joseph Myers: I've done the move of GCC wwwdocs to git (using the previously posted and discussed scripts), including setting up the post-receive hook to do the same things previously covered by the old CVS hooks, and minimal updates to the web pages dealing with the CVS setup for wwwdocs. Hi, May it be the case that some parts are missing? In particular, I cannot find the source of https://gcc.gnu.org/install/configure.html Johann
Re: GCC wwwdocs move to git done
Am 06.11.19 um 15:03 schrieb Georg-Johann Lay: Am 09.10.19 um 02:27 schrieb Joseph Myers: I've done the move of GCC wwwdocs to git (using the previously posted and discussed scripts), including setting up the post-receive hook to do the same things previously covered by the old CVS hooks, and minimal updates to the web pages dealing with the CVS setup for wwwdocs. Hi, May it be the case that some parts are missing? In particular, I cannot find the source of https://gcc.gnu.org/install/configure.html Johann Ok, found it in install/README. I knew it had something special about it... Johann
Re: Ping^1 [patch][avr] PR92055: Add switches to enable 64-bit [long] double.
Am 06.11.19 um 11:39 schrieb Georg-Johann Lay: Ping #1 Am 31.10.19 um 22:55 schrieb Georg-Johann Lay: Hi, this adds the possibility to enable IEEE compatible double and long double support in avr-gcc. It supports 2 configure options --with-double={32|64|32,64|64,32} --with-long-double={32|64|32,64|64,32|double} which select the default layout of these types and also chose which mutlilib variants are built and available. These two config option map to the new compiler options -mdouble= and -mlong-double= which are new multilib options. The patch only deals with option handling and multilib bits, it does not add any double functionality. The double support functions are supposed to be provided by avr-libc which also hosts all the float stuff, including __addsf3 etc. Ok for trunk? Johann ..and here is the addendum that documents the new configure options. Index: gcc/doc/install.texi === --- gcc/doc/install.texi(revision 277236) +++ gcc/doc/install.texi(working copy) @@ -2277,15 +2277,45 @@ omitted from @file{libgcc.a} on the assu @samp{newlib}. @item --with-avrlibc -Specifies that @samp{AVR-Libc} is -being used as the target C library. This causes float support +Only supported for the AVR target. Specifies that @samp{AVR-Libc} is +being used as the target C@tie{} library. This causes float support functions like @code{__addsf3} to be omitted from @file{libgcc.a} on the assumption that it will be provided by @file{libm.a}. For more technical details, cf. @uref{http://gcc.gnu.org/PR54461,,PR54461}. -This option is only supported for the AVR target. It is not supported for +It is not supported for RTEMS configurations, which currently use newlib. The option is supported since version 4.7.2 and is the default in 4.8.0 and newer. +@item --with-double=@{32|64|32,64|64,32@} +@itemx --with-long-double=@{32|64|32,64|64,32|double@} +Only supported for the AVR target since version@tie{}10. 
+Specify the default layout available for the C/C++ @samp{double} +and @samp{long double} type, respectively. The following rules apply: +@itemize +@item +The first value after the @samp{=} specifies the default layout (in bits) +of the type and also the default for the @option{-mdouble=} resp. +@option{-mlong-double=} compiler option. +@item +If more than one value is specified, respective multilib variants are +available, and @option{-mdouble=} resp. @option{-mlong-double=} acts +as a multilib option. +@item +If @option{--with-long-double=double} is specified, @samp{double} and +@samp{long double} will have the same layout. +@item +If the configure option is not set, it defaults to @samp{32} which +is compatible with older versions of the compiler that use non-standard +32-bit types for @samp{double} and @samp{long double}. +@end itemize +Not all combinations of @option{--with-double=} and +@option{--with-long-double=} are valid. For example, the combination +@option{--with-double=32,64} @option{--with-long-double=32} will be +rejected because the first option specifies the availability of +multilibs for @samp{double}, whereas the second option implies +that @samp{long double} --- and hence also @samp{double} --- is always +32@tie{}bits wide. + @item --with-nds32-lib=@var{library} Specifies that @var{library} setting is used for building @file{libgcc.a}. Currently, the valid @var{library} is @samp{newlib} or @samp{mculib}.
Re: [patch][avr] PR92055: Add switches to enable 64-bit [long] double.
Am 07.11.19 um 10:41 schrieb Martin Liška: Hello. I've noticed quite some GNU coding style violations with your patch. Please next time, use something like: $ git diff HEAD~ > /tmp/patch && ./contrib/check_GNU_style.py /tmp/patch Thanks, Martin hm, I am actually using GNU style with Emacs... You mean the lines > 80 chars in config.gcc? I assumed that is no issue because there are already quite some lines that don't follow the < 80 rule. Johann
Re: [patch][avr] PR92055: Add switches to enable 64-bit [long] double.
Am 07.11.19 um 13:49 schrieb Martin Liška: On 11/7/19 1:39 PM, Georg-Johann Lay wrote: Am 07.11.19 um 10:41 schrieb Martin Liška: Hello. I've noticed quite some GNU coding style violations with your patch. Please next time, use something like: $ git diff HEAD~ > /tmp/patch && ./contrib/check_GNU_style.py /tmp/patch Thanks, Martin hm, I am actually using GNU style with Emacs... You mean the lines > 80 chars in config.gcc? I assumed that is no issue because there are already quite some lines that don't follow the < 80 rule. That's fine. I'm mainly talking about: === ERROR type #1: blocks of 8 spaces should be replaced with tabs (45 error(s)) === gcc/common/config/avr/avr-common.c:78:0: const struct cl_decoded_option *decoded, location_t loc) gcc/common/config/avr/avr-common.c:86:0:{ gcc/common/config/avr/avr-common.c:88:0: error_at (loc, "option %<-mdouble=64%> is only available if " gcc/common/config/avr/avr-common.c:89:0: "configured %<--with-double={64|64,32|32,64}%>"); gcc/common/config/avr/avr-common.c:91:0: opts->x_avr_long_double = 64; gcc/common/config/avr/avr-common.c:92:0:} gcc/common/config/avr/avr-common.c:94:0:{ ... Martin My intention was to avoid a mixup of TABs and spaces mode, because the avr backend is indented with spaces. So the indentation picks up the style from the context (just like you would do it in Python to avoid dreaded mixing of tabs and spaces). Tabifying the complete sources is also something which I didn't consider, because that makes porting much harder... Johann
[patch,avr] Add support for devices from the 0-series.
Hi, this patch adds support for a few more AVR devices. Because the offset where flash is seen in RAM deviates from the settings for the family (and hence also from the linker script defaults), a new field in avr_mcu_t is needed to express this so that specs can be generated appropriately. The AVR_MCU lines in avr-mcus.def are longer than 80 chars because it's easier to maintain 1 device = 1 line entries. And it's easier to scan them with the awk scripts. Ok for trunk? Johann Add support for AVR devices from the 0-series. * config/avr/avr-arch.h (avr_mcu_t) : New field. * config/avr/avr-devices.c (avr_mcu_types): Adjust initializers. * config/avr/avr-mcus.def (AVR_MCU): Add respective field. * config/avr/specs.h (LINK_SPEC) <%(link_pm_base_address)>: Add. * config/avr/gen-avr-mmcu-specs.c (print_mcu) <*cpp, *cpp_mcu, *cpp_avrlibc, *link_pm_base_address>: Emit code for spec definitions. * doc/avr-mmcu.texi: Regenerate. Index: config/avr/avr-arch.h === --- config/avr/avr-arch.h (revision 277953) +++ config/avr/avr-arch.h (working copy) @@ -126,6 +126,9 @@ const char *const macro; /* Flash size in bytes. */ int flash_size; + + /* Offset where flash is seen in the RAM address space. */ + int flash_pm_offset; } avr_mcu_t; /* AVR device specific features. Index: config/avr/avr-devices.c === --- config/avr/avr-devices.c (revision 277953) +++ config/avr/avr-devices.c (working copy) @@ -117,12 +117,12 @@ avr_texinfo[] = const avr_mcu_t avr_mcu_types[] = { -#define AVR_MCU(NAME, ARCH, DEV_ATTRIBUTE, MACRO, DATA_SEC, TEXT_SEC, FLASH_SIZE)\ - { NAME, ARCH, DEV_ATTRIBUTE, MACRO, DATA_SEC, TEXT_SEC, FLASH_SIZE }, +#define AVR_MCU(NAME, ARCH, DEV_ATTRIBUTE, MACRO, DATA_SEC, TEXT_SEC, FLASH_SIZE, PMOFF) \ + { NAME, ARCH, DEV_ATTRIBUTE, MACRO, DATA_SEC, TEXT_SEC, FLASH_SIZE, PMOFF }, #include "avr-mcus.def" #undef AVR_MCU /* End of list. 
*/ - { NULL, ARCH_UNKNOWN, AVR_ISA_NONE, NULL, 0, 0, 0 } + { NULL, ARCH_UNKNOWN, AVR_ISA_NONE, NULL, 0, 0, 0, 0 } }; Index: config/avr/avr-mcus.def === --- config/avr/avr-mcus.def (revision 277953) +++ config/avr/avr-mcus.def (working copy) @@ -61,313 +61,327 @@ supply respective built-in macro. FLASH_SIZEFlash size in bytes. + RODATA_PM_OFFSET + Either 0x0 or the offset where flash memory is mirrored + into the RAM address space accessible by LD and LDS. + This is only needed if that value deviates from the + value for the respective family. + "avr2" must be first for the "0" default to work as intended. */ /* Classic, <= 8K. */ -AVR_MCU ("avr2", ARCH_AVR2, AVR_ERRATA_SKIP, NULL, 0x0060, 0x0, 0x6) +AVR_MCU ("avr2", ARCH_AVR2, AVR_ERRATA_SKIP, NULL, 0x0060, 0x0, 0x6, 0) -AVR_MCU ("at90s2313",ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2313__", 0x0060, 0x0, 0x800) -AVR_MCU ("at90s2323",ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2323__", 0x0060, 0x0, 0x800) -AVR_MCU ("at90s2333",ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2333__", 0x0060, 0x0, 0x800) -AVR_MCU ("at90s2343",ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2343__", 0x0060, 0x0, 0x800) -AVR_MCU ("attiny22", ARCH_AVR2, AVR_SHORT_SP, "__AVR_ATtiny22__", 0x0060, 0x0, 0x800) -AVR_MCU ("attiny26", ARCH_AVR2, AVR_SHORT_SP, "__AVR_ATtiny26__", 0x0060, 0x0, 0x800) -AVR_MCU ("at90s4414",ARCH_AVR2, AVR_ISA_NONE, "__AVR_AT90S4414__", 0x0060, 0x0, 0x1000) -AVR_MCU ("at90s4433",ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S4433__", 0x0060, 0x0, 0x1000) -AVR_MCU ("at90s4434",ARCH_AVR2, AVR_ISA_NONE, "__AVR_AT90S4434__", 0x0060, 0x0, 0x1000) -AVR_MCU ("at90s8515",ARCH_AVR2, AVR_ERRATA_SKIP, "__AVR_AT90S8515__", 0x0060, 0x0, 0x2000) -AVR_MCU ("at90c8534",ARCH_AVR2, AVR_ISA_NONE, "__AVR_AT90C8534__", 0x0060, 0x0, 0x2000) -AVR_MCU ("at90s8535",ARCH_AVR2, AVR_ISA_NONE, "__AVR_AT90S8535__", 0x0060, 0x0, 0x2000) +AVR_MCU ("at90s2313",ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2313__", 0x0060, 0x0, 0x800, 0) +AVR_MCU ("at90s2323",ARCH_AVR2, AVR_SHORT_SP, 
"__AVR_AT90S2323__", 0x0060, 0x0, 0x800, 0) +AVR_MCU ("at90s2333",ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2333__", 0x0060, 0x0, 0x800, 0) +AVR_MCU ("at90s2343",ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2343__", 0x0060, 0x0, 0x800, 0) +AVR_MCU ("attiny22", ARCH_AVR2, AVR_SHORT_SP, "__AVR_ATtiny22__", 0x0060, 0x0, 0x800, 0) +AVR_MCU ("attiny26", ARCH_AVR2, AVR_SHORT_SP, "__AVR_ATtiny26__", 0x0060, 0x0, 0x800, 0) +AVR_MCU ("at90s4414",
Ping^1: [patch,avr] Add support for devices from the 0-series.
Ping ? Am 08.11.19 um 17:19 schrieb Georg-Johann Lay: Hi, this patch adds support for a few more AVR devices. Because the offset where flash is seen in RAM deviates from the settings for the family (and hence also from the linker script defaults), a new field in avr_mcu_t is needed to express this so that specs can be generated appropriately. The AVR_MCU lines in avr-mcus.def are longer than 80 chars because it's easier to maintain 1 device = 1 line entries. And it's easier to scan them with the awk scripts. Ok for trunk? Johann Add support for AVR devices from the 0-series. * config/avr/avr-arch.h (avr_mcu_t) : New field. * config/avr/avr-devices.c (avr_mcu_types): Adjust initializers. * config/avr/avr-mcus.def (AVR_MCU): Add respective field. * config/avr/specs.h (LINK_SPEC) <%(link_pm_base_address)>: Add. * config/avr/gen-avr-mmcu-specs.c (print_mcu) <*cpp, *cpp_mcu, *cpp_avrlibc, *link_pm_base_address>: Emit code for spec definitions. * doc/avr-mmcu.texi: Regenerate.
[wwwdocs] Add AVR news.
Added the following change to the v10 changes site. Johann diff --git a/htdocs/gcc-10/changes.html b/htdocs/gcc-10/changes.html index d6108269..7d96bc66 100644 --- a/htdocs/gcc-10/changes.html +++ b/htdocs/gcc-10/changes.html @@ -334,7 +334,54 @@ a work-in-progress. arm-uclinuxfdpiceabi, and the C library is uclibc-ng. - + +AVR + + Support for the XMEGA-like devices + + ATtiny202, ATtiny204, ATtiny402, ATtiny404, ATtiny406, ATtiny804, + ATtiny806, ATtiny807, ATtiny1604, ATtiny1606, ATtiny1607, ATmega808, + ATmega809, ATmega1608, ATmega1609, ATmega3208, ATmega3209, + ATmega4808, ATmega4809 + +has been added. + + +A new command line option -nodevicespecs has been added. +It allows to provide a custom device-specs file by means of + + avr-gcc -nodevicespecs -specs=my-spec-file+ +and without the need to provide options -B and +-mmcu=. +See +href="https://gcc.gnu.org/onlinedocs/gcc/AVR-Options.html#index-nodevicespecs";>AVR + command line options for details. +This feature is also available in v9.3+ and v8.4+. + + +New command line options -mdouble=[32,64] and +-mlong-double=[32,64] have been added. They allow +to chose the size (in bits) of the double and +long double types, respectively. Whether or not the +mentioned layouts are available, whether the options act +as a multilib option, and what is the default for either option +is controlled by the new +https://gcc.gnu.org/install/configure.html#avr";>AVR configure + options +--with-double= and --with-long-double=. + + +A new configure option --with-libf7= has been added. +It controls to which level avr-libgcc provides 64-bit floating point +support by means of +https://gcc.gnu.org/wiki/avr-gcc#LibF7";>LibF7. + + +A new configure option --with-double-comparison= has been +added. It's unlikely you need to set this option by hand. + +
[patch,avr,applied]: Simplify asm macro skip.
Applied the following trivial and obvious patch to the avr back. Johann libgcc/ * config/avr/lib1funcs.S (skip): Simplify. diff --git a/libgcc/config/avr/lib1funcs.S b/libgcc/config/avr/lib1funcs.S index 8ebdc01c88c..2ffa2090b25 100644 --- a/libgcc/config/avr/lib1funcs.S +++ b/libgcc/config/avr/lib1funcs.S @@ -169,11 +169,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see .endm ;; Skip next instruction, typically a jump target -#if defined(__AVR_TINY__) -#define skip cpse 0,0 -#else #define skip cpse 16,16 -#endif ;; Negate a 2-byte value held in consecutive registers .macro NEG2 reg
[avr,applied]: Remove a no more existing file from contrib/gcc_update::files_and_dependencies.
Hi, gcc/config/avr/t-multilib does no more exist, hence removed from the files to touch. Applied addendum to PR92055 (which removed that file) as obvious. Johann The mentioned auto-generated file is no more part of the GCC sources, it's auto-generated in $(builddir) during build. PR target/92055 * contrib/gcc_update (files_and_dependencies): Remove entry for gcc/config/avr/t-multilib. diff --git a/contrib/gcc_update b/contrib/gcc_update index c04b5dfb0a3..5df3297f7f8 100755 --- a/contrib/gcc_update +++ b/contrib/gcc_update @@ -82,7 +82,6 @@ gcc/fixinc/fixincl.x: gcc/fixinc/fixincl.tpl gcc/fixinc/inclhack.def gcc/config/aarch64/aarch64-tune.md: gcc/config/aarch64/aarch64-cores.def gcc/config/aarch64/gentune.sh gcc/config/arm/arm-tune.md: gcc/config/arm/arm-cpus.in gcc/config/arm/parsecpu.awk gcc/config/arm/arm-tables.opt: gcc/config/arm/arm-cpus.in gcc/config/arm/parsecpu.awk -gcc/config/avr/t-multilib: gcc/config/avr/avr-mcus.def gcc/config/avr/genmultilib.awk gcc/config/c6x/c6x-tables.opt: gcc/config/c6x/c6x-isas.def gcc/config/c6x/genopt.sh gcc/config/c6x/c6x-sched.md: gcc/config/c6x/c6x-sched.md.in gcc/config/c6x/gensched.sh gcc/config/c6x/c6x-mult.md: gcc/config/c6x/c6x-mult.md.in gcc/config/c6x/genmult.sh
[avr,committed] Add support for some avrxmega3 devices.
Applied this patchlet to add support for: ATtiny1604, ATtiny1606, ATtiny1607, ATtiny402, ATtiny404, ATtiny406, ATtiny804, ATtiny806, ATtiny807, ATtiny202, ATtiny204. Johann Add support for some more AVR devices from avrxmega3 family. * config/avr/avr-mcus.def (attiny1604, attiny1606, attiny1607) (attiny402, attiny404, attiny406) (attiny804, attiny806, attiny807) (attiny202, attiny204): Add AVR_MCU lines to support them. * doc/avr-mmcu.texi: Regenerate. Index: config/avr/avr-mcus.def === --- config/avr/avr-mcus.def (revision 279308) +++ config/avr/avr-mcus.def (revision 279309) @@ -307,6 +307,17 @@ AVR_MCU ("atxmega32c4", ARCH_AVRXME AVR_MCU ("atxmega32e5", ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_ATxmega32E5__", 0x2000, 0x0, 0x9000, 0) /* Xmega, Flash + RAM < 64K, flash visible in RAM address space */ AVR_MCU ("avrxmega3",ARCH_AVRXMEGA3, AVR_ISA_NONE, NULL, 0x3f00, 0x0, 0x8000, 0) +AVR_MCU ("attiny202",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny202__", 0x3f80, 0x0, 0x800, 0x8000) +AVR_MCU ("attiny204",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny204__", 0x3f80, 0x0, 0x800, 0x8000) +AVR_MCU ("attiny402",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny402__", 0x3f00, 0x0, 0x1000, 0x8000) +AVR_MCU ("attiny404",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny404__", 0x3f00, 0x0, 0x1000, 0x8000) +AVR_MCU ("attiny406",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny406__", 0x3f00, 0x0, 0x1000, 0x8000) +AVR_MCU ("attiny804",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny804__", 0x3e00, 0x0, 0x2000, 0x8000) +AVR_MCU ("attiny806",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny806__", 0x3e00, 0x0, 0x2000, 0x8000) +AVR_MCU ("attiny807",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny807__", 0x3e00, 0x0, 0x2000, 0x8000) +AVR_MCU ("attiny1604", ARCH_AVRXMEGA3, AVR_ISA_NONE, "__AVR_ATtiny1604__", 0x3c00, 0x0, 0x4000, 0x8000) +AVR_MCU ("attiny1606", ARCH_AVRXMEGA3, AVR_ISA_NONE, "__AVR_ATtiny1606__", 0x3c00, 0x0, 0x4000, 0x8000) +AVR_MCU ("attiny1607", ARCH_AVRXMEGA3, AVR_ISA_NONE, "__AVR_ATtiny1607__", 
0x3c00, 0x0, 0x4000, 0x8000) AVR_MCU ("attiny212",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny212__", 0x3f80, 0x0, 0x800, 0x8000) AVR_MCU ("attiny214",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny214__", 0x3f80, 0x0, 0x800, 0x8000) AVR_MCU ("attiny412",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny412__", 0x3f00, 0x0, 0x1000, 0x8000) Index: doc/avr-mmcu.texi === --- doc/avr-mmcu.texi (revision 279308) +++ doc/avr-mmcu.texi (revision 279309) @@ -54,7 +54,7 @@ @item avrxmega3 ``XMEGA'' devices with up to 64@tie{}KiB of combined program memory and RAM, and with program memory visible in the RAM address space. -@*@var{mcu}@tie{}= @code{attiny212}, @code{attiny214}, @code{attiny412}, @code{attiny414}, @code{attiny416}, @code{attiny417}, @code{attiny814}, @code{attiny816}, @code{attiny817}, @code{attiny1614}, @code{attiny1616}, @code{attiny1617}, @code{attiny3214}, @code{attiny3216}, @code{attiny3217}, @code{atmega808}, @code{atmega809}, @code{atmega1608}, @code{atmega1609}, @code{atmega3208}, @code{atmega3209}, @code{atmega4808}, @code{atmega4809}. +@*@var{mcu}@tie{}= @code{attiny202}, @code{attiny204}, @code{attiny212}, @code{attiny214}, @code{attiny402}, @code{attiny404}, @code{attiny406}, @code{attiny412}, @code{attiny414}, @code{attiny416}, @code{attiny417}, @code{attiny804}, @code{attiny806}, @code{attiny807}, @code{attiny814}, @code{attiny816}, @code{attiny817}, @code{attiny1604}, @code{attiny1606}, @code{attiny1607}, @code{attiny1614}, @code{attiny1616}, @code{attiny1617}, @code{attiny3214}, @code{attiny3216}, @code{attiny3217}, @code{atmega808}, @code{atmega809}, @code{atmega1608}, @code{atmega1609}, @code{atmega3208}, @code{atmega3209}, @code{atmega4808}, @code{atmega4809}. @item avrxmega4 ``XMEGA'' devices with more than 64@tie{}KiB and up to 128@tie{}KiB of program memory.
[patch,avr, 0/3] Support 64-bit (long) double.
Now that the avr backend can support 64-bit floats by means of configure-options --with-double= and --with-long-double=, this patch series adds some routines to support it. It's an ad-hoc, avr-specific implementation in assembly and GNU-C which is added as a new subfolder in libgcc/config/avr/libf7. Patch 1/3 is the GCC changes: Documentation and new avr-specific configure options: --with-libf7 selects to which level double support from libf7 is added to libgcc. --with-double-comparison selects what FLOAT_LIB_COMPARE_RETURNS_BOOL returns. I wrote the libf7 code from scratch and put it under GPL v3 + library exception, so it should be no problem to have it as part of libgcc. Patch 2/3 is the libgcc additions: --with-libf7 selects which makefile-snips from libf7 to use. Patch 3/3 is the actual libf7 implementation. A great deal of which is assembly, together with C + inline assembly for higher routines. Ok for trunk? Johann
[patch,avr, 1/3] Support 64-bit (long) double: The gcc part.
Am 16.12.19 um 17:40 schrieb Georg-Johann Lay: Patch 1/3 is the GCC changes: Documentation and new avr-specific configure options: --with-libf7 selects to which level double support from libf7 is added to libgcc. --with-double-comparison select what FLOAT_LIB_COMPARE_RETURNS_BOOL returns. Johann gcc/ * config.gcc (tm_defines) [target=avr]: Support --with-libf7, --with-double-comparison. * doc/install.texi: Document them. * config/avr/avr-c.c (avr_cpu_cpp_builtins) : New built-in defines. * doc/invoke.texi (AVR Built-in Macros): Document them. * config/avr/avr-protos.h (avr_float_lib_compare_returns_bool): New. * config/avr/avr.c (avr_float_lib_compare_returns_bool): New function. * config/avr/avr.h (FLOAT_LIB_COMPARE_RETURNS_BOOL): New macro. Index: gcc/config/avr/avr-c.c === --- gcc/config/avr/avr-c.c (revision 278667) +++ gcc/config/avr/avr-c.c (working copy) @@ -390,6 +390,20 @@ start address. This macro shall be used cpp_define (pfile, "__WITH_AVRLIBC__"); #endif /* WITH_AVRLIBC */ + // From configure --with-libf7={|libgcc|math|math-symbols|yes|no} + +#ifdef WITH_LIBF7_LIBGCC + cpp_define (pfile, "__WITH_LIBF7_LIBGCC__"); +#endif /* WITH_LIBF7_LIBGCC */ + +#ifdef WITH_LIBF7_MATH + cpp_define (pfile, "__WITH_LIBF7_MATH__"); +#endif /* WITH_LIBF7_MATH */ + +#ifdef WITH_LIBF7_MATH_SYMBOLS + cpp_define (pfile, "__WITH_LIBF7_MATH_SYMBOLS__"); +#endif /* WITH_LIBF7_MATH_SYMBOLS */ + // From configure --with-double={|32|32,64|64,32|64} #ifdef HAVE_DOUBLE_MULTILIB @@ -438,7 +452,23 @@ start address. This macro shall be used #error "align this with config.gcc" #endif - + // From configure --with-double-comparison={2|3} --with-libf7. + +#if defined (WITH_DOUBLE_COMPARISON) +#if WITH_DOUBLE_COMPARISON == 2 || WITH_DOUBLE_COMPARISON == 3 + /* The number of states a DFmode comparison libcall might take and + reflects what avr.c:FLOAT_LIB_COMPARE_RETURNS_BOOL returns for + DFmode. GCC's default is 3-state, but some libraries like libf7 + implement true / false (2-state). 
*/ + cpp_define_formatted (pfile, "__WITH_DOUBLE_COMPARISON__=%d", + WITH_DOUBLE_COMPARISON); +#else +#error "align this with config.gcc" +#endif +#else +#error "align this with config.gcc" +#endif + /* Define builtin macros so that the user can easily query whether non-generic address spaces (and which) are supported or not. This is only supported for C. For C++, a language extension is needed Index: gcc/config/avr/avr-protos.h === --- gcc/config/avr/avr-protos.h (revision 278667) +++ gcc/config/avr/avr-protos.h (working copy) @@ -128,6 +128,8 @@ extern bool avr_xload_libgcc_p (machine_ extern rtx avr_eval_addr_attrib (rtx x); extern bool avr_casei_sequence_check_operands (rtx *xop); +extern bool avr_float_lib_compare_returns_bool (machine_mode, enum rtx_code); + static inline unsigned regmask (machine_mode mode, unsigned regno) { Index: gcc/config/avr/avr.c === --- gcc/config/avr/avr.c (revision 278667) +++ gcc/config/avr/avr.c (working copy) @@ -14575,6 +14575,23 @@ avr_fold_builtin (tree fndecl, int n_arg return NULL_TREE; } + +/* Worker function for `FLOAT_LIB_COMPARE_RETURNS_BOOL'. */ + +bool +avr_float_lib_compare_returns_bool (machine_mode mode, enum rtx_code) +{ + if (mode == DFmode) +{ +#if WITH_DOUBLE_COMPARISON == 2 + return true; +#endif +} + + // This is the GCC default and also what AVR-LibC implements. + return false; +} + /* Initialize the GCC target structure. */ Index: gcc/config/avr/avr.h === --- gcc/config/avr/avr.h (revision 278667) +++ gcc/config/avr/avr.h (working copy) @@ -107,6 +107,9 @@ These two properties are reflected by bu #define BYTES_BIG_ENDIAN 0 #define WORDS_BIG_ENDIAN 0 +#define FLOAT_LIB_COMPARE_RETURNS_BOOL(mode, comparison) \ + avr_float_lib_compare_returns_bool (mode, comparison) + #ifdef IN_LIBGCC2 /* This is to get correct SI and DI modes in libgcc2.c (32 and 64 bits). 
*/ #define UNITS_PER_WORD 4 Index: gcc/config.gcc === --- gcc/config.gcc (revision 278552) +++ gcc/config.gcc (working copy) @@ -1303,6 +1303,46 @@ avr-*-*) tm_file="${tm_file} ${cpu_type}/avrlibc.h" tm_defines="${tm_defines} WITH_AVRLIBC" fi + # Work out avr_double_comparison which is 2 or 3 and is used in + # target hook FLOAT_LIB_COMPARE_RETURNS_BOOL to determine whether + # DFmode comparisons return 3-state or 2-state results. + case y${with_double_comparison} in + y | ytristate) + avr_double_comparison=3
Re: [patch,avr, 2/3] Support 64-bit (long) double: The libgcc changes.
Am 16.12.19 um 17:40 schrieb Georg-Johann Lay: Patch 2/3 is the libgcc additions: --with-libf7 selects which makefile-snips from libf7 to use. libgcc/ * config.host (tmake_file) [target=avr]: Add t-libf7, t-libf7-math, t-libf7-math-symbols as specified by --with-libf7=. * config/avr/t-avrlibc: Don't copy libgcc.a if there are modules depending on sizeof (double) or sizeof (long double). * config/avr/libf7: New folder. Index: libgcc/config.host === --- libgcc/config.host (revision 278552) +++ libgcc/config.host (working copy) @@ -514,6 +514,29 @@ arm*-*-eabi* | arm*-*-symbianelf* | arm* avr-*-*) # Make HImode functions for AVR tmake_file="${cpu_type}/t-avr t-fpbit" + # Make some DFmode functions from libf7, part of avr-libgcc. + # This must be prior to adding t-avrlibc. + case "y${with_libf7}" in + yno) + # No libf7 support. + ;; + ylibgcc) + tmake_file="$tmake_file ${cpu_type}/libf7/t-libf7" + ;; + ymath) + tmake_file="$tmake_file ${cpu_type}/libf7/t-libf7-math" + tmake_file="$tmake_file ${cpu_type}/libf7/t-libf7" + ;; + ymath-symbols | yyes | y) + tmake_file="$tmake_file ${cpu_type}/libf7/t-libf7-math-symbols" + tmake_file="$tmake_file ${cpu_type}/libf7/t-libf7-math" + tmake_file="$tmake_file ${cpu_type}/libf7/t-libf7" + ;; + *) + echo "Error: --with-libf7=${with_libf7} but can only be used with: 'libgcc', 'math', 'math-symbols', 'yes', 'no'" 1>&2 + exit 1 + ;; + esac if test x${with_avrlibc} != xno; then tmake_file="$tmake_file ${cpu_type}/t-avrlibc" fi Index: libgcc/config/avr/t-avrlibc === --- libgcc/config/avr/t-avrlibc (revision 278992) +++ libgcc/config/avr/t-avrlibc (working copy) @@ -65,6 +65,12 @@ LIB2FUNCS_EXCLUDE += \ _fixunssfdi \ _floatdisf _floatundisf +ifeq (,$(WITH_LIBF7_MATH_SYMBOLS)) + +# No modules depend on __SIZEOF_LONG_DOUBLE__ or __SIZEOF_DOUBLE__ +# which means we might have an opportunity to copy libgcc.a. +# WITH_LIBF7_MATH_SYMBOLS is set by libf7/t-libf7-math-symbols. 
+ ifneq (,$(findstring avr,$(MULTISUBDIR))) # We are not in the avr2 (default) subdir, hence copying will work. @@ -95,3 +101,4 @@ Makefile: t-copy-libgcc.dep endif endif +endif
[patch][avr] PR92606: Disable -fipa-icf-variables because it generates wrong code.
Hi, this patch turns off -fipa-icf-variables because it generates wrong code like for PR92606. As there is no target hook that could decide whether such optimizations are obsolete, disable such optimizations altogether until PR92932 (target hook to disable such optimizations depending on object attributes and address-space) is available. Ok to apply? Johann Work around PR ipa/92932 by disabling -fipa-icf-variables until PR92932 will have been solved. PR ipa/92932 PR target/92606 * common/config/avr/avr-common.c (avr_option_optimization_table) <-fipa-icf-variables>: Disable. Index: common/config/avr/avr-common.c === --- common/config/avr/avr-common.c (revision 279522) +++ common/config/avr/avr-common.c (working copy) @@ -38,6 +38,14 @@ static const struct default_options avr_ { OPT_LEVELS_ALL, OPT_fcaller_saves, NULL, 0 }, { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_mgas_isr_prologues, NULL, 1 }, { OPT_LEVELS_1_PLUS, OPT_mmain_is_OS_task, NULL, 1 }, + // FIXME: IPA incorrectly identifies variables in .progmem.data (accessed + // via LPM) with variables in .rodata (accessed via LD, LDD, LDS) like + // in PR92606. As there is no target hook to disable such optimizations + // depending on target attributes and / or address-spaces of the involved + // objects (filed as PR92932), ditch such malicious optimizations now until + // PR92932 is implemented and we can use that target hook to solve PR92606 + // properly. +{ OPT_LEVELS_ALL, OPT_fipa_icf_variables, NULL, 0 }, { OPT_LEVELS_NONE, 0, NULL, 0 } };
[patch][avr] New option -nodevicespecs to omit -specs=... in self specs.
Hi, currently device support in avr-gcc is accomplished by injecting a specs file by means of -specs=... in driver self specs. This patch adds a new avr driver option to omit the addition of the respective -specs option to give the user more freedom. Ok to apply? Johann * config/avr/avr.opt (-nodevicespecs): New driver option. * config/avr/driver-avr.c (avr_devicespecs_file): Only issue "-specs=device-specs/..." if that option is not set. * doc/invoke.texi (AVR Options) <-nodevicespecs>: Document. Index: config/avr/avr.opt === --- config/avr/avr.opt (revision 279522) +++ config/avr/avr.opt (working copy) @@ -118,3 +118,7 @@ Assume that all data in static storage c nodevicelib Driver Target Report RejectNegative Do not link against the device-specific library lib.a. + +nodevicespecs +Driver Target Report RejectNegative +Do not use the device-specific specs file device-specs/specs-. Index: config/avr/driver-avr.c === --- config/avr/driver-avr.c (revision 279522) +++ config/avr/driver-avr.c (working copy) @@ -26,8 +26,8 @@ along with GCC; see the file COPYING3. #include "diagnostic.h" #include "tm.h" -// Remove -nodevicelib from the command line if not needed -#define X_NODEVLIB "%
[PING^1][patch][avr] PR92606: Disable -fipa-icf-variables because it generates wrong code.
Ping #1. Hi, this patch turns off -fipa-icf-variables because it generates wrong code like for PR92606. As there is no target hook that could decide whether such optimizations are obsolete, disable such optimizations altogether until PR92932 (target hook to disable such optimizations depending on object attributes and address-space) is available. Ok to apply? Johann Work around PR ipa/92932 by disabling -fipa-icf-variables until PR92932 will have been solved. PR ipa/92932 PR target/92606 * common/config/avr/avr-common.c (avr_option_optimization_table) <-fipa-icf-variables>: Disable.
[PING^1][patch][avr] New option -nodevicespecs to omit -specs=... in self specs.
Ping #1 Hi, currently device support in avr-gcc is accomplished by injecting a specs file by means of -specs=... in driver self specs. This patch adds a new avr driver option to omit the addition of the respective -specs option to give the user more freedom. Ok to apply? Johann * config/avr/avr.opt (-nodevicespecs): New driver option. * config/avr/driver-avr.c (avr_devicespecs_file): Only issue "-specs=device-specs/..." if that option is not set. * doc/invoke.texi (AVR Options) <-nodevicespecs>: Document.
[PING^1][patch,avr, 0/3] Support 64-bit (long) double.
Ping #1 Now that the avr backend can support 64-bit floats by means of configure-options --with-double= and --with-long-double=, this patch series adds some routines to support it. It's an ad-hoc, avr-specific implementation in assembly and GNU-C which is added as a new subfolder in libgcc/config/avr/libf7. Patch 1/3 is the GCC changes: Documentation and new avr-specific configure options: --with-libf7 selects to which level double support from libf7 is added to libgcc. --with-double-comparison selects what FLOAT_LIB_COMPARE_RETURNS_BOOL returns. I wrote the libf7 code from scratch and put it under GPL v3 + library exception, so it should be no problem to have it as part of libgcc. Patch 2/3 is the libgcc additions: --with-libf7 selects which makefile-snips from libf7 to use. Patch 3/3 is the actual libf7 implementation. A great deal of which is assembly, together with C + inline assembly for higher routines. Ok for trunk? Johann
[PING^1][patch,avr, 0/3] Support 64-bit (long) double.
Ping #1 Now that the avr backend can support 64-bit floats by means of configure-options --with-double= and --with-long-double=, this patch series adds some routines to support it. It's an ad-hoc, avr-specific implementation in assembly and GNU-C which is added as a new subfolder in libgcc/config/avr/libf7. Patch 1/3 is the GCC changes: Documentation and new avr-specific configure options: --with-libf7 selects to which level double support from libf7 is added to libgcc. --with-double-comparison selects what FLOAT_LIB_COMPARE_RETURNS_BOOL returns. I wrote the libf7 code from scratch and put it under GPL v3 + library exception, so it should be no problem to have it as part of libgcc. Patch 2/3 is the libgcc additions: --with-libf7 selects which makefile-snips from libf7 to use. Patch 3/3 is the actual libf7 implementation. A great deal of which is assembly, together with C + inline assembly for higher routines. Ok for trunk? Johann
[PING^1][patch,avr, 2/3] Support 64-bit (long) double: The libgcc changes.
Ping #1 Am 16.12.19 um 17:40 schrieb Georg-Johann Lay: Patch 2/3 is the libgcc additions: --with-libf7 selects which makefile-snips from libf7 to use. libgcc/ * config.host (tmake_file) [target=avr]: Add t-libf7, t-libf7-math, t-libf7-math-symbols as specified by --with-libf7=. * config/avr/t-avrlibc: Don't copy libgcc.a if there are modules depending on sizeof (double) or sizeof (long double). * config/avr/libf7: New folder.
[PING^1][patch,avr, 3/3] Support 64-bit (long) double: libf7.
Ping #1 Am 16.12.19 um 17:40 schrieb Georg-Johann Lay: Patch 3/3 is the actual libf7 implementation. A great deal of which is assembly, together with C + inline assembly for higher routines. Johann libgcc/config/avr/libf7/ * t-libf7: New file. * t-libf7-math: New file. * t-libf7-math-symbols: New file. * libf7-common.mk: New file. * libf7-asm-object.mk: New file. * libf7-c-object.mk: New file. * asm-defs.h: New file. * libf7.h: New file. * libf7.c: New file. * libf7-asm.sx: New file. * libf7-array.def: New file. * libf7-const.def: New file. * libf7-constdef.h: New file. * f7renames.sh: New script. * f7wraps.sh: New script. * f7-renames.h: New generated file. * f7-wraps.h: New generated file.
[PING^1][patch,avr, 1/3] Support 64-bit (long) double: The gcc part.
Ping #1 Am 16.12.19 um 17:40 schrieb Georg-Johann Lay: Patch 1/3 is the GCC changes: Documentation and new avr-specific configure options: --with-libf7 selects to which level double support from libf7 is added to libgcc. --with-double-comparison select what FLOAT_LIB_COMPARE_RETURNS_BOOL returns. Johann gcc/ * config.gcc (tm_defines) [target=avr]: Support --with-libf7, --with-double-comparison. * doc/install.texi: Document them. * config/avr/avr-c.c (avr_cpu_cpp_builtins) : New built-in defines. * doc/invoke.texi (AVR Built-in Macros): Document them. * config/avr/avr-protos.h (avr_float_lib_compare_returns_bool): New. * config/avr/avr.c (avr_float_lib_compare_returns_bool): New function. * config/avr/avr.h (FLOAT_LIB_COMPARE_RETURNS_BOOL): New macro.
[PING^2][patch][avr] PR92606: Disable -fipa-icf-variables because it generates wrong code.
Ping #2. Hi, this patch turns off -fipa-icf-variables because it generates wrong code like for PR92606. As there is no target hook that could decide whether such optimizations are obsolete, disable such optimizations altogether until PR92932 (target hook to disable such optimizations depending on object attributes and address-space) is available. Ok to apply? Johann Work around PR ipa/92932 by disabling -fipa-icf-variables until PR92932 will have been solved. PR ipa/92932 PR target/92606 * common/config/avr/avr-common.c (avr_option_optimization_table) <-fipa-icf-variables>: Disable.
[PING^2][patch][avr] New option -nodevicespecs to omit -specs=... in self specs.
Ping #2 Hi, currently device support in avr-gcc is accomplished by injecting a specs file by means of -specs=... in driver self specs. This patch adds a new avr driver option to omit the addition of the respective -specs option to give the user more freedom. Ok to apply? Johann * config/avr/avr.opt (-nodevicespecs): New driver option. * config/avr/driver-avr.c (avr_devicespecs_file): Only issue "-specs=device-specs/..." if that option is not set. * doc/invoke.texi (AVR Options) <-nodevicespecs>: Document.
[Ping^2][patch,avr, 2/3] Support 64-bit (long) double: The libgcc changes.
Ping #2 Georg-Johann Lay schrieb: Am 16.12.19 um 17:40 schrieb Georg-Johann Lay: Patch 2/3 is the libgcc additions: --with-libf7 selects which makefile-snips from libf7 to use. libgcc/ * config.host (tmake_file) [target=avr]: Add t-libf7, t-libf7-math, t-libf7-math-symbols as specified by --with-libf7=. * config/avr/t-avrlibc: Don't copy libgcc.a if there are modules depending on sizeof (double) or sizeof (long double). * config/avr/libf7: New folder. Index: libgcc/config.host === --- libgcc/config.host (revision 278552) +++ libgcc/config.host (working copy) @@ -514,6 +514,29 @@ arm*-*-eabi* | arm*-*-symbianelf* | arm* avr-*-*) # Make HImode functions for AVR tmake_file="${cpu_type}/t-avr t-fpbit" + # Make some DFmode functions from libf7, part of avr-libgcc. + # This must be prior to adding t-avrlibc. + case "y${with_libf7}" in + yno) + # No libf7 support. + ;; + ylibgcc) + tmake_file="$tmake_file ${cpu_type}/libf7/t-libf7" + ;; + ymath) + tmake_file="$tmake_file ${cpu_type}/libf7/t-libf7-math" + tmake_file="$tmake_file ${cpu_type}/libf7/t-libf7" + ;; + ymath-symbols | yyes | y) + tmake_file="$tmake_file ${cpu_type}/libf7/t-libf7-math-symbols" + tmake_file="$tmake_file ${cpu_type}/libf7/t-libf7-math" + tmake_file="$tmake_file ${cpu_type}/libf7/t-libf7" + ;; + *) + echo "Error: --with-libf7=${with_libf7} but can only be used with: 'libgcc', 'math', 'math-symbols', 'yes', 'no'" 1>&2 + exit 1 + ;; + esac if test x${with_avrlibc} != xno; then tmake_file="$tmake_file ${cpu_type}/t-avrlibc" fi Index: libgcc/config/avr/t-avrlibc === --- libgcc/config/avr/t-avrlibc (revision 278992) +++ libgcc/config/avr/t-avrlibc (working copy) @@ -65,6 +65,12 @@ LIB2FUNCS_EXCLUDE += \ _fixunssfdi \ _floatdisf _floatundisf +ifeq (,$(WITH_LIBF7_MATH_SYMBOLS)) + +# No modules depend on __SIZEOF_LONG_DOUBLE__ or __SIZEOF_DOUBLE__ +# which means we might have an opportunity to copy libgcc.a. +# WITH_LIBF7_MATH_SYMBOLS is set by libf7/t-libf7-math-symbols. 
+ ifneq (,$(findstring avr,$(MULTISUBDIR))) # We are not in the avr2 (default) subdir, hence copying will work. @@ -95,3 +101,4 @@ Makefile: t-copy-libgcc.dep endif endif +endif
[Ping^2][patch,avr, 0/3] Support 64-bit (long) double.
Ping #2 Georg-Johann Lay schrieb: Now that the avr backend can support 64-bit floats by means of configure-options --with-double= and --with-long-double=, this patch series adds some routines to support it. It's an ad-hoc, avr-specific implementation in assembly and GNU-C which is added as a new subfolder in libgcc/config/avr/libf7. Patch 1/3 is the GCC changes: Documentation and new avr-specific configure options: --with-libf7 selects to which level double support from libf7 is added to libgcc. --with-double-comparison selects what FLOAT_LIB_COMPARE_RETURNS_BOOL returns. I wrote the libf7 code from scratch and put it under GPL v3 + library exception, so it should be no problem to have it as part of libgcc. Patch 2/3 is the libgcc additions: --with-libf7 selects which makefile-snips from libf7 to use. Patch 3/3 is the actual libf7 implementation. A great deal of which is assembly, together with C + inline assembly for higher routines. Ok for trunk? Johann
[Ping^2][patch,avr, 1/3] Support 64-bit (long) double: The gcc part.
Ping #2 Georg-Johann Lay schrieb: Am 16.12.19 um 17:40 schrieb Georg-Johann Lay: Patch 1/3 is the GCC changes: Documentation and new avr-specific configure options: --with-libf7 selects to which level double support from libf7 is added to libgcc. --with-double-comparison select what FLOAT_LIB_COMPARE_RETURNS_BOOL returns. Johann gcc/ * config.gcc (tm_defines) [target=avr]: Support --with-libf7, --with-double-comparison. * doc/install.texi: Document them. * config/avr/avr-c.c (avr_cpu_cpp_builtins) : New built-in defines. * doc/invoke.texi (AVR Built-in Macros): Document them. * config/avr/avr-protos.h (avr_float_lib_compare_returns_bool): New. * config/avr/avr.c (avr_float_lib_compare_returns_bool): New function. * config/avr/avr.h (FLOAT_LIB_COMPARE_RETURNS_BOOL): New macro. Index: gcc/config/avr/avr-c.c === --- gcc/config/avr/avr-c.c (revision 278667) +++ gcc/config/avr/avr-c.c (working copy) @@ -390,6 +390,20 @@ start address. This macro shall be used cpp_define (pfile, "__WITH_AVRLIBC__"); #endif /* WITH_AVRLIBC */ + // From configure --with-libf7={|libgcc|math|math-symbols|yes|no} + +#ifdef WITH_LIBF7_LIBGCC + cpp_define (pfile, "__WITH_LIBF7_LIBGCC__"); +#endif /* WITH_LIBF7_LIBGCC */ + +#ifdef WITH_LIBF7_MATH + cpp_define (pfile, "__WITH_LIBF7_MATH__"); +#endif /* WITH_LIBF7_MATH */ + +#ifdef WITH_LIBF7_MATH_SYMBOLS + cpp_define (pfile, "__WITH_LIBF7_MATH_SYMBOLS__"); +#endif /* WITH_LIBF7_MATH_SYMBOLS */ + // From configure --with-double={|32|32,64|64,32|64} #ifdef HAVE_DOUBLE_MULTILIB @@ -438,7 +452,23 @@ start address. This macro shall be used #error "align this with config.gcc" #endif - + // From configure --with-double-comparison={2|3} --with-libf7. + +#if defined (WITH_DOUBLE_COMPARISON) +#if WITH_DOUBLE_COMPARISON == 2 || WITH_DOUBLE_COMPARISON == 3 + /* The number of states a DFmode comparison libcall might take and + reflects what avr.c:FLOAT_LIB_COMPARE_RETURNS_BOOL returns for + DFmode. 
GCC's default is 3-state, but some libraries like libf7 + implement true / false (2-state). */ + cpp_define_formatted (pfile, "__WITH_DOUBLE_COMPARISON__=%d", + WITH_DOUBLE_COMPARISON); +#else +#error "align this with config.gcc" +#endif +#else +#error "align this with config.gcc" +#endif + /* Define builtin macros so that the user can easily query whether non-generic address spaces (and which) are supported or not. This is only supported for C. For C++, a language extension is needed Index: gcc/config/avr/avr-protos.h === --- gcc/config/avr/avr-protos.h (revision 278667) +++ gcc/config/avr/avr-protos.h (working copy) @@ -128,6 +128,8 @@ extern bool avr_xload_libgcc_p (machine_ extern rtx avr_eval_addr_attrib (rtx x); extern bool avr_casei_sequence_check_operands (rtx *xop); +extern bool avr_float_lib_compare_returns_bool (machine_mode, enum rtx_code); + static inline unsigned regmask (machine_mode mode, unsigned regno) { Index: gcc/config/avr/avr.c === --- gcc/config/avr/avr.c(revision 278667) +++ gcc/config/avr/avr.c(working copy) @@ -14575,6 +14575,23 @@ avr_fold_builtin (tree fndecl, int n_arg return NULL_TREE; } + +/* Worker function for `FLOAT_LIB_COMPARE_RETURNS_BOOL'. */ + +bool +avr_float_lib_compare_returns_bool (machine_mode mode, enum rtx_code) +{ + if (mode == DFmode) +{ +#if WITH_DOUBLE_COMPARISON == 2 + return true; +#endif +} + + // This is the GCC default and also what AVR-LibC implements. + return false; +} + /* Initialize the GCC target structure. */ Index: gcc/config/avr/avr.h === --- gcc/config/avr/avr.h(revision 278667) +++ gcc/config/avr/avr.h(working copy) @@ -107,6 +107,9 @@ These two properties are reflected by bu #define BYTES_BIG_ENDIAN 0 #define WORDS_BIG_ENDIAN 0 +#define FLOAT_LIB_COMPARE_RETURNS_BOOL(mode, comparison) \ + avr_float_lib_compare_returns_bool (mode, comparison) + #ifdef IN_LIBGCC2 /* This is to get correct SI and DI modes in libgcc2.c (32 and 64 bits). 
*/ #define UNITS_PER_WORD 4 Index: gcc/config.gcc === --- gcc/config.gcc (revision 278552) +++ gcc/config.gcc (working copy) @@ -1303,6 +1303,46 @@ avr-*-*) tm_file="${tm_file} ${cpu_type}/avrlibc.h" tm_defines="${tm_defines} WITH_AVRLIBC" fi + # Work out avr_double_comparison which is 2 or 3 and is used in + # target hook FLOAT_LIB_COMPARE_RETURNS_BOOL to determine whether + # DFmode compa
Re: [patch,avr, 1/3] Support 64-bit (long) double: The gcc part.
Jeff Law schrieb: On Mon, 2019-12-16 at 17:43 +0100, Georg-Johann Lay wrote: Am 16.12.19 um 17:40 schrieb Georg-Johann Lay: Patch 1/3 is the GCC changes: Documentation and new avr-specific configure options: --with-libf7 selects to which level double support from libf7 is added to libgcc. --with-double-comparison select what FLOAT_LIB_COMPARE_RETURNS_BOOL returns. Johann gcc/ * config.gcc (tm_defines) [target=avr]: Support --with-libf7, --with-double-comparison. * doc/install.texi: Document them. * config/avr/avr-c.c (avr_cpu_cpp_builtins) : New built-in defines. * doc/invoke.texi (AVR Built-in Macros): Document them. * config/avr/avr-protos.h (avr_float_lib_compare_returns_bool): New. * config/avr/avr.c (avr_float_lib_compare_returns_bool): New function. * config/avr/avr.h (FLOAT_LIB_COMPARE_RETURNS_BOOL): New macro. I'd suggest "--with-libf77" rather than "--with-libf7". jeff Why that second 7? I called it "libf7" because the internal _f_loat representation has a mantissa of 7 bytes. So libf56 would also be indicative. Actually the name does not matter very much... It just needs to have /some/ name. "f77" however puts it close to Fortran, but that code has absolutely nothing to do with Fortran whatsoever. Johann p.s. You know anything about Denis Chertykov? He used to reply to avr patches within hours, but my latest patches (after a long period where I didn't propose any patches) were all approved by you; not a single mail from Denis. So I am concerned if he's well. Maybe he left you or some other global maintainer a note? He's still listed as maintainer though.
Re: [patch][avr] PR92606: Disable -fipa-icf-variables because it generates wrong code.
Jeff Law schrieb: On Wed, 2019-12-18 at 16:30 +0100, Georg-Johann Lay wrote: Hi, this patch turns off -fipa-icf-variables because it generates wrong code like for PR92606. As there is no target hook that could decide whether such optimizations are obsolete, disable such optimizations altogether until PR92932 (target hook to disable such optimizations depending on object attributes and address-space) is available. Ok to apply? Johann Work around PR ipa/92932 by disabling -fipa-icf-variables until PR92932 will have been solved. PR ipa/92932 PR target/92606 * common/config/avr/avr-common.c (avr_option_optimization_table) <-fipa-icf-variables>: Disable. This seems backwards to me. Instead of disabling the optimization in the target files we should prevent the optimization from firing in cases where it can't reasonably work. Jeff The chances that this will be fixed are... tiny. As Andrew notes in a comment to PR92932, there are at least 2 other PRs that report wrong-code due to similar data optimization. He mentions different passes however. Whatever passes perform such wrong-code transforms, apart from a more conservative approach they will need a new target hook to properly fix PR92606 because target attributes / address spaces are involved. I'd highly appreciate correct code, even if it's at the expense of (yah, yet another) hack in the avr backend. In particular, because such optimizations will improve code only a tiny little bit -- if at all. Hence kicking out the culprit does not reduce code quality, also because IF such merging is legitimate, some cases can be caught by the linker with, say -fmerge-all-constants. If PR92932, PR92294, PR954666 will ever be fixed, I'd gladly remove the proposed 1-line disable-culprit-hack and implement the new target hook that PR92932 is supposed to bring. Johann
[patch,avr,applied]: Set -fsplit-wide-types-early.
This patch sets -fsplit-wide-types-early for avr as it appears that the old placement of that pass gives better code for that target. Applied as r380033. Johann * common/config/avr/avr-common.c (avr_option_optimization_table) [OPT_LEVELS_1_PLUS]: Set -fsplit-wide-types-early. Index: common/config/avr/avr-common.c === --- common/config/avr/avr-common.c +++ common/config/avr/avr-common.c @@ -40,6 +40,8 @@ static const struct default_options avr_ { OPT_LEVELS_ALL, OPT_fcaller_saves, NULL, 0 }, { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_mgas_isr_prologues, NULL, 1 }, { OPT_LEVELS_1_PLUS, OPT_mmain_is_OS_task, NULL, 1 }, +// Stick to the "old" placement of the subreg lowering pass. +{ OPT_LEVELS_1_PLUS, OPT_fsplit_wide_types_early, NULL, 1 }, /* Allow optimizer to introduce store data races. This used to be the default -- it was changed because bigger targets did not see any performance decrease. For the AVR though, disallowing data races
[patch,avr,applied] Adjust help messages.
This patch unifies help screen messages. Johann -- AVR: Overhaul help screen gcc/ * config/avr/avr.opt: Overhaul help screen.diff --git a/gcc/config/avr/avr.opt b/gcc/config/avr/avr.opt index ea35b7d5b4e..c3ca8379ee3 100644 --- a/gcc/config/avr/avr.opt +++ b/gcc/config/avr/avr.opt @@ -20,27 +20,27 @@ mcall-prologues Target Mask(CALL_PROLOGUES) Optimization -Use subroutines for function prologues and epilogues. +Optimization. Use subroutines for function prologues and epilogues. mmcu= Target RejectNegative Joined Var(avr_mmcu) MissingArgError(missing device or architecture after %qs) --mmcu=MCU Select the target MCU. +-mmcu= Select the target MCU. mgas-isr-prologues Target Var(avr_gasisr_prologues) UInteger Init(0) Optimization -Allow usage of __gcc_isr pseudo instructions in ISR prologues and epilogues. +Optimization. Allow usage of __gcc_isr pseudo instructions in ISR prologues and epilogues. mn-flash= Target RejectNegative Joined Var(avr_n_flash) UInteger Init(-1) -Set the number of 64 KiB flash segments. +This option is used internally. Set the number of 64 KiB flash segments. mskip-bug Target Mask(SKIP_BUG) -Indicate presence of a processor erratum. +This option is used internally. Indicate presence of a processor erratum. Do not skip 32-bit instructions. mrmw Target Mask(RMW) -Enable Read-Modify-Write (RMW) instructions support/use. +This option is used internally. Enable Read-Modify-Write (RMW) instructions support/use. mdeb Target Undocumented Mask(ALL_DEBUG) @@ -50,7 +50,7 @@ Target RejectNegative Joined Undocumented Var(avr_log_details) mshort-calls Target RejectNegative Mask(SHORT_CALLS) -Use RJMP / RCALL even though CALL / JMP are available. +This option is used internally for multilib generation and selection. Assume RJMP / RCALL can target all program memory. mint8 Target Mask(INT8) @@ -62,11 +62,11 @@ Change the stack pointer without disabling interrupts. 
mbranch-cost= Target Joined RejectNegative UInteger Var(avr_branch_cost) Init(0) Optimization -Set the branch costs for conditional branch instructions. Reasonable values are small, non-negative integers. The default branch cost is 0. +-mbranch-cost= Optimization. Set the branch costs for conditional branch instructions. Reasonable values are small, non-negative integers. The default branch cost is 0. mmain-is-OS_task Target Mask(MAIN_IS_OS_TASK) Optimization -Treat main as if it had attribute OS_task. +Optimization. Treat main as if it had attribute OS_task. morder1 Target Undocumented Mask(ORDER_1) @@ -80,7 +80,7 @@ Change only the low 8 bits of the stack pointer. mrelax Target Optimization -Relax branches. +Optimization. Relax branches. mpmem-wrap-around Target @@ -88,15 +88,15 @@ Make the linker relaxation machine assume that a program counter wrap-around occ maccumulate-args Target Mask(ACCUMULATE_OUTGOING_ARGS) Optimization -Accumulate outgoing function arguments and acquire/release the needed stack space for outgoing function arguments in function prologue/epilogue. Without this option, outgoing arguments are pushed before calling a function and popped afterwards. This option can lead to reduced code size for functions that call many functions that get their arguments on the stack like, for example printf. +Optimization. Accumulate outgoing function arguments and acquire/release the needed stack space for outgoing function arguments in function prologue/epilogue. Without this option, outgoing arguments are pushed before calling a function and popped afterwards. This option can lead to reduced code size for functions that call many functions that get their arguments on the stack like, for example printf. mstrict-X Target Var(avr_strict_X) Init(0) Optimization -When accessing RAM, use X as imposed by the hardware, i.e. just use pre-decrement, post-increment and indirect addressing with the X register. 
Without this option, the compiler may assume that there is an addressing mode X+const similar to Y+const and Z+const and emit instructions to emulate such an addressing mode for X. +Optimization. When accessing RAM, use X as imposed by the hardware, i.e. just use pre-decrement, post-increment and indirect addressing with the X register. Without this option, the compiler may assume that there is an addressing mode X+const similar to Y+const and Z+const and emit instructions to emulate such an addressing mode for X. mflmap Target Var(avr_flmap) Init(0) -The device has the bitfield NVMCTRL_CTRLB.FLMAP. This option is used internally. +This option is used internally. The device has the bitfield NVMCTRL_CTRLB.FLMAP. mrodata-in-ram Target Var(avr_rodata_in_ram) Init(-1) @@ -105,15 +105,15 @@ The device has the .rodata section located in the RAM area. ;; For rationale behind -msp8 see explanation in avr.h. msp8 Target RejectNegative Var(avr_sp8) Init(0) -The device has no SPH special function register. This option will be overridden by the compile
[patch,avr,applied] Take into account -mtiny-stack in frame pointer adjustments
Applied this addendum to avr PR114100: When the frame pointer is adjusted and -mtiny-stack is set, then it is enough to adjust the low part of the frame pointer. Johann -- AVR: target/114100 - Factor in -mtiny-stack in frame pointer adjustments gcc/ PR target/114100 * config/avr/avr.cc (avr_out_plus_1) [-mtiny-stack]: Only adjust the low part of the frame pointer with 8-bit stack pointer. diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index 94ef7c591a9..d39d6707c97 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -8983,14 +8983,17 @@ avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc, && frame_pointer_needed && REGNO (xop[0]) == FRAME_POINTER_REGNUM) { - rtx xval16 = simplify_gen_subreg (HImode, xval, imode, i); - if (xval16 == const1_rtx || xval16 == constm1_rtx) + if (AVR_HAVE_8BIT_SP) + { + avr_asm_len ("subi %A0,%n2", xop, plen, 1); + return; + } + else if (xop[2] == const1_rtx || xop[2] == constm1_rtx) { - avr_asm_len ((code == PLUS) == (xval16 == const1_rtx) + avr_asm_len (xop[2] == const1_rtx ? "ld __tmp_reg__,%a0+" : "ld __tmp_reg__,-%a0", xop, plen, 1); - i++; - continue; + return; } }
[patch,avr,applied] Avoid magic numbers for register numbers.
There are some places where avr.cc uses magic numbers like 17 that are actually register numbers. This patch defines constants like REG_17 and uses them instead of the magic numbers when a register number is meant. Johann -- AVR: Use REG_ constants instead of magic numbers . There are some places where avr.cc uses magic numbers like 17 that are actually register numbers. This patch defines constants like REG_17 and uses them instead of the magic numbers when a register number is meant. gcc/ * config/avr/avr.md (REG_0, ... REG_36): New define_constants. * config/avr/avr.cc: Use them instead of magic numbers when it means a register number. diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index e312ddfbff4..5c71c7f8c0d 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -171,10 +171,10 @@ static bool avr_rtx_costs (rtx, machine_mode, int, int, int *, bool); /* Allocate registers from r25 to r8 for parameters for function calls. */ -#define FIRST_CUM_REG 26 +#define FIRST_CUM_REG REG_26 /* Last call saved register */ -#define LAST_CALLEE_SAVED_REG (AVR_TINY ? 19 : 17) +#define LAST_CALLEE_SAVED_REG (AVR_TINY ? REG_19 : REG_17) /* Implicit target register of LPM instruction (R0) */ extern GTY(()) rtx lpm_reg_rtx; @@ -197,8 +197,8 @@ extern GTY(()) rtx cc_reg_rtx; rtx cc_reg_rtx; /* RTXs for all general purpose registers as QImode */ -extern GTY(()) rtx all_regs_rtx[32]; -rtx all_regs_rtx[32]; +extern GTY(()) rtx all_regs_rtx[REG_32]; +rtx all_regs_rtx[REG_32]; /* SREG, the processor status */ extern GTY(()) rtx sreg_rtx; @@ -542,7 +542,7 @@ avr_casei_sequence_check_operands (rtx *xop) if (AVR_HAVE_EIJMP_EICALL // The last clobber op of the tablejump. 
- && xop[8] == all_regs_rtx[24]) + && xop[8] == all_regs_rtx[REG_24]) { // $6 is: (subreg:SI ($5) 0) sub_5 = xop[6]; @@ -1171,7 +1171,7 @@ avr_init_machine_status (void) void avr_init_expanders (void) { - for (int regno = 0; regno < 32; regno ++) + for (int regno = REG_0; regno < REG_32; regno ++) all_regs_rtx[regno] = gen_rtx_REG (QImode, regno); lpm_reg_rtx = all_regs_rtx[LPM_REGNO]; @@ -1549,7 +1549,7 @@ avr_regs_to_save (HARD_REG_SET *set) || cfun->machine->is_OS_main) return 0; - for (int reg = 0; reg < 32; reg++) + for (int reg = REG_0; reg < REG_32; reg++) { /* Do not push/pop __tmp_reg__, __zero_reg__, as well as any global register variables. */ @@ -2300,9 +2300,9 @@ avr_pass_fuse_add::execute (function *func) FOR_EACH_BB_FN (bb, func) { - Ldi_Insn prev_ldi_insns[32]; - Add_Insn prev_add_insns[32]; - Mem_Insn prev_mem_insns[32]; + Ldi_Insn prev_ldi_insns[REG_32]; + Add_Insn prev_add_insns[REG_32]; + Mem_Insn prev_mem_insns[REG_32]; rtx_insn *insn, *curr; avr_dump ("\n;; basic block %d\n\n", bb->index); @@ -2484,7 +2484,7 @@ avr_incoming_return_addr_rtx (void) static int avr_hregs_split_reg (HARD_REG_SET *set) { - for (int regno = 0; regno < 32; regno++) + for (int regno = REG_0; regno < REG_32; regno++) if (TEST_HARD_REG_BIT (*set, regno)) { // Don't remove a register from *SET which might indicate that @@ -2620,9 +2620,9 @@ avr_prologue_setup_frame (HOST_WIDE_INT size, HARD_REG_SET set) first_reg = (LAST_CALLEE_SAVED_REG + 1) - (live_seq - 2); - for (reg = 29, offset = -live_seq + 1; + for (reg = REG_29, offset = -live_seq + 1; reg >= first_reg; - reg = (reg == 28 ? LAST_CALLEE_SAVED_REG : reg - 1), ++offset) + reg = (reg == REG_28 ? 
LAST_CALLEE_SAVED_REG : reg - 1), ++offset) { rtx m, r; @@ -2636,7 +2636,7 @@ avr_prologue_setup_frame (HOST_WIDE_INT size, HARD_REG_SET set) } else /* !minimize */ { - for (int reg = 0; reg < 32; ++reg) + for (int reg = REG_0; reg < REG_32; ++reg) if (TEST_HARD_REG_BIT (set, reg)) emit_push_byte (reg, true); @@ -3795,7 +3795,7 @@ avr_print_operand (FILE *file, rtx x, int code) { if (x == zero_reg_rtx) fprintf (file, "__zero_reg__"); - else if (code == 'r' && REGNO (x) < 32) + else if (code == 'r' && REGNO (x) < REG_32) fprintf (file, "%d", (int) REGNO (x)); else fprintf (file, "%s", reg_names[REGNO (x) + abcd]); @@ -4136,7 +4136,9 @@ avr_asm_final_postscan_insn (FILE *stream, rtx_insn *insn, rtx *, int) int avr_function_arg_regno_p (int r) { - return AVR_TINY ? IN_RANGE (r, 20, 25) : IN_RANGE (r, 8, 25); + return AVR_TINY +? IN_RANGE (r, REG_20, REG_25) +: IN_RANGE (r, REG_8, REG_25); } @@ -4148,7 +4150,7 @@ void avr_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, rtx libname, tree fndecl ATTRIBUTE_UNUSED) { - cum->nregs = AVR_TINY ? 6 : 18; + cum->nregs = 1 + AVR_TINY ? REG_25 - REG_20 : REG_25 - REG_8; cum->regno = FIRST_CUM_REG; cum->has_stack_args = 0; if (!libname && stdarg_p (fntype)) @@ -4216,7 +4218,7 @@ avr_function_arg_advance (
[patch,avr,applied] ad target/92729: Remove last cc0 remains.
Removed the last cc0 remains. Johann -- AVR: ad target/92729 - Remove insn attribute "cc" and its (dead) uses. The backend has remains of cc0 condition code. Unfortunately, all that information is useless with CCmode, and their use was removed with the removal of NOTICE_UPDATE_CC in PR92729 with r12-226 and r12-327. gcc/ PR target/92729 * config/avr/avr.md (define_attr "cc"): Remove. * config/avr/avr-protos.h (avr_out_plus): Remove pcc argument from prototype. * config/avr/avr.cc (avr_out_plus_1): Remove pcc argument and its uses. Add insn argument. (avr_out_plus_symbol): Remove pcc argument and its uses. (avr_out_plus): Remove pcc argument and its uses. Adjust calls of avr_out_plus_symbol and avr_out_plus_1. (avr_out_round): Adjust call of avr_out_plus.diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h index 064a3d23322..f4f3ffd8f28 100644 --- a/gcc/config/avr/avr-protos.h +++ b/gcc/config/avr/avr-protos.h @@ -93,7 +93,7 @@ extern bool avr_split_tiny_move (rtx_insn *insn, rtx *operands); extern void avr_output_addr_vec (rtx_insn*, rtx); extern const char *avr_out_sbxx_branch (rtx_insn *insn, rtx operands[]); extern const char* avr_out_bitop (rtx, rtx*, int*); -extern const char* avr_out_plus (rtx, rtx*, int* =NULL, int* =NULL, bool =true); +extern const char* avr_out_plus (rtx, rtx*, int* =NULL, bool =true); extern const char* avr_out_round (rtx_insn *, rtx*, int* =NULL); extern const char* avr_out_addto_sp (rtx*, int*); extern const char* avr_out_xload (rtx_insn *, rtx*, int*); diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index b86f4313fe2..44d6e141b62 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -8799,6 +8799,7 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *len) /* Output addition of register XOP[0] and compile time constant XOP[2]. + INSN is a single_set insn or an insn pattern.
CODE == PLUS: perform addition by using ADD instructions or CODE == MINUS: perform addition by using SUB instructions: @@ -8811,7 +8812,6 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *len) If PLEN == NULL, print assembler instructions to perform the operation; otherwise, set *PLEN to the length of the instruction sequence (in words) printed with PLEN == NULL. XOP[3] is an 8-bit scratch register or NULL_RTX. - Set *PCC to effect on cc0 according to respective CC_* insn attribute. CODE_SAT == UNKNOWN: Perform ordinary, non-saturating operation. CODE_SAT != UNKNOWN: Perform operation and saturate according to CODE_SAT. @@ -8825,7 +8825,7 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *len) fixed-point rounding, cf. `avr_out_round'. */ static void -avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc, +avr_out_plus_1 (rtx /*insn*/, rtx *xop, int *plen, enum rtx_code code, enum rtx_code code_sat, int sign, bool out_label) { /* MODE of the operation. */ @@ -8861,8 +8861,6 @@ avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc, if (REG_P (xop[2])) { - *pcc = MINUS == code ? (int) CC_SET_CZN : (int) CC_CLOBBER; - for (int i = 0; i < n_bytes; i++) { /* We operate byte-wise on the destination. */ @@ -,21 +8886,13 @@ avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc, goto saturate; } - /* Except in the case of ADIW with 16-bit register (see below) - addition does not set cc0 in a usable way. */ - - *pcc = (MINUS == code) ? CC_SET_CZN : CC_CLOBBER; - if (CONST_FIXED_P (xval)) xval = avr_to_int_mode (xval); /* Adding/Subtracting zero is a no-op. */ if (xval == const0_rtx) -{ - *pcc = CC_NONE; - return; -} +return; if (MINUS == code) xval = simplify_unary_operation (NEG, imode, xval, imode); @@ -8947,9 +8937,6 @@ avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc, /* To get usable cc0 no low-bytes must have been skipped. 
*/ - if (i && !started) - *pcc = CC_CLOBBER; - if (!started && i % 2 == 0 && i + 2 <= n_bytes @@ -8968,9 +8955,6 @@ avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc, started = true; avr_asm_len (code == PLUS ? "adiw %0,%1" : "sbiw %0,%1", op, plen, 1); - - if (n_bytes == 2 && PLUS == code) - *pcc = CC_SET_CZN; } i++; @@ -9018,7 +9002,6 @@ avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc, { avr_asm_len ((code == PLUS) ^ (val8 == 1) ? "dec %0" : "inc %0", op, plen, 1); - *pcc = CC_CLOBBER; break; } @@ -9077,8 +9060,6 @@ avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc, if (UNKNOWN == code_sat) return; - *pcc = (int) CC_CLOBBER; - /* Vanilla addition/subtraction is done. We are left with saturation. We have to compute A = A B where A is a register and @@ -9298,7 +9279,7
[avr,patch,applied] ad target/114100 - Don't print unused frame pointer adjustments.
This addendum ports a corner case optimization from -mno-fuse-add to -mfuse-add: When a base register needs temporary adjustment, and the base is the frame pointer, then there are cases where the post-adjustment is not needed. Passes without new regressions on ATtiny40. Johann -- AVR: ad target/114100 - Don't print unused frame pointer adjustments. Without -mfuse-add, when fake reg+offset addressing is used, the output routines are saving some instructions when the base reg is unused after. This patch adds that optimization for the case when the base is the frame pointer and the frame pointer adjustments are split away from the move insn by -mfuse-add in .split2. Direct usage of reg_unused_after is not possible because that function looks at the destination of the current insn, which won't work for offsetting the frame pointer in printing PLUS code. It can use an extended version of _reg_unused_after though. gcc/ PR target/114100 * config/avr/avr-protos.h (_reg_unused_after): Remove proto. * config/avr/avr.cc (_reg_unused_after): Make static. And add 3rd argument to skip the current insn. (reg_unused_after): Adjust call of _reg_unused_after.
(avr_out_plus_1) [AVR_TINY && -mfuse-add >= 2]: Don't output unneeded frame pointer adjustments.diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h index f4f3ffd8f28..3e19409d636 100644 --- a/gcc/config/avr/avr-protos.h +++ b/gcc/config/avr/avr-protos.h @@ -110,7 +110,6 @@ extern const char* avr_out_reload_inpsi (rtx*, rtx, int*); extern const char* avr_out_lpm (rtx_insn *, rtx*, int*); extern void avr_notice_update_cc (rtx body, rtx_insn *insn); extern int reg_unused_after (rtx_insn *insn, rtx reg); -extern int _reg_unused_after (rtx_insn *insn, rtx reg); extern int avr_jump_mode (rtx x, rtx_insn *insn); extern int test_hard_reg_class (enum reg_class rclass, rtx x); extern int jump_over_one_insn_p (rtx_insn *insn, rtx dest); diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index 44d6e141b62..7df21432dda 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -163,6 +163,7 @@ static int avr_operand_rtx_cost (rtx, machine_mode, enum rtx_code, int, bool); static void output_reload_in_const (rtx *, rtx, int *, bool); static struct machine_function *avr_init_machine_status (void); +static int _reg_unused_after (rtx_insn *insn, rtx reg, bool look_at_insn); /* Prototypes for hook implementors if needed before their implementation. */ @@ -8825,7 +8826,7 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *len) fixed-point rounding, cf. `avr_out_round'. */ static void -avr_out_plus_1 (rtx /*insn*/, rtx *xop, int *plen, enum rtx_code code, +avr_out_plus_1 (rtx insn, rtx *xop, int *plen, enum rtx_code code, enum rtx_code code_sat, int sign, bool out_label) { /* MODE of the operation. 
*/ @@ -8973,6 +8974,10 @@ avr_out_plus_1 (rtx /*insn*/, rtx *xop, int *plen, enum rtx_code code, && frame_pointer_needed && REGNO (xop[0]) == FRAME_POINTER_REGNUM) { + if (INSN_P (insn) + && _reg_unused_after (as_a (insn), xop[0], false)) + return; + if (AVR_HAVE_8BIT_SP) { avr_asm_len ("subi %A0,%n2", xop, plen, 1); @@ -10818,31 +10823,32 @@ int reg_unused_after (rtx_insn *insn, rtx reg) { return (dead_or_set_p (insn, reg) - || (REG_P (reg) && _reg_unused_after (insn, reg))); + || (REG_P (reg) && _reg_unused_after (insn, reg, true))); } -/* Return nonzero if REG is not used after INSN. +/* A helper for the previous function. + Return nonzero if REG is not used after INSN. We assume REG is a reload reg, and therefore does not live past labels. It may live past calls or jumps though. */ int -_reg_unused_after (rtx_insn *insn, rtx reg) +_reg_unused_after (rtx_insn *insn, rtx reg, bool look_at_insn) { - enum rtx_code code; - rtx set; - - /* If the reg is set by this instruction, then it is safe for our - case. Disregard the case where this is a store to memory, since - we are checking a register used in the store address. */ - set = single_set (insn); - if (set && !MEM_P (SET_DEST (set)) - && reg_overlap_mentioned_p (reg, SET_DEST (set))) -return 1; + if (look_at_insn) +{ + /* If the reg is set by this instruction, then it is safe for our + case. Disregard the case where this is a store to memory, since + we are checking a register used in the store address. */ + rtx set = single_set (insn); + if (set && !MEM_P (SET_DEST (set)) + && reg_overlap_mentioned_p (reg, SET_DEST (set))) + return 1; +} while ((insn = NEXT_INSN (insn))) { rtx set; - code = GET_CODE (insn); + enum rtx_code code = GET_CODE (insn); #if 0 /* If this is a label that existed before reload, then the register
[patch,avr,applied] Use more C++ ish coding style.
This is a no-op patch that uses some more C++ / C99 features if possible. Johann -- AVR: Use more C++ ish coding style. gcc/ * config/avr/avr.cc: Resolve ATTRIBUTE_UNUSED. Use bool in place of int for boolean logic (if possible). Move declarations to definitions (if possible). * config/avr/avr.md: Use C++ comments. Fix some indentation glitches. * config/avr/avr-dimode.md: Same. * config/avr/constraints.md: Same. * config/avr/predicates.md: Same.diff --git a/gcc/config/avr/avr-dimode.md b/gcc/config/avr/avr-dimode.md index 6fcabdaaf6e..4b74e77e5e5 100644 --- a/gcc/config/avr/avr-dimode.md +++ b/gcc/config/avr/avr-dimode.md @@ -43,8 +43,8 @@ ;; so that no DImode insn contains pseudos or needs reloading. (define_constants - [(ACC_A 18) - (ACC_B 10)]) + [(ACC_A 18) + (ACC_B 10)]) ;; Supported modes that are 8 bytes wide (define_mode_iterator ALL8 [DI DQ UDQ DA UDA TA UTA]) diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index 7df21432dda..c8b2b504e3f 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -163,7 +163,7 @@ static int avr_operand_rtx_cost (rtx, machine_mode, enum rtx_code, int, bool); static void output_reload_in_const (rtx *, rtx, int *, bool); static struct machine_function *avr_init_machine_status (void); -static int _reg_unused_after (rtx_insn *insn, rtx reg, bool look_at_insn); +static bool _reg_unused_after (rtx_insn *insn, rtx reg, bool look_at_insn); /* Prototypes for hook implementors if needed before their implementation. */ @@ -648,8 +648,6 @@ avr_optimize_casesi (rtx_insn *insns[5], rtx *xop) start_sequence(); - rtx_insn *seq1, *seq2, *last1, *last2; - rtx reg = copy_to_mode_reg (mode, xop[10]); rtx (*gen_add)(rtx,rtx,rtx) = QImode == mode ? 
gen_addqi3 : gen_addhi3; @@ -665,8 +663,8 @@ avr_optimize_casesi (rtx_insn *insns[5], rtx *xop) JUMP_LABEL (cbranch) = xop[4]; ++LABEL_NUSES (xop[4]); - seq1 = get_insns(); - last1 = get_last_insn(); + rtx_insn *seq1 = get_insns(); + rtx_insn *last1 = get_last_insn(); end_sequence(); emit_insn_after (seq1, insns[2]); @@ -686,8 +684,8 @@ avr_optimize_casesi (rtx_insn *insns[5], rtx *xop) emit_insn (pat_4); - seq2 = get_insns(); - last2 = get_last_insn(); + rtx_insn *seq2 = get_insns(); + rtx_insn *last2 = get_last_insn(); end_sequence(); emit_insn_after (seq2, insns[3]); @@ -1309,7 +1307,7 @@ avr_mem_memx_p (rtx x) /* A helper for the subsequent function attribute used to dig for attribute 'name' in a FUNCTION_DECL or FUNCTION_TYPE */ -static inline int +static inline bool avr_lookup_function_attribute1 (const_tree func, const char *name) { if (FUNCTION_DECL == TREE_CODE (func)) @@ -1329,7 +1327,7 @@ avr_lookup_function_attribute1 (const_tree func, const char *name) /* Return nonzero if FUNC is a naked function. */ -static int +static bool avr_naked_function_p (tree func) { return avr_lookup_function_attribute1 (func, "naked"); @@ -1338,7 +1336,7 @@ avr_naked_function_p (tree func) /* Return nonzero if FUNC is an interrupt function as specified by the "interrupt" attribute. */ -static int +static bool avr_interrupt_function_p (tree func) { return avr_lookup_function_attribute1 (func, "interrupt"); @@ -1347,7 +1345,7 @@ avr_interrupt_function_p (tree func) /* Return nonzero if FUNC is a signal function as specified by the "signal" attribute. */ -static int +static bool avr_signal_function_p (tree func) { return avr_lookup_function_attribute1 (func, "signal"); @@ -1355,7 +1353,7 @@ avr_signal_function_p (tree func) /* Return nonzero if FUNC is an OS_task function. 
*/ -static int +static bool avr_OS_task_function_p (tree func) { return avr_lookup_function_attribute1 (func, "OS_task"); @@ -1363,7 +1361,7 @@ avr_OS_task_function_p (tree func) /* Return nonzero if FUNC is an OS_main function. */ -static int +static bool avr_OS_main_function_p (tree func) { return avr_lookup_function_attribute1 (func, "OS_main"); @@ -1373,7 +1371,7 @@ avr_OS_main_function_p (tree func) /* Return nonzero if FUNC is a no_gccisr function as specified by the "no_gccisr" attribute. */ -static int +static bool avr_no_gccisr_function_p (tree func) { return avr_lookup_function_attribute1 (func, "no_gccisr"); @@ -1536,12 +1534,11 @@ avr_starting_frame_offset (void) static int avr_regs_to_save (HARD_REG_SET *set) { - int count; + int count = 0; int int_or_sig_p = cfun->machine->is_interrupt || cfun->machine->is_signal; if (set) CLEAR_HARD_REG_SET (*set); - count = 0; /* No need to save any registers if the function never returns or has the "OS_task" or "OS_main" attribute. */ @@ -1589,7 +1586,7 @@ avr_allocate_stack_slots_for_args (void) /* Return true if register FROM can be eliminated via register TO. */ static bool -avr_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) +avr_can_el
[patch,avr,applied] Improve output of insn "*insv.any_shift.".
Applied Roger's proposed improvements with some changes: Lengthy code is more convenient in avr.cc than in an insn output function, and it makes it easy to work out the exact instruction length. Moreover, the code can handle shifts with offset zero (cases of *and3 insns). Passed with no new regressions on ATmega128. Applied as https://gcc.gnu.org/r14-9317 Johann -- AVR: Improve output of insn "*insv.any_shift._split". The instructions printed by insn "*insv.any_shift._split" were sub-optimal. The code to print the improved output is lengthy and performed by new function avr_out_insv. As it turns out, the function can also handle shift-offsets of zero, which is "*andhi3", "*andpsi3" and "*andsi3". Thus, these three insns get a new 3-operand alternative where the 3rd operand is an exact power of 2. gcc/ * config/avr/avr-protos.h (avr_out_insv): New proto. * config/avr/avr.cc (avr_out_insv): New function. (avr_adjust_insn_length) [ADJUST_LEN_INSV]: Handle case. (avr_cbranch_cost) [ZERO_EXTRACT]: Adjust rtx costs. * config/avr/avr.md (define_attr "adjust_len") Add insv. (andhi3, *andhi3, andpsi3, *andpsi3, andsi3, *andsi3): Add constraint alternative where the 3rd operand is a power of 2, and the source register may differ from the destination. (*insv.any_shift._split): Call avr_out_insv to output instructions. Set attr "length" to "insv". * config/avr/constraints.md (Cb2, Cb3, Cb4): New constraints. gcc/testsuite/ * gcc.target/avr/torture/insv-anyshift-hi.c: New test. * gcc.target/avr/torture/insv-anyshift-si.c: New test. commit 49a1a340ea0eef681f23b6861f3cdb6840aadd99 Author: Roger Sayle Date: Tue Mar 5 11:06:17 2024 +0100 AVR: Improve output of insn "*insv.any_shift._split". The instructions printed by insn "*insv.any_shift._split" were sub-optimal. The code to print the improved output is lengthy and performed by new function avr_out_insv. As it turns out, the function can also handle shift-offsets of zero, which is "*andhi3", "*andpsi3" and "*andsi3".
Thus, these three insns get a new 3-operand alternative where the 3rd operand is an exact power of 2. gcc/ * config/avr/avr-protos.h (avr_out_insv): New proto. * config/avr/avr.cc (avr_out_insv): New function. (avr_adjust_insn_length) [ADJUST_LEN_INSV]: Handle case. (avr_cbranch_cost) [ZERO_EXTRACT]: Adjust rtx costs. * config/avr/avr.md (define_attr "adjust_len") Add insv. (andhi3, *andhi3, andpsi3, *andpsi3, andsi3, *andsi3): Add constraint alternative where the 3rd operand is a power of 2, and the source register may differ from the destination. (*insv.any_shift._split): Call avr_out_insv to output instructions. Set attr "length" to "insv". * config/avr/constraints.md (Cb2, Cb3, Cb4): New constraints. gcc/testsuite/ * gcc.target/avr/torture/insv-anyshift-hi.c: New test. * gcc.target/avr/torture/insv-anyshift-si.c: New test. diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h index 3e19409d636..bb680312117 100644 --- a/gcc/config/avr/avr-protos.h +++ b/gcc/config/avr/avr-protos.h @@ -58,6 +58,7 @@ extern const char *ret_cond_branch (rtx x, int len, int reverse); extern const char *avr_out_movpsi (rtx_insn *, rtx*, int*); extern const char *avr_out_sign_extend (rtx_insn *, rtx*, int*); extern const char *avr_out_insert_notbit (rtx_insn *, rtx*, int*); +extern const char *avr_out_insv (rtx_insn *, rtx*, int*); extern const char *avr_out_extr (rtx_insn *, rtx*, int*); extern const char *avr_out_extr_not (rtx_insn *, rtx*, int*); extern const char *avr_out_plus_set_ZN (rtx*, int*); diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index c8b2b504e3f..36995e05cbe 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -9795,6 +9795,178 @@ avr_out_insert_notbit (rtx_insn *insn, rtx op[], int *plen) } +/* Output instructions for XOP[0] = (XOP[1] XOP[2]) & XOP[3] where + - XOP[0] and XOP[1] have the same mode which is one of: QI, HI, PSI, SI. + - XOP[3] is an exact const_int power of 2. + - XOP[2] and XOP[3] are const_int.
+ - is any of: ASHIFT, LSHIFTRT, ASHIFTRT. + - The result depends on XOP[1]. + or XOP[0] = XOP[1] & XOP[2] where + - XOP[0] and XOP[1] have the same mode which is one of: HI, PSI, SI. + - XOP[2] is an exact const_int power of 2. + Returns "". + PLEN != 0: Set *PLEN to the code length in words. Don't output anything. + PLEN == 0: Output instructions. */ + +const char* +avr_out_insv (rtx_insn *insn, rtx xop[], int *plen) +{ + machine_mode mode = GET_MODE (xop[0]); + int n_bytes = GET_MODE_SIZE (mode); + rtx xsrc = SET_SRC (single_set (insn)); + + gcc_assert (AND == GET_CODE (xsrc)); + + rtx xop2 = xop[2]; + rtx xop3 = xop[3]; + +
[patch,avr,applied] Add two RTL peepholes.
Register alloc may expand a 3-operand arithmetic X = Y o CST as X = CST X o= Y where it may be better to instead: X = Y X o= CST Johann -- AVR: Add two RTL peepholes. Register alloc may expand a 3-operand arithmetic X = Y o CST as X = CST X o= Y where it may be better to instead: X = Y X o= CST because 1) the first insn may use MOVW for "X = Y", and 2) the operation may be more efficient when performed with a constant, for example when ADIW or SBIW can be used, or some bytes of the constant are 0x00 or 0xff. gcc/ * config/avr/avr.md: Add two RTL peepholes for PLUS, IOR and AND in HI, PSI, SI that swap operation order from "X = CST, X o= Y" to "X = Y, X o= CST".diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md index 6bdf4682fab..bc8a59c956c 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -932,6 +932,55 @@ (define_peephole2 ; movw_r operands[5] = gen_rtx_REG (HImode, REGNO (operands[3])); }) + +;; Register alloc may expand a 3-operand arithmetic X = Y o CST as +;;X = CST +;;X o= Y +;; where it may be better to instead: +;;X = Y +;;X o= CST +;; because 1) the first insn may use MOVW for "X = Y", and 2) the +;; operation may be more efficient when performed with a constant, +;; for example when ADIW or SBIW can be used, or some bytes of +;; the constant are 0x00 or 0xff. +(define_peephole2 + [(parallel [(set (match_operand:HISI 0 "d_register_operand") + (match_operand:HISI 1 "const_int_operand")) + (clobber (reg:CC REG_CC))]) + (parallel [(set (match_dup 0) + (piaop:HISI (match_dup 0) + (match_operand:HISI 2 "register_operand"))) + (clobber (scratch:QI)) + (clobber (reg:CC REG_CC))])] + "! reg_overlap_mentioned_p (operands[0], operands[2])" + [(parallel [(set (match_dup 0) + (match_dup 2)) + (clobber (reg:CC REG_CC))]) + (parallel [(set (match_dup 0) + (piaop:HISI (match_dup 0) + (match_dup 1))) + (clobber (scratch:QI)) + (clobber (reg:CC REG_CC))])]) + +;; Same, but just for plus:HI without a scratch:QI. 
+(define_peephole2 + [(parallel [(set (match_operand:HI 0 "d_register_operand") + (match_operand:HI 1 "const_int_operand")) + (clobber (reg:CC REG_CC))]) + (parallel [(set (match_dup 0) + (plus:HI (match_dup 0) +(match_operand:HI 2 "register_operand"))) + (clobber (reg:CC REG_CC))])] + "! reg_overlap_mentioned_p (operands[0], operands[2])" + [(parallel [(set (match_dup 0) + (match_dup 2)) + (clobber (reg:CC REG_CC))]) + (parallel [(set (match_dup 0) + (plus:HI (match_dup 0) +(match_dup 1))) + (clobber (reg:CC REG_CC))])]) + + ;; For LPM loads from AS1 we split ;;R = *Z ;; to @@ -1644,9 +1693,9 @@ (define_insn_and_split "*addhi3_sp" [(set_attr "length" "6") (set_attr "adjust_len" "addto_sp")]) -;; "*addhi3" -;; "*addhq3" "*adduhq3" -;; "*addha3" "*adduha3" +;; "*addhi3_split" +;; "*addhq3_split" "*adduhq3_split" +;; "*addha3_split" "*adduha3_split" (define_insn_and_split "*add3_split" [(set (match_operand:ALL2 0 "register_operand" "=??r,d,!w,d") (plus:ALL2 (match_operand:ALL2 1 "register_operand" "%0,0,0 ,0") @@ -1661,6 +1710,9 @@ (define_insn_and_split "*add3_split" "" [(set_attr "isa" "*,*,adiw,*")]) +;; "*addhi3" +;; "*addhq3" "*adduhq3" +;; "*addha3" "*adduha3" (define_insn "*add3" [(set (match_operand:ALL2 0 "register_operand" "=??r,d,!w,d") (plus:ALL2 (match_operand:ALL2 1 "register_operand" "%0,0,0 ,0") @@ -1732,6 +1784,9 @@ (define_insn_and_split "add3_clobber" (clobber (match_dup 3)) (clobber (reg:CC REG_CC))])]) +;; "*addhi3_clobber" +;; "*addhq3_clobber" "*adduhq3_clobber" +;; "*addha3_clobber" "*adduha3_clobber" (define_insn "*add3_clobber" [(set (match_operand:ALL2 0 "register_operand""=!w,d,r") (plus:ALL2 (match_operand:ALL2 1 "register_operand" "%0,0,0")
[patch,avr,applied] Adjusted rtx costs of plus + zero_extend
Adjusted rtx costs of (plus (zero_extend (...)) reg). Johann -- AVR: Adjust rtx cost of plus + zero_extend. gcc/ * config/avr/avr.cc (avr_rtx_costs_1) [PLUS+ZERO_EXTEND]: Adjust rtx cost. diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index 36995e05cbe..b87ae6a256d 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -12513,6 +12513,13 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, return true; case PLUS: + if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + && REG_P (XEXP (x, 1))) + { + *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) - 1); + return true; + } + switch (mode) { case E_QImode:
[patch,avr,applied] Add an insn combine pattern for offset computation.
Computing uint16_t += 2 * uint8_t can occur when an offset into a 16-bit array is computed. Without this pattern it costs six instructions: A move (1), a zero-extend (1), a shift (2) and an addition (2). With this pattern it costs 4. Johann -- AVR: Add an insn combine pattern for offset computation. Computing uint16_t += 2 * uint8_t can occur when an offset into a 16-bit array is computed. Without this pattern it costs six instructions: A move (1), a zero-extend (1), a shift (2) and an addition (2). With this pattern it costs 4. gcc/ * config/avr/avr.md (*addhi3_zero_extend.ashift1): New pattern. * config/avr/avr.cc (avr_rtx_costs_1) [PLUS]: Compute its cost. diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index b87ae6a256d..1fa4b557f5d 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -12513,6 +12513,17 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, return true; case PLUS: + // uint16_t += 2 * uint8_t; + if (mode == HImode + && GET_CODE (XEXP (x, 0)) == ASHIFT + && REG_P (XEXP (x, 1)) + && XEXP (XEXP (x, 0), 1) == const1_rtx + && GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND) + { + *total = COSTS_N_INSNS (4); + return true; + } + if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND && REG_P (XEXP (x, 1))) { diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md index bc8a59c956c..52b6cff4a8b 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -1630,6 +1630,39 @@ (define_insn "*addhi3_zero_extend.const" "subi %A0,%n2\;sbc %B0,%B0" [(set_attr "length" "2")]) + +;; Occurs when computing offsets into 16-bit arrays. +;; Saves up to 2 instructions. 
+(define_insn_and_split "*addhi3_zero_extend.ashift1.split" + [(set (match_operand:HI 0 "register_operand""=r") +(plus:HI (ashift:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r")) +(const_int 1)) + (match_operand:HI 2 "register_operand""0")))] + "" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) + (plus:HI (ashift:HI (zero_extend:HI (match_dup 1)) + (const_int 1)) +(match_dup 2))) + (clobber (reg:CC REG_CC))])]) + +(define_insn "*addhi3_zero_extend.ashift1" + [(set (match_operand:HI 0 "register_operand""=r") +(plus:HI (ashift:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r")) +(const_int 1)) + (match_operand:HI 2 "register_operand""0"))) + (clobber (reg:CC REG_CC))] + "reload_completed" + { +return reg_overlap_mentioned_p (operands[1], operands[0]) + ? "mov __tmp_reg__,%1\;add %A0,__tmp_reg__\;adc %B0,__zero_reg__\;add %A0,__tmp_reg__\;adc %B0,__zero_reg__" + : "add %A0,%1\;adc %B0,__zero_reg__\;add %A0,%1\;adc %B0,__zero_reg__"; + } + [(set (attr "length") +(symbol_ref ("4 + reg_overlap_mentioned_p (operands[1], operands[0])")))]) + + (define_insn_and_split "*usum_widenqihi3_split" [(set (match_operand:HI 0 "register_operand" "=r") (plus:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "0"))
[patch,avr,applied] Add some more cost computation
This adds cost computation for some insn combiner patterns and improves a few other nits. Johann -- AVR: Add cost computation for some insn combine patterns. gcc/ * config/avr/avr.cc (avr_rtx_costs_1) [PLUS]: Determine cost for usum_widenqihi and add_zero_extend1. [MINUS]: Determine costs for udiff_widenqihi, sub+zero_extend, sub+sign_extend. * config/avr/avr.md (*addhi3.sign_extend1, *subhi3.sign_extend2): Compute exact insn lengths. (*usum_widenqihi3): Allow input operands to commute.diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index 1fa4b557f5d..00fce8da15f 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -12524,10 +12524,25 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, return true; } + // *usum_widenqihi + if (mode == HImode + && GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND) + { + *total = COSTS_N_INSNS (3); + return true; + } + if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND && REG_P (XEXP (x, 1))) { - *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) - 1); + *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)); + return true; + } + if (REG_P (XEXP (x, 0)) + && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND) + { + *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)); return true; } @@ -12610,6 +12625,29 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code, return true; case MINUS: + // *udiff_widenqihi + if (mode == HImode + && GET_CODE (XEXP (x, 0)) == ZERO_EXTEND + && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND) + { + *total = COSTS_N_INSNS (2); + return true; + } + // *sub3_zero_extend1 + if (REG_P (XEXP (x, 0)) + && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND) + { + *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)); + return true; + } + // *sub3.sign_extend2 + if (REG_P (XEXP (x, 0)) + && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND) + { + *total = COSTS_N_INSNS (2 + GET_MODE_SIZE (mode)); + return true; + } + if (AVR_HAVE_MUL && QImode == mode && register_operand (XEXP (x, 0), QImode) diff --git a/gcc/config/avr/avr.md 
b/gcc/config/avr/avr.md index 52b6cff4a8b..59ec724f7da 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -1588,12 +1588,10 @@ (define_insn_and_split "*addhi3.sign_extend1_split" "" "#" "&& reload_completed" - [(parallel - [(set (match_dup 0) -(plus:HI - (sign_extend:HI (match_dup 1)) - (match_dup 2))) - (clobber (reg:CC REG_CC))])]) + [(parallel [(set (match_dup 0) + (plus:HI (sign_extend:HI (match_dup 1)) +(match_dup 2))) + (clobber (reg:CC REG_CC))])]) (define_insn "*addhi3.sign_extend1" @@ -1607,7 +1605,8 @@ (define_insn "*addhi3.sign_extend1" ? "mov __tmp_reg__,%1\;add %A0,%1\;adc %B0,__zero_reg__\;sbrc __tmp_reg__,7\;dec %B0" : "add %A0,%1\;adc %B0,__zero_reg__\;sbrc %1,7\;dec %B0"; } - [(set_attr "length" "5")]) + [(set (attr "length") +(symbol_ref ("4 + reg_overlap_mentioned_p (operands[0], operands[1])")))]) (define_insn_and_split "*addhi3_zero_extend.const_split" [(set (match_operand:HI 0 "register_operand" "=d") @@ -1665,7 +1664,7 @@ (define_insn "*addhi3_zero_extend.ashift1" (define_insn_and_split "*usum_widenqihi3_split" [(set (match_operand:HI 0 "register_operand" "=r") -(plus:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "0")) +(plus:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "%0")) (zero_extend:HI (match_operand:QI 2 "register_operand" "r"] "" "#" @@ -1678,7 +1677,7 @@ (define_insn_and_split "*usum_widenqihi3_split" (define_insn "*usum_widenqihi3" [(set (match_operand:HI 0 "register_operand" "=r") -(plus:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "0")) +(plus:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "%0")) (zero_extend:HI (match_operand:QI 2 "register_operand" "r" (clobber (reg:CC REG_CC))] "reload_completed" @@ -2186,7 +2185,8 @@ (define_insn "*subhi3.sign_extend2" ? 
"mov __tmp_reg__,%2\;sub %A0,%2\;sbc %B0,__zero_reg__\;sbrc __tmp_reg__,7\;inc %B0" : "sub %A0,%2\;sbc %B0,__zero_reg__\;sbrc %2,7\;inc %B0"; } - [(set_attr "length" "5")]) + [(set (attr "length") +(symbol_ref ("4 + reg_overlap_mentioned_p (operands[0], operands[2])")))]) ;; "subsi3" ;; "subsq3" "subusq3"
[patch,avr,applied] Tweak xor insn constraints
xor insn allows some more values without the requirement of a scratch register. This patch adds new constraint alternative for such values. The output function avr_out_bitop already handles these cases, so no change is needed there. Johann -- avr.md - Tweak xor insn constraints. xor insn can handle some more values without the requirement of a scratch register. This patch adds a new constraint alternative for such values. The output function avr_out_bitop already handles these cases, so no change is needed there. gcc/ * config/avr/constraints.md (CX2, CX3, CX4): New constraints. * config/avr/avr-protos.h (avr_xor_noclobber_dconst): New proto. * config/avr/avr.cc (avr_xor_noclobber_dconst): New function. * config/avr/avr.md (xorhi3, *xorhi3): Add "d,0,CX2,X" alternative. (xorpsi3, *xorpsi3): Add "d,0,CX3,X" alternative. (xorsi3, *xorsi3): Add "d,0,CX4,X" alternative.diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h index bb680312117..dc23cfbf461 100644 --- a/gcc/config/avr/avr-protos.h +++ b/gcc/config/avr/avr-protos.h @@ -101,6 +101,7 @@ extern const char* avr_out_xload (rtx_insn *, rtx*, int*); extern const char* avr_out_cpymem (rtx_insn *, rtx*, int*); extern const char* avr_out_insert_bits (rtx*, int*); extern bool avr_popcount_each_byte (rtx, int, int); +extern bool avr_xor_noclobber_dconst (rtx, int); extern bool avr_has_nibble_0xf (rtx); extern int extra_constraint_Q (rtx x); diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index 00fce8da15f..12c59668b4c 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -281,6 +281,31 @@ avr_popcount_each_byte (rtx xval, int n_bytes, int pop_mask) } +/* Constraint helper function. XVAL is a CONST_INT. Return true if we + can perform XOR without a clobber reg, provided the operation is on + a d-register. This means each byte is in { 0, 0xff, 0x80 }. 
*/ + +bool +avr_xor_noclobber_dconst (rtx xval, int n_bytes) +{ + machine_mode mode = GET_MODE (xval); + + if (VOIDmode == mode) +mode = SImode; + + for (int i = 0; i < n_bytes; ++i) +{ + rtx xval8 = simplify_gen_subreg (QImode, xval, mode, i); + unsigned int val8 = UINTVAL (xval8) & GET_MODE_MASK (QImode); + + if (val8 != 0 && val8 != 0xff && val8 != 0x80) + return false; +} + + return true; +} + + /* Access some RTX as INT_MODE. If X is a CONST_FIXED we can get the bit representation of X by "casting" it to CONST_INT. */ diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md index bc408633eb5..97f42be7729 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -4741,10 +4741,10 @@ (define_insn "*xorqi3" [(set_attr "length" "1")]) (define_insn_and_split "xorhi3" - [(set (match_operand:HI 0 "register_operand" "=??r,r ,r") -(xor:HI (match_operand:HI 1 "register_operand" "%0,0 ,0") -(match_operand:HI 2 "nonmemory_operand" "r,Cx2,n"))) - (clobber (match_scratch:QI 3"=X,X ,&d"))] + [(set (match_operand:HI 0 "register_operand" "=??r,r ,d ,r") +(xor:HI (match_operand:HI 1 "register_operand" "%0,0 ,0 ,0") +(match_operand:HI 2 "nonmemory_operand" "r,Cx2,CX2,n"))) + (clobber (match_scratch:QI 3"=X,X ,X ,&d"))] "" "#" "&& reload_completed" @@ -4755,10 +4755,10 @@ (define_insn_and_split "xorhi3" (clobber (reg:CC REG_CC))])]) (define_insn "*xorhi3" - [(set (match_operand:HI 0 "register_operand" "=??r,r ,r") -(xor:HI (match_operand:HI 1 "register_operand" "%0,0 ,0") -(match_operand:HI 2 "nonmemory_operand" "r,Cx2,n"))) - (clobber (match_scratch:QI 3"=X,X ,&d")) + [(set (match_operand:HI 0 "register_operand" "=??r,r ,d ,r") +(xor:HI (match_operand:HI 1 "register_operand" "%0,0 ,0 ,0") +(match_operand:HI 2 "nonmemory_operand" "r,Cx2,CX2,n"))) + (clobber (match_scratch:QI 3"=X,X ,X ,&d")) (clobber (reg:CC REG_CC))] "reload_completed" { @@ -4767,14 +4767,14 @@ (define_insn "*xorhi3" return avr_out_bitop (insn, operands, NULL); } - [(set_attr "length" "2,2,4") - 
(set_attr "adjust_len" "*,out_bitop,out_bitop")]) + [(set_attr "length" "2,2,4,4") + (set_attr "adjust_len" "*,out_bitop,out_bitop,out_bitop")]) (define_insn_and_split "xorpsi3" - [(set (match_operand:PSI 0 "register_operand""=??r,r ,r") -(xor:PSI (match_operand:PSI 1 "register_operand" "%0,0 ,0") - (match_operand:PSI 2 "nonmemory_operand" "r,Cx3,n"))) - (clobber (match_scratch:QI 3 "=X,X ,&d"))] + [(set (match_operand:PSI 0 "register_operand""=??r,r ,d ,r") +(xor:PSI (match_operand:PSI 1 "register_operand" "%0,0 ,0 ,0") + (match_operand:PSI 2 "nonmemory_operand" "r,Cx3,CX3,n"))) + (clobber (match_scratch
[patch,avr,applied] Adjust message for SIGNAL and INTERRUPT usage
Applied this patchlet for a more precise diagnostic. Johann -- AVR: Adjust message for SIGNAL and INTERRUPT usage gcc/ * config/avr/avr.cc (avr_set_current_function): Adjust diagnostic for deprecated SIGNAL and INTERRUPT usage without respective header. diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index 12c59668b4c..4a5a921107b 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -1495,14 +1495,20 @@ avr_set_current_function (tree decl) // Common problem is using "ISR" without first including avr/interrupt.h. const char *name = IDENTIFIER_POINTER (DECL_NAME (decl)); name = default_strip_name_encoding (name); - if (strcmp ("ISR", name) == 0 - || strcmp ("INTERRUPT", name) == 0 - || strcmp ("SIGNAL", name) == 0) + if (strcmp ("ISR", name) == 0) { warning_at (loc, OPT_Wmisspelled_isr, "%qs is a reserved identifier" " in AVR-LibC. Consider %<#include %>" " before using the %qs macro", name, name); } + if (strcmp ("INTERRUPT", name) == 0 + || strcmp ("SIGNAL", name) == 0) +{ + warning_at (loc, OPT_Wmisspelled_isr, "%qs is a deprecated identifier" + " in AVR-LibC. Consider %<#include %>" + " or %<#include %>" + " before using the %qs macro", name, name); +} #endif // AVR-LibC naming conventions /* Don't print the above diagnostics more than once. */
[patch,avr,applied]: Rename %_misc specs.
This renames specs like cc1_misc to cc1_rodata_in_ram to point out their purpose. Johann -- AVR: Rename device-specs %_misc to %_rodata_in_ram. gcc/ * config/avr/gen-avr-mmcu-specs.cc: Rename spec cc1_misc to cc1_rodata_in_ram. Rename spec link_misc to link_rodata_in_ram. Remove spec asm_misc. * config/avr/specs.h: Same. diff --git a/gcc/config/avr/gen-avr-mmcu-specs.cc b/gcc/config/avr/gen-avr-mmcu-specs.cc index 02778aa3ce8..06d9d3c8d7d 100644 --- a/gcc/config/avr/gen-avr-mmcu-specs.cc +++ b/gcc/config/avr/gen-avr-mmcu-specs.cc @@ -294,7 +294,7 @@ print_mcu (const avr_mcu_t *mcu) : "\t%{mabsdata}"); // -m[no-]rodata-in-ram basically affects linking, but sanity-check early. - fprintf (f, "*cc1_misc:\n\t%%(check_rodata_in_ram)\n\n"); + fprintf (f, "*cc1_rodata_in_ram:\n\t%%(check_rodata_in_ram)\n\n"); // avr-gcc specific specs for assembling / the assembler. @@ -319,8 +319,6 @@ print_mcu (const avr_mcu_t *mcu) ? "\t%{mno-skip-bug}" : "\t%{!mskip-bug: -mno-skip-bug}"); - fprintf (f, "*asm_misc:\n" /* empty */ "\n\n"); - // avr-specific specs for linking / the linker. int wrap_k = @@ -361,7 +359,7 @@ print_mcu (const avr_mcu_t *mcu) } // -m[no-]rodata-in-ram affects linking. Sanity check its usage. - fprintf (f, "*link_misc:\n\t%%(check_rodata_in_ram)\n\n"); + fprintf (f, "*link_rodata_in_ram:\n\t%%(check_rodata_in_ram)\n\n"); // Specs known to GCC. diff --git a/gcc/config/avr/specs.h b/gcc/config/avr/specs.h index 574402035bc..0ccc37b8844 100644 --- a/gcc/config/avr/specs.h +++ b/gcc/config/avr/specs.h @@ -36,7 +36,7 @@ along with GCC; see the file COPYING3. If not see "%(cc1_errata_skip) " \ "%(cc1_rmw) " \ "%(cc1_absdata) " \ - "%(cc1_misc) " + "%(cc1_rodata_in_ram) " #undef CC1PLUS_SPEC #define CC1PLUS_SPEC\ @@ -54,8 +54,7 @@ along with GCC; see the file COPYING3. 
If not see "%(asm_relax) " \ "%(asm_rmw) " \ "%(asm_gccisr) " \ - "%(asm_errata_skip) " \ - "%(asm_misc) " + "%(asm_errata_skip) " #define LINK_RELAX_SPEC \ "%{mrelax:--relax} " @@ -67,7 +66,7 @@ along with GCC; see the file COPYING3. If not see "%(link_text_start) " \ "%(link_relax) " \ "%(link_pmem_wrap) " \ - "%(link_misc) " \ + "%(link_rodata_in_ram) " \ "%{shared:%eshared is not supported} " #undef LIB_SPEC
[patch, avr, applied] Specs always define __AVR_PM_BASE_ADDRESS__ when the core has it
This defines the spec always when the core has it, not only override it when it differs from the core's value. Johann -- AVR: Always define __AVR_PM_BASE_ADDRESS__ in specs provided the core has it. gcc/ * config/avr/gen-avr-mmcu-specs.cc (print_mcu) <*cpp_mcu>: Spec always defines __AVR_PM_BASE_ADDRESS__ if the core has it. diff --git a/gcc/config/avr/gen-avr-mmcu-specs.cc b/gcc/config/avr/gen-avr-mmcu-specs.cc index 06d9d3c8d7d..41ebfa82eb5 100644 --- a/gcc/config/avr/gen-avr-mmcu-specs.cc +++ b/gcc/config/avr/gen-avr-mmcu-specs.cc @@ -199,13 +199,21 @@ print_mcu (const avr_mcu_t *mcu) bool flmap = (mcu->dev_attribute & AVR_ISA_FLMAP); bool is_arch = mcu->macro == NULL; bool is_device = ! is_arch; - int flash_pm_offset = 0; + int rodata_pm_offset = 0; + int pm_base_address = 0; if (arch->flash_pm_offset && mcu->flash_pm_offset && mcu->flash_pm_offset != arch->flash_pm_offset) { - flash_pm_offset = mcu->flash_pm_offset; + rodata_pm_offset = mcu->flash_pm_offset; +} + + if (arch->flash_pm_offset) +{ + pm_base_address = mcu->flash_pm_offset + ? mcu->flash_pm_offset + : arch->flash_pm_offset; } if (is_arch @@ -339,8 +347,8 @@ print_mcu (const avr_mcu_t *mcu) fprintf (f, "*link_arch:\n\t%s", link_arch_spec); if (is_device - && flash_pm_offset) -fprintf (f, " --defsym=__RODATA_PM_OFFSET__=0x%x", flash_pm_offset); + && rodata_pm_offset) +fprintf (f, " --defsym=__RODATA_PM_OFFSET__=0x%x", rodata_pm_offset); fprintf (f, "\n\n"); if (is_device) @@ -381,10 +389,10 @@ print_mcu (const avr_mcu_t *mcu) fprintf (f, "*cpp_mcu:\n"); fprintf (f, "\t-D%s", mcu->macro); - if (flash_pm_offset) + if (pm_base_address) { fprintf (f, " -U__AVR_PM_BASE_ADDRESS__"); - fprintf (f, " -D__AVR_PM_BASE_ADDRESS__=0x%x", flash_pm_offset); + fprintf (f, " -D__AVR_PM_BASE_ADDRESS__=0x%x", pm_base_address); } if (have_flmap) fprintf (f, " -D__AVR_HAVE_FLMAP__");
[patch,avr,applied] PR113824: Fix multilib set for ATA5795
This device was in the wrong multilib set. Johann -- AVR: target/113824 - Fix multilib set for ATA5795. gcc/ PR target/113824 * config/avr/avr-mcus.def (ata5795): Move from avr5 to avr4. * doc/avr-mmcu.texi: Rebuild. diff --git a/gcc/config/avr/avr-mcus.def b/gcc/config/avr/avr-mcus.def index 7ddfba0a13c..27812d441f7 100644 --- a/gcc/config/avr/avr-mcus.def +++ b/gcc/config/avr/avr-mcus.def @@ -138,9 +138,10 @@ AVR_MCU ("attiny167",ARCH_AVR35, AVR_ISA_NONE, "__AVR_ATtiny167__", AVR_MCU ("attiny1634", ARCH_AVR35, AVR_ISA_NONE, "__AVR_ATtiny1634__", 0x0100, 0x0, 0x4000, 0) /* Enhanced, <= 8K. */ AVR_MCU ("avr4", ARCH_AVR4, AVR_ISA_NONE, NULL, 0x0060, 0x0, 0x2000, 0) +AVR_MCU ("ata5795", ARCH_AVR4, AVR_ISA_NONE, "__AVR_ATA5795__", 0x0100, 0x0, 0x2000, 0) AVR_MCU ("ata6285", ARCH_AVR4, AVR_ISA_NONE, "__AVR_ATA6285__", 0x0100, 0x0, 0x2000, 0) AVR_MCU ("ata6286", ARCH_AVR4, AVR_ISA_NONE, "__AVR_ATA6286__", 0x0100, 0x0, 0x2000, 0) -AVR_MCU ("ata6289", ARCH_AVR4, AVR_ISA_NONE, "__AVR_ATA6289__", 0x0100, 0x0, 0x2000, 0) +AVR_MCU ("ata6289", ARCH_AVR4, AVR_ISA_NONE, "__AVR_ATA6289__", 0x0100, 0x0, 0x2000, 0) AVR_MCU ("ata6612c", ARCH_AVR4, AVR_ISA_NONE, "__AVR_ATA6612C__", 0x0100, 0x0, 0x2000, 0) AVR_MCU ("atmega8", ARCH_AVR4, AVR_ISA_NONE, "__AVR_ATmega8__", 0x0060, 0x0, 0x2000, 0) AVR_MCU ("atmega8a", ARCH_AVR4, AVR_ISA_NONE, "__AVR_ATmega8A__", 0x0060, 0x0, 0x2000, 0) @@ -172,7 +173,6 @@ AVR_MCU ("ata5787", ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5787__", AVR_MCU ("ata5790", ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5790__", 0x0100, 0x0, 0x4000, 0) AVR_MCU ("ata5790n", ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5790N__", 0x0100, 0x0, 0x4000, 0) AVR_MCU ("ata5791", ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5791__", 0x0100, 0x0, 0x4000, 0) -AVR_MCU ("ata5795", ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5795__", 0x0100, 0x0, 0x2000, 0) AVR_MCU ("ata5831", ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5831__", 0x0200, 0x8000, 0xd000, 0) AVR_MCU ("ata5835", ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5835__", 0x0200, 0x8000, 
0xd200, 0) AVR_MCU ("ata6613c", ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA6613C__", 0x0100, 0x0, 0x4000, 0) diff --git a/gcc/doc/avr-mmcu.texi b/gcc/doc/avr-mmcu.texi index f38a0e06343..dcbf4ef7247 100644 --- a/gcc/doc/avr-mmcu.texi +++ b/gcc/doc/avr-mmcu.texi @@ -34,11 +34,11 @@ @item @anchor{avr4}avr4 ``Enhanced'' devices with up to 8@tie{}KiB of program memory. -@*@var{mcu}@tie{}= @code{atmega48}, @code{atmega48a}, @code{atmega48p}, @code{atmega48pa}, @code{atmega48pb}, @code{atmega8}, @code{atmega8a}, @code{atmega8hva}, @code{atmega88}, @code{atmega88a}, @code{atmega88p}, @code{atmega88pa}, @code{atmega88pb}, @code{atmega8515}, @code{atmega8535}, @code{ata6285}, @code{ata6286}, @code{ata6289}, @code{ata6612c}, @code{at90pwm1}, @code{at90pwm2}, @code{at90pwm2b}, @code{at90pwm3}, @code{at90pwm3b}, @code{at90pwm81}. +@*@var{mcu}@tie{}= @code{atmega48}, @code{atmega48a}, @code{atmega48p}, @code{atmega48pa}, @code{atmega48pb}, @code{atmega8}, @code{atmega8a}, @code{atmega8hva}, @code{atmega88}, @code{atmega88a}, @code{atmega88p}, @code{atmega88pa}, @code{atmega88pb}, @code{atmega8515}, @code{atmega8535}, @code{ata5795}, @code{ata6285}, @code{ata6286}, @code{ata6289}, @code{ata6612c}, @code{at90pwm1}, @code{at90pwm2}, @code{at90pwm2b}, @code{at90pwm3}, @code{at90pwm3b}, @code{at90pwm81}. @item @anchor{avr5}avr5 ``Enhanced'' devices with 16@tie{}KiB up to 64@tie{}KiB of program memory. 
-@*@var{mcu}@tie{}= @code{atmega16}, @code{atmega16a}, @code{atmega16hva}, @code{atmega16hva2}, @code{atmega16hvb}, @code{atmega16hvbrevb}, @code{atmega16m1}, @code{atmega16u4}, @code{atmega161}, @code{atmega162}, @code{atmega163}, @code{atmega164a}, @code{atmega164p}, @code{atmega164pa}, @code{atmega165}, @code{atmega165a}, @code{atmega165p}, @code{atmega165pa}, @code{atmega168}, @code{atmega168a}, @code{atmega168p}, @code{atmega168pa}, @code{atmega168pb}, @code{atmega169}, @code{atmega169a}, @code{atmega169p}, @code{atmega169pa}, @code{atmega32}, @code{atmega32a}, @code{atmega32c1}, @code{atmega32hvb}, @code{atmega32hvbrevb}, @code{atmega32m1}, @code{atmega32u4}, @code{atmega32u6}, @code{atmega323}, @code{atmega324a}, @code{atmega324p}, @code{atmega324pa}, @code{atmega324pb}, @code{atmega325}, @code{atmega325a}, @code{atmega325p}, @code{atmega325pa}, @code{atmega328}, @code{atmega328p}, @code{atmega328pb}, @code{atmega329}, @code{atmega329a}, @code{atmega329p}, @code{atmega329pa}, @code{atmega3250}, @code{atmega3250a}, @code{atmega3250p}, @code{atmega3250pa}, @code{atmega3290}, @code{atmega3290a}, @code{atmega3290p}, @code{atmega3290pa}, @code{atmega406}, @code{atmega64}, @code{atmega64a}, @co
[patch,avr,applied] Tidy up gen-avr-mmcu-specs.cc
This patchlet tidies up gen-avr-mmcu-specs.cc. Some information was computed more than once, in different functions. The patch uses a new struct to pass around information. Johann AVR: Tidy up gen-avr-mmcu-specs.cc gcc/ * config/avr/gen-avr-mmcu-specs.cc (struct McuInfo): New. (main, print_mcu, diagnose_mrodata_in_ram): Pass it down. --diff --git a/gcc/config/avr/gen-avr-mmcu-specs.cc b/gcc/config/avr/gen-avr-mmcu-specs.cc index 41ebfa82eb5..ea69145d404 100644 --- a/gcc/config/avr/gen-avr-mmcu-specs.cc +++ b/gcc/config/avr/gen-avr-mmcu-specs.cc @@ -129,62 +129,70 @@ static const bool have_avrxmega3_rodata_in_flash = false; #endif -static void -diagnose_mrodata_in_ram (FILE *f, const char *spec, const avr_mcu_t *mcu) +struct McuInfo { - enum avr_arch_id arch_id = mcu->arch_id; - const avr_arch_t *arch = &avr_arch_types[arch_id]; - const bool is_arch = mcu->macro == NULL; - const bool flmap = (mcu->dev_attribute & AVR_ISA_FLMAP); - const bool have_flmap2 = have_avrxmega2_flmap && arch_id == ARCH_AVRXMEGA2; - const bool have_flmap4 = have_avrxmega4_flmap && arch_id == ARCH_AVRXMEGA4; - const bool have_flmap = flmap && (have_flmap2 || have_flmap4); - - const bool rodata_in_flash = (arch_id == ARCH_AVRTINY -|| (arch_id == ARCH_AVRXMEGA3 -&& have_avrxmega3_rodata_in_flash)); + enum avr_arch_id arch_id; + const avr_arch_t *arch; + bool is_arch, is_device; + bool flmap, have_flmap2, have_flmap4, have_flmap; + bool rodata_in_flash; // Device name as used by the vendor, extracted from "__AVR___". char mcu_Name[50] = { 0 }; - if (! is_arch) -snprintf (mcu_Name, 1 + strlen (mcu->macro) - strlen ("__AVR___"), - "%s", mcu->macro + strlen ("__AVR_")); + McuInfo (const avr_mcu_t *mcu) +: arch_id (mcu->arch_id), arch (& avr_arch_types[arch_id]), + is_arch (mcu->macro == NULL), is_device (! 
is_arch), + flmap (mcu->dev_attribute & AVR_ISA_FLMAP), + have_flmap2 (have_avrxmega2_flmap && arch_id == ARCH_AVRXMEGA2), + have_flmap4 (have_avrxmega4_flmap && arch_id == ARCH_AVRXMEGA4), + have_flmap (flmap && (have_flmap2 || have_flmap4)), + rodata_in_flash (arch_id == ARCH_AVRTINY + || (arch_id == ARCH_AVRXMEGA3 + && have_avrxmega3_rodata_in_flash)) + { +if (is_device) + snprintf (mcu_Name, 1 + strlen (mcu->macro) - strlen ("__AVR_" "__"), + "%s", mcu->macro + strlen ("__AVR_")); + } +}; + + +static void +diagnose_mrodata_in_ram (FILE *f, const char *spec, const avr_mcu_t *mcu, + const McuInfo &mi) +{ fprintf (f, "%s:\n", spec); - if (rodata_in_flash && is_arch) + if (mi.rodata_in_flash && mi.is_arch) fprintf (f, "\t%%{mrodata-in-ram: %%e-mrodata-in-ram is not supported" " for %s}", mcu->name); - else if (rodata_in_flash) + else if (mi.rodata_in_flash) fprintf (f, "\t%%{mrodata-in-ram: %%e-mrodata-in-ram is not supported" - " for %s (arch=%s)}", mcu_Name, arch->name); - else if (is_arch) + " for %s (arch=%s)}", mi.mcu_Name, mi.arch->name); + else if (mi.is_arch) { - if (! have_flmap2 && ! have_flmap4) + if (! mi.have_flmap2 && ! mi.have_flmap4) fprintf (f, "\t%%{mno-rodata-in-ram: %%e-mno-rodata-in-ram is not" " supported for %s}", mcu->name); } - else if (! have_flmap) + else if (! 
mi.have_flmap) fprintf (f, "\t%%{mno-rodata-in-ram: %%e-mno-rodata-in-ram is not supported" - " for %s (arch=%s)}", mcu_Name, arch->name); + " for %s (arch=%s)}", mi.mcu_Name, mi.arch->name); fprintf (f, "\n\n"); } static void -print_mcu (const avr_mcu_t *mcu) +print_mcu (const avr_mcu_t *mcu, const McuInfo &mi) { const char *sp8_spec; const char *rcall_spec; const avr_mcu_t *arch_mcu; - const avr_arch_t *arch; - enum avr_arch_id arch_id = mcu->arch_id; for (arch_mcu = mcu; arch_mcu->macro; ) arch_mcu--; - if (arch_mcu->arch_id != arch_id) + if (arch_mcu->arch_id != mi.arch_id) exit (EXIT_FAILURE); - arch = &avr_arch_types[arch_id]; - char name[100]; if (snprintf (name, sizeof name, "specs-%s", mcu->name) >= (int) sizeof name) exit (EXIT_FAILURE); @@ -196,29 +204,26 @@ print_mcu (const avr_mcu_t *mcu) bool rmw = (mcu->dev_attribute & AVR_ISA_RMW) != 0; bool sp8 = (mcu->dev_attribute & AVR_SHORT_SP) != 0; bool rcall = (mcu->dev_attribute & AVR_ISA_RCALL); - bool flmap = (mcu->dev_attribute & AVR_ISA_FLMAP); - bool is_arch = mcu->macro == NULL; - bool is_device = ! is_arch; int rodata_pm_offset = 0; int pm_base_address = 0; - if (arch->flash_pm_offset + if (mi.arch->flash_pm_offset && mcu->flash_pm_offset - && mcu->flash_pm_offset != arch->flash_pm_offset) + && mcu->flash_pm_offset != mi.arch->flash_pm_offset) { rodata_pm_offset = mcu->flash_pm_offset; } - if (arch->flash_pm_offset) + if (mi.arch->flash_pm_offset) { pm_base_address = mcu->flash_pm_offset ? mcu->flash_pm_offset - : arch->flash_pm_offset; + : mi.arch->flash_pm
[patch,avr,applied] Addendum to target/112944: Initialize FLMAP as needed
This code will link against parts of the startup code from AVR-LibC when it is needed to init bit-field FLMAP. Johann -- AVR: target/112944 - Addendum: Link code to initialize NVMCTRL_CTRLB.FLMAP For devices that see a part for the flash memory in the RAM address space, bit-field NVMCTRL_CTRLB.FLMAP must match the value of symbol __flmap. This is achieved by dragging in startup code from lib.a. The mechanism is the same like for libgcc's __do_copy_data and __do_clear_bss. The code is implemented in AVR-LibC #931 and can be dragged by referencing __do_flmap_init. In addition to setting FLMAP, that code also sets bit FLMAPLOCK provided symbol __flmap_lock has a non-zero value. This protects FLMAP from future changes. When the __do_flmap_init code is not wanted, the symbol can be satisfied by linking with -Wl,--defsym,__do_flmap_init=0 gcc/ PR target/112944 * config/avr/gen-avr-mmcu-specs.cc (print_mcu) [have_flmap]: <*link_rodata_in_ram>: Spec undefs symbol __do_flmap_init when not linked with -mrodata-in-ram. diff --git a/gcc/config/avr/gen-avr-mmcu-specs.cc b/gcc/config/avr/gen-avr-mmcu-specs.cc index ea69145d404..bb94bea12b0 100644 --- a/gcc/config/avr/gen-avr-mmcu-specs.cc +++ b/gcc/config/avr/gen-avr-mmcu-specs.cc @@ -369,7 +369,10 @@ print_mcu (const avr_mcu_t *mcu, const McuInfo &mi) } // -m[no-]rodata-in-ram affects linking. Sanity check its usage. - fprintf (f, "*link_rodata_in_ram:\n\t%%(check_rodata_in_ram)\n\n"); + fprintf (f, "*link_rodata_in_ram:\n\t%%(check_rodata_in_ram)"); + if (mi.is_device && mi.have_flmap) +fprintf (f, " %%{!mrodata-in-ram:-u __do_flmap_init}"); + fprintf (f, "\n\n"); // Specs known to GCC.
Re: [pushed] wwwdocs: gcc-14: Fix typo in AVR section
Am 14.02.24 um 01:40 schrieb Gerald Pfeifer: Note that <nobr> is not part of current HTML standards; can we simply remove it? Hi Gerald, thanks for looking into this. The <nobr> is not strictly needed, I just had the case that "-Wl,--defsym,__RODATA_FLASH_START__=32k" had a line-break in it. In addition, I believe it might be good to rephrase that sentence. Do you mean "the linker will not pull in that code from ... any more"? Yes. When the symbol is satisfied by --defsym, then the code is not dragged or pulled from that static lib. I am not a native speaker, and it is great when you fix any awkward formulations or grammar. Thanks a lot Johann Gerald --- htdocs/gcc-14/changes.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/htdocs/gcc-14/changes.html b/htdocs/gcc-14/changes.html index 6ac7c8b1..92bd0a7b 100644 --- a/htdocs/gcc-14/changes.html +++ b/htdocs/gcc-14/changes.html @@ -370,7 +370,7 @@ a work-in-progress. precedence over __flmap. For example, linking with -Wl,--defsym,__RODATA_FLASH_START__=32k - choses the second 32 KiB block. + chooses the second 32 KiB block. The default uses the last 32 KiB block, which is also the hardware default for bit-field NVMCTRL_CTRLB.FLMAP. When a non-default block is used,
[patch,avr,applied] Fix PR target/113927: Simple code triggers a stack frame
Applied this patch. Johann -- AVR: target 113927 - Simple code triggers stack frame for Reduced Tiny. The -mmcu=avrtiny cores have no ADIW and SBIW instructions. This was implemented by clearing all regs out of regclass ADDW_REGS so that constraint "w" never matched. This corrupted the subset relations of the register classes as they appear in enum reg_class. This patch keeps ADDW_REGS like for all other cores, i.e. it contains R24...R31. Instead of tests like test_hard_reg_class (ADDW_REGS, *) the code now uses avr_adiw_reg_p (*). And all insns with constraint "w" get "isa" insn attribute value of "adiw". Plus, a new built-in macro __AVR_HAVE_ADIW__ is provided, which is more specific than __AVR_TINY__. gcc/ PR target/113927 * config/avr/avr.h (AVR_HAVE_ADIW): New macro. * config/avr/avr-protos.h (avr_adiw_reg_p): New proto. * config/avr/avr.cc (avr_adiw_reg_p): New function. (avr_conditional_register_usage) [AVR_TINY]: Don't clear ADDW_REGS. Replace test_hard_reg_class (ADDW_REGS, ...) with calls to avr_adiw_reg_p. * config/avr/avr.md: Same. (attr "isa") : Remove. : Add. (define_insn, define_insn_and_split): When an alternative has constraint "w", then set attribute "isa" to "adiw". * config/avr/avr-c.cc (avr_cpu_cpp_builtins) [AVR_HAVE_ADIW]: Built-in define __AVR_HAVE_ADIW__. * doc/invoke.texi (AVR Options): Document it. 
diff --git a/gcc/config/avr/avr-c.cc b/gcc/config/avr/avr-c.cc index 60905a76556..5e7f759ed73 100644 --- a/gcc/config/avr/avr-c.cc +++ b/gcc/config/avr/avr-c.cc @@ -307,6 +307,7 @@ avr_cpu_cpp_builtins (struct cpp_reader *pfile) if (AVR_HAVE_ELPMX)cpp_define (pfile, "__AVR_HAVE_ELPMX__"); if (AVR_HAVE_MOVW) cpp_define (pfile, "__AVR_HAVE_MOVW__"); if (AVR_HAVE_LPMX) cpp_define (pfile, "__AVR_HAVE_LPMX__"); + if (AVR_HAVE_ADIW) cpp_define (pfile, "__AVR_HAVE_ADIW__"); if (avr_arch->asm_only) cpp_define (pfile, "__AVR_ASM_ONLY__"); diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h index 46b75f96b9c..7d1f815c664 100644 --- a/gcc/config/avr/avr-protos.h +++ b/gcc/config/avr/avr-protos.h @@ -123,6 +123,7 @@ extern enum reg_class avr_mode_code_base_reg_class (machine_mode, addr_space_t, extern bool avr_regno_mode_code_ok_for_base_p (int, machine_mode, addr_space_t, RTX_CODE, RTX_CODE); extern rtx avr_incoming_return_addr_rtx (void); extern rtx avr_legitimize_reload_address (rtx*, machine_mode, int, int, int, int, rtx (*)(rtx,int)); +extern bool avr_adiw_reg_p (rtx); extern bool avr_mem_flash_p (rtx); extern bool avr_mem_memx_p (rtx); extern bool avr_load_libgcc_p (rtx); diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index d21b286ed8b..4a55f14bff7 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -292,6 +292,17 @@ avr_to_int_mode (rtx x) : simplify_gen_subreg (int_mode_for_mode (mode).require (), x, mode, 0); } + +/* Return true if hard register REG supports the ADIW and SBIW instructions. */ + +bool +avr_adiw_reg_p (rtx reg) +{ + return (AVR_HAVE_ADIW + && test_hard_reg_class (ADDW_REGS, reg)); +} + + namespace { static const pass_data avr_pass_data_recompute_notes = @@ -6272,7 +6283,7 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen) /* Word registers >= R24 can use SBIW/ADIW with 0..63. 
*/ if (i == 0 - && test_hard_reg_class (ADDW_REGS, reg8)) + && avr_adiw_reg_p (reg8)) { int val16 = trunc_int_for_mode (INTVAL (xval), HImode); @@ -8186,7 +8197,7 @@ avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc, if (!started && i % 2 == 0 && i + 2 <= n_bytes - && test_hard_reg_class (ADDW_REGS, reg8)) + && avr_adiw_reg_p (reg8)) { rtx xval16 = simplify_gen_subreg (HImode, xval, imode, i); unsigned int val16 = UINTVAL (xval16) & GET_MODE_MASK (HImode); @@ -8678,7 +8689,7 @@ avr_out_plus_set_ZN (rtx *xop, int *plen) } if (n_bytes == 2 - && test_hard_reg_class (ADDW_REGS, xreg) + && avr_adiw_reg_p (xreg) && IN_RANGE (INTVAL (xval), 1, 63)) { // Add 16-bit value in [1..63] to a w register. @@ -8705,7 +8716,7 @@ avr_out_plus_set_ZN (rtx *xop, int *plen) if (i == 0 && n_bytes >= 2 - && test_hard_reg_class (ADDW_REGS, op[0])) + && avr_adiw_reg_p (op[0])) { op[1] = simplify_gen_subreg (HImode, xval, mode, 0); if (IN_RANGE (INTVAL (op[1]), 0, 63)) @@ -13312,7 +13323,6 @@ avr_conditional_register_usage (void) reg_alloc_order[i] = tiny_reg_alloc_order[i]; } - CLEAR_HARD_REG_SET (reg_class_contents[(int) ADDW_REGS]); CLEAR_HARD_REG_SET (reg_class_contents[(int) NO_LD_REGS]); } } @@ -14043,7 +14053,7 @@ avr_out_cpymem (rtx_insn *insn ATTRIBUTE_UNUSED, rtx *op, int *plen) { addr_space_t as = (addr_space_t) INTVAL (op[0]); machine_mode loop_mode = GET_MODE (op[1]); - bool sbiw_p = test_hard_reg_class (ADDW_REGS, op[1]); + bool
[patch,avr,applied] Minor improvements to option and attribute documentation.
Applied this patch. Johann -- AVR: Improve documentation for -mmcu=. gcc/ * doc/invoke.texi (AVR Options) <-mmcu>: Remove "Atmel". Note on complete device support. AVR: Add examples for ISR macro to interrupt attribute doc. gcc/ * doc/extend.texi (AVR Function Attributes): Fuse description of "signal" and "interrupt" attribute. Link pseudo instruction. diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 2b8ba1949bf..e048404dffe 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -5060,20 +5060,47 @@ without modifying an existing @option{-march=} or @option{-mcpu} option. These function attributes are supported by the AVR back end: @table @code +@cindex @code{signal} function attribute, AVR @cindex @code{interrupt} function attribute, AVR -@item interrupt -Use this attribute to indicate -that the specified function is an interrupt handler. The compiler generates +@item signal +@itemx interrupt +The function is an interrupt service routine (ISR). The compiler generates function entry and exit sequences suitable for use in an interrupt handler -when this attribute is present. +when one of the attributes is present. + +The AVR hardware globally disables interrupts when an interrupt is executed. + +@itemize @bullet +@item ISRs with the @code{signal} attribute do not re-enable interrupts. +It is save to enable interrupts in a @code{signal} handler. +This ``save'' only applies to the code +generated by the compiler and not to the IRQ layout of the +application which is responsibility of the application. + +@item ISRs with the @code{interrupt} attribute re-enable interrupts. +The first instruction of the routine is a @code{SEI} instruction to +globally enable interrupts. 
+@end itemize + +The recommended way to use these attributes is by means of the +@code{ISR} macro provided by @code{avr/interrupt.h} from +@w{@uref{https://www.nongnu.org/avr-libc/user-manual/group__avr__interrupts.html,,AVR-LibC}}: +@example +#include -On the AVR, the hardware globally disables interrupts when an -interrupt is executed. The first instruction of an interrupt handler -declared with this attribute is a @code{SEI} instruction to -re-enable interrupts. See also the @code{signal} function attribute -that does not insert a @code{SEI} instruction. If both @code{signal} and -@code{interrupt} are specified for the same function, @code{signal} -is silently ignored. +ISR (INT0_vect) // Uses the "signal" attribute. +@{ +// Code +@} + +ISR (ADC_vect, ISR_NOBLOCK) // Uses the "interrupt" attribute. +@{ +// Code +@} +@end example + +When both @code{signal} and @code{interrupt} are specified for the same +function, then @code{signal} is silently ignored. @cindex @code{naked} function attribute, AVR @item naked @@ -5088,7 +5115,9 @@ depended upon to work reliably and are not supported. @cindex @code{no_gccisr} function attribute, AVR @item no_gccisr -Do not use @code{__gcc_isr} pseudo instructions in a function with +Do not use the @code{__gcc_isr} +@uref{https://sourceware.org/binutils/docs/as/AVR-Pseudo-Instructions.html,pseudo instruction} +in a function with the @code{interrupt} or @code{signal} attribute aka. interrupt service routine (ISR). Use this attribute if the preamble of the ISR prologue should always read @@ -5141,24 +5170,6 @@ or a frame pointer whereas @code{OS_main} and @code{OS_task} do this as needed. @end itemize -@cindex @code{signal} function attribute, AVR -@item signal -Use this attribute on the AVR to indicate that the specified -function is an interrupt handler. The compiler generates function -entry and exit sequences suitable for use in an interrupt handler when this -attribute is present. 
- -See also the @code{interrupt} function attribute. - -The AVR hardware globally disables interrupts when an interrupt is executed. -Interrupt handler functions defined with the @code{signal} attribute -do not re-enable interrupts. It is save to enable interrupts in a -@code{signal} handler. This ``save'' only applies to the code -generated by the compiler and not to the IRQ layout of the -application which is responsibility of the application. - -If both @code{signal} and @code{interrupt} are specified for the same -function, @code{signal} is silently ignored. @end table @node Blackfin Function Attributes diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index d0e67729f56..e18886e0ac7 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -23732,12 +23732,22 @@ These options are defined for AVR implementations: @table @gcctabopt @opindex mmcu @item -mmcu=@var{mcu} -Specify Atmel AVR instruction set architectures (ISA) or MCU type. - -The default for this option is@tie{}@samp{avr2}. - -GCC supports the following AVR devices and ISAs: - +Specify the AVR instruction set architecture (ISA) or device type. +The default for this option is@tie{}@code{avr2}. + +The following AVR devices and ISAs are supported. +@emph{Note
[patch,avr,applied] Use @defbuiltin to document built-ins.
This patch uses @defbuiltin to document built-in functions so that the functions are listed in the index. Previously, @table @code was used. Johann -- AVR: extend.texi - Use @defbuiltin to document built-ins. gcc/ * doc/extend.texi (AVR Built-in Functions): Use @defbuiltin instead of @table.diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index e048404dffe..b2383b55666 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -16782,37 +16782,41 @@ or if not a specific built-in is implemented or not. For example, if @code{__builtin_avr_nop} is available the macro @code{__BUILTIN_AVR_NOP} is defined to @code{1} and undefined otherwise. -@table @code +@defbuiltin{void __builtin_avr_nop (void)} +@defbuiltinx{void __builtin_avr_nop (void)} +@defbuiltinx{void __builtin_avr_sei (void)} +@defbuiltinx{void __builtin_avr_cli (void)} +@defbuiltinx{void __builtin_avr_sleep (void)} +@defbuiltinx{void __builtin_avr_wdr (void)} +@defbuiltinx{{unsigned char} __builtin_avr_swap (unsigned char)} +@defbuiltinx{{unsigned int} __builtin_avr_fmul (unsigned char, unsigned char)} +@defbuiltinx{int __builtin_avr_fmuls (char, char)} +@defbuiltinx{int __builtin_avr_fmulsu (char, unsigned char)} -@item void __builtin_avr_nop (void) -@itemx void __builtin_avr_sei (void) -@itemx void __builtin_avr_cli (void) -@itemx void __builtin_avr_sleep (void) -@itemx void __builtin_avr_wdr (void) -@itemx unsigned char __builtin_avr_swap (unsigned char) -@itemx unsigned int __builtin_avr_fmul (unsigned char, unsigned char) -@itemx int __builtin_avr_fmuls (char, char) -@itemx int __builtin_avr_fmulsu (char, unsigned char) These built-in functions map to the respective machine instruction, i.e.@: @code{nop}, @code{sei}, @code{cli}, @code{sleep}, @code{wdr}, @code{swap}, @code{fmul}, @code{fmuls} resp. @code{fmulsu}. The three @code{fmul*} built-ins are implemented as library call if no hardware multiplier is available. 
-@item void __builtin_avr_delay_cycles (unsigned long ticks) +@enddefbuiltin + +@defbuiltin{void __builtin_avr_delay_cycles (unsigned long @var{ticks})} Delay execution for @var{ticks} cycles. Note that this built-in does not take into account the effect of interrupts that might increase delay time. @var{ticks} must be a compile-time integer constant; delays with a variable number of cycles are not supported. +@enddefbuiltin -@item char __builtin_avr_flash_segment (const __memx void*) +@defbuiltin{char __builtin_avr_flash_segment (const __memx void*)} This built-in takes a byte address to the 24-bit @ref{AVR Named Address Spaces,address space} @code{__memx} and returns the number of the flash segment (the 64 KiB chunk) where the address points to. Counting starts at @code{0}. If the address does not point to flash memory, return @code{-1}. +@enddefbuiltin -@item uint8_t __builtin_avr_insert_bits (uint32_t map, uint8_t bits, uint8_t val) +@defbuiltin{uint8_t __builtin_avr_insert_bits (uint32_t @var{map}, uint8_t @var{bits}, uint8_t @var{val})} Insert bits from @var{bits} into @var{val} and return the resulting value. The nibbles of @var{map} determine how the insertion is performed: Let @var{X} be the @var{n}-th nibble of @var{map} @@ -16856,12 +16860,12 @@ __builtin_avr_insert_bits (0x3210, bits, val); // reverse the bit order of bits __builtin_avr_insert_bits (0x01234567, bits, 0); @end smallexample +@enddefbuiltin -@item void __builtin_avr_nops (unsigned count) +@defbuiltin{void __builtin_avr_nops (unsigned @var{count})} Insert @var{count} @code{NOP} instructions. The number of instructions must be a compile-time integer constant. - -@end table +@enddefbuiltin @noindent There are many more AVR-specific built-in functions that are used to
[patch,avr,applied] Use int types of exact width and signedness in built-ins prototypes
AVR: Use types of exact size and signedness in built-ins. The AVR built-ins used types like "int" or "char" that don't have exact signedness or type size which depend on -mint8 and -f[no-][un-]signed-char etc. As the built-ins are modelling machine instructions of given type sizes and signedness, also use according types in their prototypes. gcc/ * config/avr/builtins.def: Use function prototypes of given size and signedness. * config/avr/avr.cc (avr_init_builtins): Adjust types required by builtins.def. * doc/extend.texi (AVR Built-in Functions): Adjust accordingly.diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index 4a55f14bff7..d3756a2f036 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -14605,35 +14605,35 @@ avr_init_builtins (void) { tree void_ftype_void = build_function_type_list (void_type_node, NULL_TREE); - tree uchar_ftype_uchar -= build_function_type_list (unsigned_char_type_node, -unsigned_char_type_node, + tree uintQI_ftype_uintQI += build_function_type_list (unsigned_intQI_type_node, +unsigned_intQI_type_node, NULL_TREE); - tree uint_ftype_uchar_uchar -= build_function_type_list (unsigned_type_node, -unsigned_char_type_node, -unsigned_char_type_node, + tree uintHI_ftype_uintQI_uintQI += build_function_type_list (unsigned_intHI_type_node, +unsigned_intQI_type_node, +unsigned_intQI_type_node, NULL_TREE); - tree int_ftype_char_char -= build_function_type_list (integer_type_node, -char_type_node, -char_type_node, + tree intHI_ftype_intQI_intQI += build_function_type_list (intHI_type_node, +intQI_type_node, +intQI_type_node, NULL_TREE); - tree int_ftype_char_uchar -= build_function_type_list (integer_type_node, -char_type_node, -unsigned_char_type_node, + tree intHI_ftype_intQI_uintQI += build_function_type_list (intHI_type_node, +intQI_type_node, +unsigned_intQI_type_node, NULL_TREE); - tree void_ftype_ulong + tree void_ftype_uintSI = build_function_type_list (void_type_node, -long_unsigned_type_node, 
+unsigned_intSI_type_node, NULL_TREE); - tree uchar_ftype_ulong_uchar_uchar -= build_function_type_list (unsigned_char_type_node, -long_unsigned_type_node, -unsigned_char_type_node, -unsigned_char_type_node, + tree uintQI_ftype_uintSI_uintQI_uintQI += build_function_type_list (unsigned_intQI_type_node, +unsigned_intSI_type_node, +unsigned_intQI_type_node, +unsigned_intQI_type_node, NULL_TREE); tree const_memx_void_node @@ -14644,8 +14644,8 @@ avr_init_builtins (void) tree const_memx_ptr_type_node = build_pointer_type_for_mode (const_memx_void_node, PSImode, false); - tree char_ftype_const_memx_ptr -= build_function_type_list (char_type_node, + tree intQI_ftype_const_memx_ptr += build_function_type_list (intQI_type_node, const_memx_ptr_type_node, NULL); diff --git a/gcc/config/avr/builtins.def b/gcc/config/avr/builtins.def index b4bf7beb590..316bdebe498 100644 --- a/gcc/config/avr/builtins.def +++ b/gcc/config/avr/builtins.def @@ -43,17 +43,17 @@ DEF_BUILTIN (SLEEP, 0, void_ftype_void, sleep, NULL) /* Mapped to respective instruction but might also be folded away or emit as libgcc call if ISA does not provide the instruction. */ -DEF_BUILTIN (SWAP, 1, uchar_ftype_uchar, rotlqi3_4, NULL) -DEF_BUILTIN (FMUL, 2, uint_ftype_uchar_uchar, fmul, NULL) -DEF_BUILTIN (FMULS, 2, int_ftype_char_char,fmuls, NULL) -DEF_BUILTIN (FMULSU, 2, int_ftype_char_uchar, fmulsu, NULL) +DEF_BUILTIN (SWAP, 1, uintQI_ftype_uintQI,rotlqi3_4, NULL) +DEF_BUILTIN (FMUL, 2, uintHI_ftype_uintQI_uintQI, fmul, NULL) +DEF_BUILTIN (FMULS, 2, intHI_ftype_intQI_intQI,fmuls, NULL) +DEF_BUILTIN (FMULSU, 2, intHI_ftype_intQI_uintQI, fmulsu, NULL) /* More complex stuff that cannot be mapped 1:1 to an instruction. 
*/ -DEF_BUILTIN (DELAY_CYCLES, -1, void_ftype_ulong, nothing, NULL) -DEF_BUILTIN (NOPS, -1, void_ftype_ulong, nothing, NULL) -DEF_BUILTIN (INSERT_BITS, 3, uchar_ftype_ulong_uchar_uchar, insert_bits, NULL) -DEF_BUILTIN (FLASH_SEGMENT, 1, char_ftype_const_memx_ptr, flash_segment, NULL) +DEF_BUILTIN (DELAY_CYCLES, -1, void_ftype_uintSI, nothing, NULL) +DEF_BUILTIN (NOPS, -1, void_ftype_uintSI, nothing, NULL) +DEF_BUILTIN (INSERT_BITS, 3, uintQI_ftype_uintSI_uintQI_uintQI, insert_bits, NULL) +DEF_BUILTIN (FLASH_SEGMENT, 1, intQI_ftype_const_memx_ptr, flash_segment, NULL) /* ISO/IEC TR 18037 "Embedded C" The following builtins are undocumented and used by stdfix.h. */ diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index b2383b55666..2135dfde9c8 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -16783,32 +16783,30 @@ or if not a specific built-in is implemented or not. For example, if @code{__BUILTIN_AVR_NOP} is defined to @code{1} and undefined otherwise. @defbuilti
[patch,avr] PR114100 : Better indirect addressing on reduced cores
A description of what the patch does follows in the commit message below. On ATmega128, there are no changes in test results. On ATtiny40 (reduced core) there are no new execution fails, but apart from that there is quite some noise in the delta: unsupported (memory full) -> pass unsupported (memory full) -> fail due to unresolved symbol (printf, float, ...) unsupported (memory full) -> fail (excess errors) this is because the testsuite is far from being diagnostic-clean. All these transitions are because the code size shrinks a lot, sometimes 20% or more. When there are no objections or improvements, I would go ahead and install it so it can go into v14. Johann -- The Reduced Tiny core does not support indirect addressing with offset, which basically means that every indirect memory access with a size of more than one byte is effectively POST_INC or PRE_DEC. The lack of that addressing mode is currently handled by pretending to support it, and then let the insn printers add and subtract again offsets as needed. For example, the following C code int vars[10]; void inc_var2 (void) { ++vars[2]; } is compiled to: ldi r30,lo8(vars) ; 14 [c=4 l=2] *movhi/4 ldi r31,hi8(vars) subi r30,lo8(-(4)); 15 [c=8 l=6] *movhi/2 sbci r31,hi8(-(4)) ld r20,Z+ ld r21,Z subi r30,lo8((4+1)) sbci r31,hi8((4+1)) subi r20,-1 ; 16 [c=4 l=2] *addhi3_clobber/1 sbci r21,-1 subi r30,lo8(-(4+1)); 17 [c=4 l=4] *movhi/3 sbci r31,hi8(-(4+1)) st Z,r21 st -Z,r20 where the code could be -- and with this patch actually is -- like ldi r30,lo8(vars+4); 28 [c=4 l=2] *movhi/4 ldi r31,hi8(vars+4) ld r20,Z+ ; 17 [c=8 l=2] *movhi/2 ld r21,Z+ subi r20,-1; 19 [c=4 l=2] *addhi3_clobber/1 sbci r21,-1 st -Z,r21 ; 30 [c=4 l=2] *movhi/3 st -Z,r20 This is achieved in two steps: - A post-reload split into "real" instructions during .split2. - A new avr-specific mini pass .avr-fuse-add that runs before RTL peephole and that tries to combine the generated pointer additions into memory accesses to form POST_INC or PRE_DEC. 
gcc/ PR target/114100 * doc/invoke.texi (AVR Options) <-mfuse-add>: Document. * config/avr/avr.opt (-mfuse-add=): New target option. * common/config/avr/avr-common.cc (avr_option_optimization_table) [OPT_LEVELS_1_PLUS]: Set -mfuse-add=1. [OPT_LEVELS_2_PLUS]: Set -mfuse-add=2. * config/avr/avr-passes.def (avr_pass_fuse_add): Insert new pass. * config/avr/avr-protos.h (avr_split_tiny_move) (make_avr_pass_fuse_add): New protos. * config/avr/avr.md [AVR_TINY]: New post-reload splitter uses avr_split_tiny_move to split indirect memory accesses. (gen_move_clobbercc): New define_expand helper. * config/avr/avr.cc (avr_pass_data_fuse_add): New pass data. (avr_pass_fuse_add): New class from rtl_opt_pass. (make_avr_pass_fuse_add, avr_split_tiny_move): New functions. (reg_seen_between_p, emit_move_ccc, emit_move_ccc_after): New functions. (avr_legitimate_address_p) [AVR_TINY]: Don't restrict offsets of PLUS addressing for AVR_TINY. (avr_regno_mode_code_ok_for_base_p) [AVR_TINY]: Ignore -mstrict-X. (avr_out_plus_1) [AVR_TINY]: Tweak ++Y and --Y. (avr_mode_code_base_reg_class) [AVR_TINY]: Always return POINTER_REGS. 
gcc/common/config/avr/avr-common.cc | 2 + gcc/config/avr/avr-passes.def | 9 ++ gcc/config/avr/avr-protos.h | 2 + gcc/config/avr/avr.cc | 787 - gcc/config/avr/avr.md | 29 + gcc/config/avr/avr.opt | 8 ++ gcc/doc/invoke.texi | 10 +- 7 files changed, 845 insertions(+), 2 deletions(-)diff --git a/gcc/common/config/avr/avr-common.cc b/gcc/common/config/avr/avr-common.cc index 7867483909d..fdf130f1e1a 100644 --- a/gcc/common/config/avr/avr-common.cc +++ b/gcc/common/config/avr/avr-common.cc @@ -34,6 +34,8 @@ static const struct default_options avr_option_optimization_table[] = { OPT_LEVELS_ALL, OPT_fcaller_saves, NULL, 0 }, { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_mgas_isr_prologues, NULL, 1 }, { OPT_LEVELS_1_PLUS, OPT_mmain_is_OS_task, NULL, 1 }, +{ OPT_LEVELS_1_PLUS, OPT_mfuse_add_, NULL, 1 }, +{ OPT_LEVELS_2_PLUS, OPT_mfuse_add_, NULL, 2 }, // Stick to the "old" placement of the subreg lowering pass. { OPT_LEVELS_1_PLUS, OPT_fsplit_wide_types_early, NULL, 1 }, /* Allow optimizer to introduce store data races. This used to be the diff --git a/gcc/config/avr/avr-passes.def b/gcc/config/avr/avr-passes.def index 34e5b95f920..748260edaef 100644 --- a/gcc/config/avr/avr-passes.def +++ b/gcc/config/avr/avr-passes.def @@ -17,6 +17,15 @@ along with GCC;
[avr,patch,applied] Remove some dead code
This code was dead in the block where it lived, because avr_adiw_reg_p() is only true when ADIW and SBIW are available -- which is not the case for AVR_TINY. Johann -- AVR: Dead code removal. gcc/ * config/avr/avr.cc (avr_out_compare) [AVR_TINY]: Remove code in an "if avr_adiw_reg_p()" block that's dead for AVR_TINY. diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index d3756a2f036..655a8e89fdc 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -6291,10 +6291,7 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen) && (val8 == 0 || reg_unused_after (insn, xreg))) { - if (AVR_TINY) - avr_asm_len (TINY_SBIW (%A0, %B0, %1), xop, plen, 2); - else - avr_asm_len ("sbiw %0,%1", xop, plen, 1); + avr_asm_len ("sbiw %0,%1", xop, plen, 1); i++; continue; @@ -6305,9 +6302,7 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen) && compare_eq_p (insn) && reg_unused_after (insn, xreg)) { - return AVR_TINY - ? avr_asm_len (TINY_ADIW (%A0, %B0, %n1), xop, plen, 2) - : avr_asm_len ("adiw %0,%n1", xop, plen, 1); + return avr_asm_len ("adiw %0,%n1", xop, plen, 1); } }