[avr,committed] Use monic denominator polynomials to save a multiplication.

2023-10-05 Thread Georg-Johann Lay

This is a small tweak in LibF7 to save one multiplication in the
computation of denominator polynomials.  The polynomials are monic now,
and f7_horner needs one fewer multiplication.

Johann

--

LibF7: Use monic denominator polynomials to save a multiplication.

libgcc/config/avr/libf7/
* libf7.h (F7_FLAGNO_plusx, F7_FLAG_plusx): New macros.
* libf7.c (f7_horner): Handle F7_FLAG_plusx in highest coefficient.
* libf7-const.def [F7MOD_atan_]: Denominator: Set F7_FLAG_plusx
and omit highest term.
[F7MOD_asinacos_]: Use rational function with normalized denominator.


diff --git a/libgcc/config/avr/libf7/libf7-const.def 
b/libgcc/config/avr/libf7/libf7-const.def

index 8764c81ffa4..0e4c4d8701e 100644
--- a/libgcc/config/avr/libf7/libf7-const.def
+++ b/libgcc/config/avr/libf7/libf7-const.def
@@ -121,8 +121,7 @@ F7_CONST_DEF (X, 0, 
0xd6,0xa5,0x2d,0x73,0x34,0xd8,0x60, 11)

 F7_CONST_DEF (X, 0, 0xe5,0x08,0xb8,0x24,0x20,0x81,0xe7, 11)
 F7_CONST_DEF (X, 0, 0xe3,0xb3,0x35,0xfa,0xbf,0x1f,0x81, 10)
 F7_CONST_DEF (X, 0, 0xd3,0x89,0x2b,0xb6,0x3e,0x2e,0x05, 8)
-F7_CONST_DEF (X, 0, 0x9f,0xab,0xe9,0xd9,0x35,0xed,0x27, 5)
-F7_CONST_DEF (X, 0, 0x80,0x00,0x00,0x00,0x00,0x00,0x00, 0)
+F7_CONST_DEF (X, 8, 0x9f,0xab,0xe9,0xd9,0x35,0xed,0x27, 5)
 #endif

 #elif defined (SWIFT_3_4)
@@ -147,24 +146,22 @@ F7_CONST_DEF (pi_6, 0, 
0x86,0x0a,0x91,0xc1,0x6b,0x9b,0x2c, -1)

 #endif // which MiniMax

 #elif defined (F7MOD_asinacos_)
-// Relative error < 5.6E-18, quality = 1.0037 (ideal = 1).
+// f(x) = asin(w) / w,  w = sqrt(x/2),  w in [0, 0.5].
+// Relative error < 4.9E-18, Q10 = 21.7
 #if defined (FOR_NUMERATOR)
-// 0.9442491073135027586203 - 
1.035234033892197627842731209x + 
0.35290206232981519813422591897720574012x^2 - 
0.04333483170641685705612351801x^3 + 
0.0012557428614630796315205218507940285622x^4 + 
0.084705471128435769021718764878041684288x^5
-// p = Poly ([Decimal('0.9442491073135027586203'), 
Decimal('-1.0352340338921976278427312087167692142'), 
Decimal('0.35290206232981519813422591897720574012'), 
Decimal('-0.043334831706416857056123518013656946650'), 
Decimal('0.0012557428614630796315205218507940285622'), 
Decimal('0.084705471128435769021718764878041684288')])

-F7_CONST_DEF (X, 0, 0x80,0x00,0x00,0x00,0x00,0x00,0x00, 0)
-F7_CONST_DEF (X, 1, 0x84,0x82,0x8c,0x7f,0xa2,0xf6,0x65, 0)
-F7_CONST_DEF (X, 0, 0xb4,0xaf,0x94,0x40,0xcb,0x86,0x69, -2)
-F7_CONST_DEF (X, 1, 0xb1,0x7f,0xdd,0x4f,0x4e,0xbe,0x1d, -5)
-F7_CONST_DEF (X, 0, 0xa4,0x97,0xbd,0x0b,0x59,0xc9,0x25, -10)
-F7_CONST_DEF (X, 0, 0x8e,0x1c,0xb9,0x0b,0x50,0x6c,0xce, -17)
+// -41050.4389591195072042579 + 43293.8985171424974364797 x - 
15230.0535110759003163511 x^2 + 1996.35047839480810448269 x^3 - 
72.2973010025603956782375 x^4

+F7_CONST_DEF (X, 1, 0xa0,0x5a,0x70,0x5f,0x9f,0xf6,0x90, 15)
+F7_CONST_DEF (X, 0, 0xa9,0x1d,0xe6,0x05,0x38,0x2d,0xec, 15)
+F7_CONST_DEF (X, 1, 0xed,0xf8,0x36,0xcb,0x9b,0x83,0xdd, 13)
+F7_CONST_DEF (X, 0, 0xf9,0x8b,0x37,0x1e,0x77,0x74,0xf9, 10)
+F7_CONST_DEF (X, 1, 0x90,0x98,0x37,0xd6,0x46,0x21,0x3c, 6)
 #elif defined (FOR_DENOMINATOR)
-// 1 - 1.118567367225532923662371649x + 
0.42736600959872448854098334016758333519x^2 - 
0.06355588484963171659942148390x^3 + 
0.0028820878185134035637440105959294542908x^4
-// q = Poly ([Decimal('1'), 
Decimal('-1.1185673672255329236623716486696411533'), 
Decimal('0.42736600959872448854098334016758333519'), 
Decimal('-0.063555884849631716599421483898013782858'), 
Decimal('0.0028820878185134035637440105959294542908')])

-F7_CONST_DEF (X, 0, 0x80,0x00,0x00,0x00,0x00,0x00,0x00, 0)
-F7_CONST_DEF (X, 1, 0x8f,0x2d,0x37,0x2a,0x4d,0xa1,0x57, 0)
-F7_CONST_DEF (X, 0, 0xda,0xcf,0xb7,0xb5,0x4c,0x0d,0xee, -2)
-F7_CONST_DEF (X, 1, 0x82,0x29,0x96,0x77,0x2e,0x19,0xc7, -4)
-F7_CONST_DEF (X, 0, 0xbc,0xe1,0x68,0xec,0xba,0x20,0x29, -9)
+// -41050.4389591195074048679 + 46714.7684304025268691353 x - 
18353.2551497967388796235 x^2 + 2878.9626098308300020834 x^3 - 
150.822900775648362380508 x^4 + x^5

+F7_CONST_DEF (X, 1, 0xa0,0x5a,0x70,0x5f,0x9f,0xf6,0x91, 15)
+F7_CONST_DEF (X, 0, 0xb6,0x7a,0xc4,0xb7,0xda,0xd8,0x1b, 15)
+F7_CONST_DEF (X, 1, 0x8f,0x62,0x82,0xa2,0xfe,0x81,0x26, 14)
+F7_CONST_DEF (X, 0, 0xb3,0xef,0x66,0xd9,0x90,0xe3,0x91, 11)
+F7_CONST_DEF (X, 9, 0x96,0xd2,0xa9,0xa0,0x0f,0x43,0x44, 7)
 #endif

 #elif defined (F7MOD_sincos_)
diff --git a/libgcc/config/avr/libf7/libf7.c 
b/libgcc/config/avr/libf7/libf7.c

index 8fb57ef90cc..373a8a55d90 100644
--- a/libgcc/config/avr/libf7/libf7.c
+++ b/libgcc/config/avr/libf7/libf7.c
@@ -1527,6 +1527,9 @@ void f7_horner (f7_t *cc, const f7_t *xx, uint8_t 
n_coeff, const f7_t *coeff,


   f7_copy_flash (yy, pcoeff);

+  if (yy->flags & F7_FLAG_plusx)
+f7_Iadd (yy, xx);
+
   while (1)
 {
   --pcoeff;
diff --git a/libgcc/config/avr/libf7/libf7.h 
b/libgcc/config/avr/libf7/libf7.h

index 03fe6abe839..3f81b5f1f88 100644
--- a/libgcc/config/avr/libf7/libf7.h
+++ b/libgcc/config/avr/libf7/libf7.h
@@ -47,6 +47,1

[avr,committed] Remove all uses of attribute pure from LibF7.

2023-10-05 Thread Georg-Johann Lay

Applied the following patch.

Johann


LibF7: Remove uses of attribute pure.

libgcc/config/avr/libf7/
* libf7.h (F7_PURE): Remove all occurrences.
* libf7.c: Same.

diff --git a/libgcc/config/avr/libf7/libf7.c 
b/libgcc/config/avr/libf7/libf7.c

index 373a8a55d90..0d9e4c325b2 100644
--- a/libgcc/config/avr/libf7/libf7.c
+++ b/libgcc/config/avr/libf7/libf7.c
@@ -352,7 +352,7 @@ float f7_get_float (const f7_t *aa)

   return make_float (mant);
 }
-F7_PURE ALIAS (f7_get_float, f7_truncdfsf2)
+ALIAS (f7_get_float, f7_truncdfsf2)
 #endif // F7MOD_get_float_

 #define DBL_DIG_EXP   11
@@ -572,7 +572,7 @@ int32_t f7_get_s32 (const f7_t *aa)
   extern int32_t to_s32 (const f7_t*, uint8_t) F7ASM(f7_to_integer_asm);
   return to_s32 (aa, 0x1f);
 }
-F7_PURE ALIAS (f7_get_s32, f7_fixdfsi)
+ALIAS (f7_get_s32, f7_fixdfsi)
 #endif // F7MOD_get_s32_


@@ -583,7 +583,7 @@ F7_PURE ALIAS (f7_get_s32, f7_fixdfsi)
   extern int64_t to_s64 (const f7_t*, uint8_t) F7ASM(f7_to_integer_asm);
   return to_s64 (aa, 0x3f);
 }
-F7_PURE ALIAS (f7_get_s64, f7_fixdfdi)
+ALIAS (f7_get_s64, f7_fixdfdi)
 #endif // F7MOD_get_s64_

 #ifdef F7MOD_get_u16_
@@ -603,7 +603,7 @@ uint32_t f7_get_u32 (const f7_t *aa)
   extern uint32_t to_u32 (const f7_t*, uint8_t) F7ASM(f7_to_unsigned_asm);
   return to_u32 (aa, 0x1f);
 }
-F7_PURE ALIAS (f7_get_u32, f7_fixunsdfsi)
+ALIAS (f7_get_u32, f7_fixunsdfsi)
 #endif // F7MOD_get_u32_


@@ -614,7 +614,7 @@ uint64_t f7_get_u64 (const f7_t *aa)
   extern int64_t to_u64 (const f7_t*, uint8_t) F7ASM(f7_to_unsigned_asm);
   return to_u64 (aa, 0x3f);
 }
-F7_PURE ALIAS (f7_get_u64, f7_fixunsdfdi)
+ALIAS (f7_get_u64, f7_fixunsdfdi)
 #endif // F7MOD_get_u64_


diff --git a/libgcc/config/avr/libf7/libf7.h 
b/libgcc/config/avr/libf7/libf7.h

index 3f81b5f1f88..f692854dced 100644
--- a/libgcc/config/avr/libf7/libf7.h
+++ b/libgcc/config/avr/libf7/libf7.h
@@ -36,7 +36,7 @@
 --  Inline asm
 --  Setting assembler names by means of __asm (GNU-C).
 --  Attributes: alias, always_inline, const, noinline, unused,
-progmem, pure, weak, warning
+   progmem, weak, warning
 --  GCC built-ins: __builtin_abort, __builtin_constant_p
 --  AVR built-ins: __builtin_avr_bitsr, __builtin_avr_rbits
 */
@@ -112,7 +112,6 @@ extern "C" {
 #define F7_INLINE   inline __attribute__((__always_inline__))
 #define F7_NOINLINE __attribute__((__noinline__))
 #define F7_WEAK __attribute__((__weak__))
-#define F7_PURE __attribute__((__pure__))
 #define F7_UNUSED   __attribute__((__unused__))
 #define F7_CONST__attribute__((__const__))

@@ -150,7 +149,7 @@ typedef uint64_t f7_double_t;
 #define F7_MANT_HI2(X) \
   (*(uint16_t*) & (X)->mant[F7_MANT_BYTES - 2])

-static F7_INLINE F7_PURE
+static F7_INLINE
 uint8_t f7_classify (const f7_t *aa)
 {
   extern void f7_classify_asm (void);
@@ -361,14 +360,14 @@ f7_t* f7_abs (f7_t *cc, const f7_t *aa)
 }


-F7_PURE extern int8_t f7_cmp (const f7_t*, const f7_t*);
-F7_PURE extern bool f7_lt_impl (const f7_t*, const f7_t*);
-F7_PURE extern bool f7_le_impl (const f7_t*, const f7_t*);
-F7_PURE extern bool f7_gt_impl (const f7_t*, const f7_t*);
-F7_PURE extern bool f7_ge_impl (const f7_t*, const f7_t*);
-F7_PURE extern bool f7_ne_impl (const f7_t*, const f7_t*);
-F7_PURE extern bool f7_eq_impl (const f7_t*, const f7_t*);
-F7_PURE extern bool f7_unord_impl (const f7_t*, const f7_t*);
+extern int8_t f7_cmp (const f7_t*, const f7_t*);
+extern bool f7_lt_impl (const f7_t*, const f7_t*);
+extern bool f7_le_impl (const f7_t*, const f7_t*);
+extern bool f7_gt_impl (const f7_t*, const f7_t*);
+extern bool f7_ge_impl (const f7_t*, const f7_t*);
+extern bool f7_ne_impl (const f7_t*, const f7_t*);
+extern bool f7_eq_impl (const f7_t*, const f7_t*);
+extern bool f7_unord_impl (const f7_t*, const f7_t*);

 static F7_INLINE
 bool f7_lt (const f7_t *aa, const f7_t *bb)
@@ -541,14 +540,14 @@ extern f7_t* f7_set_u32 (f7_t*, uint32_t);
 extern void f7_set_float (f7_t*, float);
 extern void f7_set_pdouble (f7_t*, const f7_double_t*);

-F7_PURE extern int16_t f7_get_s16 (const f7_t*);
-F7_PURE extern int32_t f7_get_s32 (const f7_t*);
-F7_PURE extern int64_t f7_get_s64 (const f7_t*);
-F7_PURE extern uint16_t f7_get_u16 (const f7_t*);
-F7_PURE extern uint32_t f7_get_u32 (const f7_t*);
-F7_PURE extern uint64_t f7_get_u64 (const f7_t*);
-F7_PURE extern float f7_get_float (const f7_t*);
-F7_PURE extern f7_double_t f7_get_double (const f7_t*);
+extern int16_t f7_get_s16 (const f7_t*);
+extern int32_t f7_get_s32 (const f7_t*);
+extern int64_t f7_get_s64 (const f7_t*);
+extern uint16_t f7_get_u16 (const f7_t*);
+extern uint32_t f7_get_u32 (const f7_t*);
+extern uint64_t f7_get_u64 (const f7_t*);
+extern float f7_get_float (const f7_t*);
+extern f7_double_t f7_get_double (const f7_t*);

 #if USE_LPM == 1
   #define F7_PGMSPACE __attribute__((__progmem__))
@@ -639,10 +638,10 @@ extern void f7_horner (f7_t*, const f7_t*, 
uint8_t, const f7_t *coeff, f7_t*);

 ex

[avr,committed] Implement atan2

2023-10-12 Thread Georg-Johann Lay

This implements atan2 which was missing from LibF7.

Johann

--

LibF7: Implement atan2.

libgcc/config/avr/libf7/
* libf7.c (F7MOD_atan2_, f7_atan2): New module and function.
* libf7.h: Adjust comments.
* libf7-common.mk (CALL_PROLOGUES): Add atan2.


diff --git a/libgcc/config/avr/libf7/libf7-common.mk 
b/libgcc/config/avr/libf7/libf7-common.mk

index 28663b52e6c..e417715a7e5 100644
--- a/libgcc/config/avr/libf7/libf7-common.mk
+++ b/libgcc/config/avr/libf7/libf7-common.mk
@@ -43,7 +43,7 @@ m_xd += lrint lround
 # -mcall-prologues
 CALL_PROLOGUES += divx sqrt cbrt get_double set_double logx exp exp10 
pow10

 CALL_PROLOGUES += put_C truncx round minmax sincos tan cotan pow powi fmod
-CALL_PROLOGUES += atan asinacos madd_msub hypot init horner sinhcosh tanh
+CALL_PROLOGUES += atan atan2 asinacos madd_msub hypot init horner 
sinhcosh tanh


 # -mstrict-X
 STRICT_X += log addsub truncx ldexp exp
diff --git a/libgcc/config/avr/libf7/libf7.c 
b/libgcc/config/avr/libf7/libf7.c

index 0d9e4c325b2..49baac73e6d 100644
--- a/libgcc/config/avr/libf7/libf7.c
+++ b/libgcc/config/avr/libf7/libf7.c
@@ -1099,7 +1099,7 @@ f7_t* f7_ldexp (f7_t *cc, const f7_t *aa, int delta)

   F7_CONST_ADDR ( CST, f7_t* PTMP)

-  Return an LD address to for some f7_const_X[_P] constant.
+  Return an LD address to some f7_const_X[_P] constant.
   *PTMP might be needed to hold a copy of f7_const_X_P in RAM.

   f7_t*   F7_U16_ADDR (uint16_t X, f7_t* PTMP)   // USE_LPM
@@ -2189,6 +2189,64 @@ void f7_atan (f7_t *cc, const f7_t *aa)
 #endif // F7MOD_atan_


+#ifdef F7MOD_atan2_
+F7_WEAK
+void f7_atan2 (f7_t *cc, const f7_t *yy, const f7_t *xx)
+{
+  uint8_t y_class = f7_classify (yy);
+  uint8_t x_class = f7_classify (xx);
+
+  // (NaN, *) -> NaN
+  // (*, NaN) -> NaN
+  if (f7_class_nan (y_class | x_class))
+return f7_set_nan (cc);
+
+  // (0, 0) -> 0
+  if (f7_class_zero (y_class & x_class))
+return f7_clr (cc);
+
+  f7_t pi7, *pi = &pi7;
+  f7_const (pi, pi);
+
+  // (Inf, +Inf) -> +pi/4;(-Inf, +Inf) -> +3pi/4
+  // (Inf, -Inf) -> -pi/4;(-Inf, -Inf) -> -3pi/4
+  if (f7_class_inf (y_class & x_class))
+{
+  f7_copy (cc, pi);
+  if (! f7_class_sign (x_class))
+   cc->expo = F7_(const_pi_expo) - 1; // pi / 2
+  pi->expo = F7_(const_pi_expo) - 2;   // pi / 4
+  f7_Isub (cc, pi);
+  cc->flags = y_class & F7_FLAG_sign;
+  return;
+}
+
+  // sign(pi) := sign(y)
+  pi->flags = y_class & F7_FLAG_sign;
+
+  // Only use atan(*) with |*| <= 1.
+
+  if (f7_cmp_abs (yy, xx) > 0)
+{
+  // |y| > |x|:  atan2 = sgn(y) * pi/2 - atan (x / y);
+  pi->expo = F7_(const_pi_expo) - 1;  // +- pi / 2
+  f7_div (cc, xx, yy);
+  f7_atan (cc, cc);
+  f7_IRsub (cc, pi);
+}
+  else
+{
+  // x >  |y|:  atan2 = atan (y / x)
+  // x < -|y|:  atan2 = atan (y / x) +- pi
+  f7_div (cc, yy, xx);
+  f7_atan (cc, cc);
+  if (f7_class_sign (x_class))
+   f7_Iadd (cc, pi);
+}
+}
+#endif // F7MOD_atan2_
+
+
 #ifdef F7MOD_asinacos_

 #define ARRAY_NAME coeff_func_a_zahler
diff --git a/libgcc/config/avr/libf7/libf7.h 
b/libgcc/config/avr/libf7/libf7.h

index f692854dced..b50e6e218ba 100644
--- a/libgcc/config/avr/libf7/libf7.h
+++ b/libgcc/config/avr/libf7/libf7.h
@@ -606,6 +606,7 @@ extern void f7_sin (f7_t*, const f7_t*);
 extern void f7_cos (f7_t*, const f7_t*);
 extern void f7_tan (f7_t*, const f7_t*);
 extern void f7_atan (f7_t*, const f7_t*);
+extern void f7_atan2 (f7_t*, const f7_t*, const f7_t*);
 extern void f7_asin (f7_t*, const f7_t*);
 extern void f7_acos (f7_t*, const f7_t*);
 extern void f7_tanh (f7_t*, const f7_t*);
@@ -617,7 +618,6 @@ extern void f7_exp10 (f7_t*, const f7_t*);
 extern void f7_pow10 (f7_t*, const f7_t*);

 // Just prototypes, not implemented yet.
-extern void f7_atan2 (f7_t*, const f7_t*, const f7_t*);
 extern long f7_lrint (const f7_t*);
 extern long f7_lround (const f7_t*);



[avr,committed] Implement fma, fmal.

2023-10-17 Thread Georg-Johann Lay

This commit implements fma and fmal which were missing from LibF7.

Johann

--

LibF7: Implement fma / fmal.

libgcc/config/avr/libf7/
* libf7.h (F7_SIZEOF): New macro.
* libf7-asm.sx: Use F7_SIZEOF instead of magic number "10".
(F7MOD_D_fma_, __fma): New module and function.
(fma) [-mdouble=64]: Define as alias for __fma.
(fmal) [-mlong-double=64]: Define as alias for __fma.
* libf7-common.mk (F7_ASM_PARTS): Add D_fma.


diff --git a/libgcc/config/avr/libf7/libf7-asm.sx 
b/libgcc/config/avr/libf7/libf7-asm.sx

index 8fbd66bd290..5df167fe73c 100644
--- a/libgcc/config/avr/libf7/libf7-asm.sx
+++ b/libgcc/config/avr/libf7/libf7-asm.sx
@@ -283,8 +283,8 @@ DEFUN copy
 cp  XL, ZL
 cpc XH, ZH
 breq 9f
-adiwXL, 10
-adiwZL, 10
+adiwXL, F7_SIZEOF
+adiwZL, F7_SIZEOF
 set
 bld ZERO,   1
 bld ZERO,   3   ; ZERO = 0b1010 = 10.
@@ -312,8 +312,8 @@ DEFUN copy_P
 st  X+, TMP
 dec ZERO
 brne .Loop
-sbiwX,  10
-sbiwZ,  10
+sbiwX,  F7_SIZEOF
+sbiwZ,  F7_SIZEOF
 ret
 ENDF copy_P
 #endif /* F7MOD_copy_P_ */
@@ -1328,6 +1328,58 @@ DEFUN sqrt_approx
 #undef Carry


+#ifdef F7MOD_D_fma_
+_DEFUN __fma
+DALIAS fma
+LALIAS fmal
+
+#define n_pushed4
+#define n_frame (2 * F7_SIZEOF)
+
+do_prologue_saves n_pushed, n_frame
+;; Y = FramePointer + 1
+adiwY,  1
+
+;; FP + 1 = (f7_t) arg1
+wmovr16,Y
+;; The double argument arg1 is already in R18[].
+XCALL   F7_NAME (set_double_impl)
+
+;; The double argument arg2 is in R10[].  Move it to R18[].
+wmovr18,r10
+wmovr20,r12
+wmovr22,r14
+;; R16, R17 are clobbered.  Fetch them from where prologue_saves 
put them.

+ldd r24,Y + n_frame + 3 ; Saved R16
+ldd r25,Y + n_frame + 2 ; Saved R17
+;; FP + 1 + 10 = (f7_t) arg2
+subir16,lo8 (-F7_SIZEOF)
+sbcir17,hi8 (-F7_SIZEOF)
+XCALL   F7_NAME (set_double_impl)
+
+wmovr24,Y   ; &arg1
+wmovr22,r16 ; &arg2
+XCALL   F7_NAME (Imul)  ; arg1 *= arg2
+
+;; The 3rd double argument arg3 was passed on the stack.  Move it 
to R18[],
+;; Don't use f7_set_pdouble() because that function is unused (for 
now).

+.irp n, 0, 1, 2, 3, 4, 5, 6, 7
+ldd 18+\n,  Y + n_frame + n_pushed + PC_SIZE + \n
+.endr
+XCALL   F7_NAME (set_double_impl)
+
+wmovr24,Y   ; &arg1
+wmovr22,r16 ; &arg2
+XCALL   F7_NAME (Iadd)  ; arg1 += arg2
+
+wmovr24,Y   ; &arg1
+XCALL   F7_NAME (get_double)
+
+do_epilogue_restores n_pushed, n_frame
+_ENDF __fma
+#endif /* F7MOD_D_fma_ */
+
+
 #ifdef F7MOD_D_fabs_
 _DEFUN __fabs
 DALIAS fabs
@@ -1493,7 +1545,7 @@ DEFUN call_dd   ; WHAT = R13 = 3
 wmovr14, Z

 #define n_pushed4
-#define n_frame 10
+#define n_frame F7_SIZEOF

 do_prologue_saves n_pushed, n_frame
 ;; Y = FramePointer + 1
@@ -1565,7 +1617,7 @@ DEFUN call_ddd
 ret

 #define n_pushed4
-#define n_frame 20
+#define n_frame (2 * F7_SIZEOF)

 call.2:
 do_prologue_saves n_pushed, n_frame
@@ -1576,9 +1628,8 @@ DEFUN call_ddd
 ;; First double argument is already in R18[].
 XCALL   F7_NAME (set_double_impl)
 ;; FP + 11 = (f7_t) arg2
-wmovr16,Y
-subir16,lo8 (-10)
-sbcir17,hi8 (-10)
+subir16,lo8 (-F7_SIZEOF)
+sbcir17,hi8 (-F7_SIZEOF)
 ;; Move second double argument to R18[].
 wmovr18,r10
 wmovr20,r12
diff --git a/libgcc/config/avr/libf7/libf7-common.mk 
b/libgcc/config/avr/libf7/libf7-common.mk

index e417715a7e5..d541b48ff3c 100644
--- a/libgcc/config/avr/libf7/libf7-common.mk
+++ b/libgcc/config/avr/libf7/libf7-common.mk
@@ -22,7 +22,7 @@ F7_ASM_PARTS += addsub_mant_scaled store load
 F7_ASM_PARTS += to_integer to_unsigned clz normalize_with_carry normalize
 F7_ASM_PARTS += store_expo sqrt16 sqrt_approx div

-F7_ASM_PARTS += D_class
+F7_ASM_PARTS += D_class D_fma
 F7_ASM_PARTS += D_isnan D_isinf D_isfinite D_signbit D_copysign D_neg 
D_fabs


 F7_ASM_PARTS += call_dd call_ddd
diff --git a/libgcc/config/avr/libf7/libf7.h 
b/libgcc/config/avr/libf7/libf7.h

index b50e6e218ba..2b6beac0df8 100644
--- a/libgcc/config/avr/libf7/libf7.h
+++ b/libgcc/config/avr/libf7/libf7.h
@@ -29,6 +29,7 @@

 #define F7_MANT_BYTES 7
 #define F7_MANT_BITS (8 * F7_MANT_BYTES)
+#define F7_SIZEOF (1 + F7_MANT_BYTES + 2)

 /*  Using the following GCC features:
 --  Unnamed structs / unions (GNU-C)


[avr,committed] LibF7: Implement a function that was missing for devices without MUL.

2023-10-18 Thread Georg-Johann Lay

This implements the worker function for double multiplication
for devices without MUL instruction.

Johann

--

LibF7: Implement mul_mant for devices without MUL instruction.

libgcc/config/avr/libf7/
* libf7-asm.sx (mul_mant): Implement for devices without MUL.
* asm-defs.h (wmov) [!HAVE_MUL]: Fix regno computation.
* t-libf7 (F7_ASM_FLAGS): Add -g0.

diff --git a/libgcc/config/avr/libf7/asm-defs.h 
b/libgcc/config/avr/libf7/asm-defs.h

index 4cfd3e61cbb..a50260a162f 100644
--- a/libgcc/config/avr/libf7/asm-defs.h
+++ b/libgcc/config/avr/libf7/asm-defs.h
@@ -134,14 +134,14 @@
 ..regno = 0

 .irpreg,\
-X, x, XL, xl, Xl, xL, x, x  \
+X, x, XL, xl, Xl, xL, x, x, \
 Y, y, YL, yl, Yl, yL, y, y, \
 Z, z, ZL, zl, Zl, zL, z, z
 .ifc  \reg,\dst
-..dst = (..regno / 8) + 26
+..dst = 2 * (..regno / 8) + 26
 .endif
 .ifc  \reg,\src
-..src = (..regno / 8) + 26
+..src = 2 * (..regno / 8) + 26
 .endif
 ..regno = ..regno + 1
 .endr
diff --git a/libgcc/config/avr/libf7/libf7-asm.sx 
b/libgcc/config/avr/libf7/libf7-asm.sx

index 5df167fe73c..4505764c126 100644
--- a/libgcc/config/avr/libf7/libf7-asm.sx
+++ b/libgcc/config/avr/libf7/libf7-asm.sx
@@ -1067,6 +1067,100 @@ DEFUN mul_mant
 ENDF mul_mant
 #endif /* F7MOD_mul_mant_ && MUL */

+#if defined F7MOD_mul_mant_ && ! defined (__AVR_HAVE_MUL__)
+#define AA  TMP
+#define A0  13
+#define A1  A0+1
+#define A2  A0+2
+#define A3  A0+3
+#define A4  A0+4
+#define A5  r26
+#define A6  r27
+#define BB  ZERO
+#define Bitsr29
+#define Bytes   r28
+
+DEFUN mul_mant
+do_prologue_saves 7
+bst r18,0   ; T = 1: Don't round.
+;; Save result address for later.
+pushr25
+pushr24
+;; Load 1st operand mantissa.
+wmovr30,r22
+clr AA
+LDD A0, Z+0+Off
+LDD A1, Z+1+Off
+LDD A2, Z+2+Off
+LDD A3, Z+3+Off
+LDD A4, Z+4+Off
+LDD A5, Z+5+Off
+LDD A6, Z+6+Off
+;; Let Z point one past .mant of the 2nd input operand.
+wmovr30,r20
+adiwr30,Expo
+
+;; Clear the result mantissa.
+.global __clr_8
+XCALL   __clr_8
+
+;; Loop over the bytes of B's mantissa from highest to lowest.
+;; "+1" because we jump into the loop.
+ldi Bytes,  1 + F7_MANT_BYTES
+
+;; Divide one operand by 2 so that the result mantissa won't overflow.
+;; This is accounted for by "Carry = 1" below.
+ldi Bits,   1
+rjmp.Loop_entry
+
+.Loop_bytes:
+ld  BB, -Z
+;;  Loop over the bits of B's mantissa from highest to lowest.
+ldi Bits,   8
+.Loop_bits:
+lsl BB
+brcc.Lnext_bit
+
+ADD CA, AA
+adc C0, A0
+adc C1, A1
+adc C2, A2
+adc C3, A3
+adc C4, A4
+adc C5, A5
+adc C6, A6
+
+.Lnext_bit:
+.Loop_entry:
+LSR A6
+ror A5
+ror A4
+ror A3
+ror A2
+ror A1
+ror A0
+ror AA
+
+dec Bits
+brne.Loop_bits
+
+dec Bytes
+brne.Loop_bytes
+
+;; Finally...
+
+pop ZL
+pop ZH
+
+;; The result has to be left-shifted by one (multiplied by 2) in order
+;; to undo the division by 2 of the 1st operand.
+ldi Carry,  1
+F7call  normalize.maybe_round.store_with_flags
+
+do_epilogue_restores 7
+ENDF mul_mant
+#endif /* F7MOD_mul_mant_ && ! MUL */
+

 #if defined (F7MOD_div_)

diff --git a/libgcc/config/avr/libf7/t-libf7 
b/libgcc/config/avr/libf7/t-libf7

index 30aa280d11e..f17e67e8523 100644
--- a/libgcc/config/avr/libf7/t-libf7
+++ b/libgcc/config/avr/libf7/t-libf7
@@ -86,7 +86,7 @@ F7_C_FLAGS +=   $(F7_FLAGS) \
-fno-tree-loop-optimize \
-fno-tree-loop-im -fno-move-loop-invariants

-F7_ASM_FLAGS +=$(F7_FLAGS)
+F7_ASM_FLAGS +=$(F7_FLAGS) -g0

 $(patsubst %, f7_c_%.o, $(CALL_PROLOGUES)) \
: F7_C_FLAGS += -mcall-prologues


[patch,libgcc,contrib]: Add some auto-generated files deps to gcc_update.

2023-10-19 Thread Georg-Johann Lay

This patch adds two deps to gcc_update files_and_dependencies for
two auto-generated headers from avr libgcc.

Ok for master?

Johann

--

Add dependencies for some auto-generated files from avr-libgcc.

/
* contrib/gcc_update (files_and_dependencies): Add dependencies for:
libgcc/config/avr/libf7/f7-renames.h,
libgcc/config/avr/libf7/f7-wraps.h.


diff --git a/contrib/gcc_update b/contrib/gcc_update
index cda2bdb0df9..f9f9aed743e 100755
--- a/contrib/gcc_update
+++ b/contrib/gcc_update
@@ -183,6 +183,8 @@ libphobos/configure: libphobos/configure.ac 
libphobos/aclocal.m4

 libphobos/src/Makefile.in: libphobos/src/Makefile.am libphobos/aclocal.m4
 libphobos/testsuite/Makefile.in: libphobos/testsuite/Makefile.am 
libphobos/aclocal.m4
 libstdc++-v3/include/bits/version.h: 
libstdc++-v3/include/bits/version.def libstdc++-v3/include/bits/version.tpl
+libgcc/config/avr/libf7/f7-renames.h: 
libgcc/config/avr/libf7/f7renames.sh libgcc/config/avr/libf7/libf7-common.mk
+libgcc/config/avr/libf7/f7-wraps.h: libgcc/config/avr/libf7/f7wraps.sh 
libgcc/config/avr/libf7/libf7-common.mk libgcc/config/avr/libf7/t-libf7-math

 # Top level
 Makefile.in: Makefile.tpl Makefile.def
 configure: configure.ac config/acx.m4


Ping #1: [patch,avr] Fix PR109650 wrong code

2023-05-30 Thread Georg-Johann Lay

Ping #1 for:

https://gcc.gnu.org/pipermail/gcc-patches/2023-May/618976.html

https://gcc.gnu.org/pipermail/gcc-patches/attachments/20230519/9536bf8c/attachment-0001.bin

Johann

Am 19.05.23 um 10:49 schrieb Georg-Johann Lay:


Here is a revised version of the patch.  The difference from the
previous one is that it adds some combine patterns for *cbranch
insns that were lost in the PR92729 transition.  The post-reload
parts of the patterns were still there.  The new patterns are
slightly more general in that they also handle fixed-point modes.

Apart from that, the patch behaves the same:

Am 15.05.23 um 20:05 schrieb Georg-Johann Lay:

This patch fixes a wrong-code bug in the wake of PR92729, the transition
that turned the AVR backend from cc0 to CCmode.  In cc0, the insn that
uses cc0, like a conditional branch, always follows the cc0 setter, which
is no longer the case with CCmode, where set and use of REG_CC might be
in different basic blocks.

This patch removes the machine-dependent reorg pass in avr_reorg entirely.


It is replaced by a new, AVR specific mini-pass that runs prior to
split2. Canonicalization of comparisons away from the "difficult"
codes GT[U] and LE[U] is now mostly performed by implementing
TARGET_CANONICALIZE_COMPARISON.

Moreover:

* Text peephole conditions get "dead_or_set_regno_p (*, REG_CC)" as
needed.

* RTL peephole conditions get "peep2_regno_dead_p (*, REG_CC)" as
needed.

* Conditional branches no longer clobber REG_CC.

* insn output for compares looks ahead to determine the branch mode in
use. This also needs "dead_or_set_regno_p (*, REG_CC)".

* Add RTL peepholes for decrement-and-branch detection.

Finally, it fixes some of the many indentation glitches left over from
PR92729.

Ok?

I'd also backport this one because all of v12+ is affected by the wrong code.


Johann

--

gcc/
 PR target/109650
 PR target/92729

 * config/avr/avr-passes.def (avr_pass_ifelse): Insert new pass.
 * config/avr/avr.cc (avr_pass_ifelse): New RTL pass.
 (avr_pass_data_ifelse): New pass_data for it.
 (make_avr_pass_ifelse, avr_redundant_compare, avr_cbranch_cost)
 (avr_canonicalize_comparison, avr_out_plus_set_ZN)
 (avr_out_cmp_ext): New functions.
 (compare_condition): Make sure REG_CC dies in the branch insn.
 (avr_rtx_costs_1): Add computation of cbranch costs.
 (avr_adjust_insn_length) [ADJUST_LEN_ADD_SET_ZN, ADJUST_LEN_CMP_ZEXT]
 [ADJUST_LEN_CMP_SEXT]: Handle them.
 (TARGET_CANONICALIZE_COMPARISON): New define.
 (avr_simplify_comparison_p, compare_diff_p, avr_compare_pattern)
 (avr_reorg_remove_redundant_compare, avr_reorg): Remove functions.
 (TARGET_MACHINE_DEPENDENT_REORG): Remove define.

 * avr-protos.h (avr_simplify_comparison_p): Remove proto.
 (make_avr_pass_ifelse, avr_out_plus_set_ZN, cc_reg_rtx)
 (avr_out_cmp_zext): New protos.

 * config/avr/avr.md (branch, difficult_branch): Don't split insns.
 (*cbranchhi.zero-extend.0, *cbranchhi.zero-extend.1)
 (*swapped_tst<mode>, *add.for.eqne.<mode>): New insns.
 (*cbranch<mode>4): Rename to cbranch<mode>4_insn.
 (define_peephole): Add dead_or_set_regno_p(insn,REG_CC) as needed.
 (define_peephole2): Add peep2_regno_dead_p(*,REG_CC) as needed.
 Add new RTL peepholes for decrement-and-branch and *swapped_tst.
 Rework signtest-and-branch peepholes for *sbrx_branch.
 (adjust_len) [add_set_ZN, cmp_zext]: New.
 (QIPSI): New mode iterator.
 (ALLs1, ALLs2, ALLs4, ALLs234): New mode iterators.
 (gelt): New code iterator.
 (gelt_eqne): New code attribute.
 (rvbranch, *rvbranch, difficult_rvbranch, *difficult_rvbranch)
 (branch_unspec, *negated_tst, *reversed_tst)
 (*cmpqi_sign_extend): Remove insns.
 (define_c_enum "unspec") [UNSPEC_IDENTITY]: Remove.

 * config/avr/avr-dimode.md (cbranch4): Canonicalize comparisons.
 * config/avr/predicates.md (scratch_or_d_register_operand): New.
 * config/avr/constraints.md (Yxx): New constraint.

gcc/testsuite/
 PR target/109650
 * config/avr/torture/pr109650-1.c: New test.
 * config/avr/torture/pr109650-2.c: New test.


[testsuite,applied] PR52641: Fix more implicit int=32 fallout.

2023-05-30 Thread Georg-Johann Lay

Committed to undo implicit assumptions.

Johann

testsuite/52641: Fix more of implicit int=32 assumption fallout.

gcc/testsuite/
PR testsuite/52641
* gcc.dg/torture/pr107451.c: Require int32plus.
* gcc.dg/torture/pr108574-3.c: Use __INT32_TYPE__ instead of int.
* gcc.dg/torture/pr109940.c: Use __INTPTR_TYPE__ instead of long.
* gcc.dg/torture/pr95248.c: Require size24plus.
* gcc.dg/torture/pr95295-3.c: Use var_* with at least 32 bits int.
* gcc.dg/torture/pr98640.c: Cast to __INT32_TYPE__ instead of int.
* gcc.dg/tree-ssa/pr103771.c: Use int with at least 32 bits.


diff --git a/gcc/testsuite/gcc.dg/torture/pr107451.c 
b/gcc/testsuite/gcc.dg/torture/pr107451.c

index a17574c6896..fee010ac40a 100644
--- a/gcc/testsuite/gcc.dg/torture/pr107451.c
+++ b/gcc/testsuite/gcc.dg/torture/pr107451.c
@@ -1,4 +1,5 @@
 /* { dg-do run } */
+/* { dg-require-effective-target int32plus } */
 /* { dg-additional-options "-ftree-vectorize -fno-vect-cost-model" } */
 /* { dg-additional-options "-mavx2" { target avx2_runtime } } */

diff --git a/gcc/testsuite/gcc.dg/torture/pr108574-3.c 
b/gcc/testsuite/gcc.dg/torture/pr108574-3.c

index b4d5dae9f80..b5c85d1261a 100644
--- a/gcc/testsuite/gcc.dg/torture/pr108574-3.c
+++ b/gcc/testsuite/gcc.dg/torture/pr108574-3.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */

-int a = 3557301289, d;
+__INT32_TYPE__ a = 3557301289, d;
 signed char b, f;
 unsigned short c = 241;
 short e, g;
diff --git a/gcc/testsuite/gcc.dg/torture/pr109940.c 
b/gcc/testsuite/gcc.dg/torture/pr109940.c

index 23364708e86..55082d0e312 100644
--- a/gcc/testsuite/gcc.dg/torture/pr109940.c
+++ b/gcc/testsuite/gcc.dg/torture/pr109940.c
@@ -11,7 +11,7 @@ e(int d, int f) {
 return 1;
   int g = d / 2;
   for (int h = 0; h < g; h++)
-if (f == (long int)b > b[h])
+if (f == (__INTPTR_TYPE__)b > b[h])
   c(&b[h]);
   e(g, f);
   e(g, f);
diff --git a/gcc/testsuite/gcc.dg/torture/pr95248.c 
b/gcc/testsuite/gcc.dg/torture/pr95248.c

index f0efcc12b51..e39eb22d04b 100644
--- a/gcc/testsuite/gcc.dg/torture/pr95248.c
+++ b/gcc/testsuite/gcc.dg/torture/pr95248.c
@@ -1,5 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target int32plus } */
+/* { dg-require-effective-target size24plus } */

 int var_2 = -2013646301;
 int var_3 = -1126567434;
diff --git a/gcc/testsuite/gcc.dg/torture/pr95295-3.c 
b/gcc/testsuite/gcc.dg/torture/pr95295-3.c

index a506af9a63f..f723020c0b3 100644
--- a/gcc/testsuite/gcc.dg/torture/pr95295-3.c
+++ b/gcc/testsuite/gcc.dg/torture/pr95295-3.c
@@ -1,7 +1,11 @@
 /* { dg-do compile } */

 extern short var_15, var_20;
+#if __SIZEOF_INT__ >= 4
 extern int var_18, var_21, var_23;
+#else
+extern __INT32_TYPE__ var_18, var_21, var_23;
+#endif
 extern _Bool arr_2[];
 extern long arr_3[];
 void test()
diff --git a/gcc/testsuite/gcc.dg/torture/pr98640.c 
b/gcc/testsuite/gcc.dg/torture/pr98640.c

index b187781d614..426be66ec80 100644
--- a/gcc/testsuite/gcc.dg/torture/pr98640.c
+++ b/gcc/testsuite/gcc.dg/torture/pr98640.c
@@ -10,7 +10,7 @@ uint64_t var_83 = 10966786425750692026ULL;
 void test()
 {
   var_14 = var_0 + (_Bool)7;
-  var_83 = 1 + (int)var_0; // 1 + 888395530
+  var_83 = 1 + (int32_t)var_0; // 1 + 888395530
 }

 int main()
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr103771.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr103771.c

index 8faa45a8222..8061e2df79e 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr103771.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr103771.c
@@ -4,6 +4,10 @@

 typedef unsigned char uint8_t;

+#if __SIZEOF_INT__ < 4
+#define int __INT32_TYPE__
+#endif
+
 static uint8_t x264_clip_uint8 (int x)
 {
   return x & (~255) ? (-x) >> 31 : x;


[patch] Fix PR101188 wrong code from postreload

2023-06-02 Thread Georg-Johann Lay

There is the following bug in postreload that can be traced back
to v5 at least:

In postreload.cc::reload_cse_move2add() there is a loop over all
insns.  If it encounters a SET, the next insn is analyzed if it
is a single_set.

After next has been analyzed, it continues with

  if (success)
delete_insn (insn);
  changed |= success;
  insn = next; // This effectively skips analysis of next.
  move2add_record_mode (reg);
  reg_offset[regno]
= trunc_int_for_mode (added_offset + base_offset,
  mode);
  continue; // for continues with insn = NEXT_INSN (insn).

So it records the effect of next, but not the clobbers that
next might have.  This is a problem if next clobbers a GPR
like it can happen for avr.  What then can happen is that in a
later round, it may use a value from a (partially) clobbered reg.

The patch records the effects of potential clobbers.

Bootstrapped and reg-tested on x86_64.  Also tested on avr where
the bug popped up.  The testcase discriminates on avr, and for now
I am not aware of any other target that's affected by the bug.

The change is not intrusive and fixes wrong code, so I'd like
to backport it.

Ok to apply?

Johann

rtl-optimization/101188: Don't bypass clobbers of some insns that are
optimized or are optimization candidates.

gcc/
PR rtl-optimization/101188
* postreload.cc (reload_cse_move2add): Record clobbers of next
insn using move2add_note_store.

gcc/testsuite/
PR rtl-optimization/101188
* gcc.c-torture/execute/pr101188.c: New test.


diff --git a/gcc/postreload.cc b/gcc/postreload.cc
index fb392651e1b..2de3e2ea780 100644
--- a/gcc/postreload.cc
+++ b/gcc/postreload.cc
@@ -2033,6 +2033,14 @@ reload_cse_move2add (rtx_insn *first)
  if (success)
delete_insn (insn);
  changed |= success;
+ // By setting "insn = next" below, we are bypassing the
+ // side-effects of next, see PR101188.  Do them by hand
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, PATTERN (next), NONCONST)
+   {
+ if (GET_CODE (*iter) == CLOBBER)
+   move2add_note_store (XEXP (*iter, 0), *iter, next);
+   }
  insn = next;
  move2add_record_mode (reg);
  reg_offset[regno]
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr101188.c 
b/gcc/testsuite/gcc.c-torture/execute/pr101188.c

new file mode 100644
index 000..4817c69347c
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr101188.c
@@ -0,0 +1,59 @@
+typedef __UINT8_TYPE__ uint8_t;
+typedef __UINT16_TYPE__ uint16_t;
+
+typedef uint8_t (*fn1)(void *a);
+typedef void (*fn2)(void *a, int *arg);
+
+struct S
+{
+uint8_t buffer[64];
+uint16_t n;
+fn2 f2;
+void *a;
+fn1 f1;
+};
+
+volatile uint16_t x;
+
+void __attribute__((__noinline__,__noclone__))
+foo (uint16_t n)
+{
+  x = n;
+}
+
+void __attribute__((__noinline__,__noclone__))
+testfn (struct S *self)
+{
+int arg;
+
+foo (self->n);
+self->n++;
+self->f2 (self->a, &arg);
+self->buffer[0] = self->f1 (self->a);
+}
+
+static unsigned char myfn2_called = 0;
+
+static void
+myfn2 (void *a, int *arg)
+{
+  myfn2_called = 1;
+}
+
+static uint8_t
+myfn1 (void *a)
+{
+  return 0;
+}
+
+int main (void)
+{
+  struct S s;
+  s.n = 0;
+  s.f2 = myfn2;
+  s.f1 = myfn1;
+  testfn (&s);
+  if (myfn2_called != 1)
+__builtin_abort();
+  return 0;
+}


[avr, committed] Improve operations on non-LD_REGS when the operation follows a move from LD_REGS.

2023-06-02 Thread Georg-Johann Lay

Applied the following patch to improve operations on non-LD_REGS
when the operation follows a move from LD_REGS.

Johann

target/110088: Improve operation of l-reg with const after move from d-reg.

After reload, there may be sequences like
   lreg = dreg
   lreg = lreg <op> const
with an LD_REGS dreg, non-LD_REGS lreg, and <op> in PLUS, IOR, AND.
If dreg dies after the first insn, it is possible to use
   dreg = dreg <op> const
   lreg = dreg
instead which is more efficient.

gcc/
PR target/110088
* config/avr/avr.md: Add an RTL peephole to optimize operations on
non-LD_REGS after a move from LD_REGS.
(piaop): New code iterator.

diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 371965938a6..9f5fabc861f 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -279,6 +279,7 @@ (define_code_iterator any_extend2 [sign_extend 
zero_extend])

 (define_code_iterator any_extract [sign_extract zero_extract])
 (define_code_iterator any_shiftrt [lshiftrt ashiftrt])

+(define_code_iterator piaop [plus ior and])
 (define_code_iterator bitop [xor ior and])
 (define_code_iterator xior [xor ior])
 (define_code_iterator eqne [eq ne])
@@ -4727,6 +4729,43 @@ (define_split
 DONE;
   })

+;; If  $0 = $0  const  requires a QI scratch, and d-reg $1 dies after
+;; the first insn, then we can replace
+;;$0 = $1
+;;$0 = $0  const
+;; by
+;;$1 = $1  const
+;;$0 = $1
+;; This transforms constraint alternative "r,0,n,&d" of the first operation
+;; to alternative "d,0,n,X".
+;; "*addhi3_clobber"  "*addpsi3"  "*addsi3"
+;; "*addhq3"  "*adduhq3"  "*addha3"  "*adduha3"
+;; "*addsq3"  "*addusq3"  "*addsa3"  "*addusa3"
+;; "*iorhi3"  "*iorpsi3"  "*iorsi3"
+;; "*andhi3"  "*andpsi3"  "*andsi3"
+(define_peephole2
+  [(parallel [(set (match_operand:ORDERED234 0 "register_operand")
+   (match_operand:ORDERED234 1 "d_register_operand"))
+  (clobber (reg:CC REG_CC))])
+   (parallel [(set (match_dup 0)
+   (piaop:ORDERED234 (match_dup 0)
+ (match_operand:ORDERED234 2 
"const_operand")))

+  ; A d-reg as scratch tells that this insn is expensive, and
+  ; that $0 is not a d-register: l-reg or something like 
SI:14 etc.

+  (clobber (match_operand:QI 3 "d_register_operand"))
+  (clobber (reg:CC REG_CC))])]
+  "peep2_reg_dead_p (1, operands[1])"
+  [(parallel [(set (match_dup 1)
+   (piaop:ORDERED234 (match_dup 1)
+ (match_dup 2)))
+  (clobber (scratch:QI))
+  (clobber (reg:CC REG_CC))])
+   ; Unfortunately, the following insn misses a REG_DEAD note for $1,
+   ; so this peep2 works only once.
+   (parallel [(set (match_dup 0)
+   (match_dup 1))
+  (clobber (reg:CC REG_CC))])])
+

 ;; swap swap swap swap swap swap swap swap swap swap swap swap swap 
swap swap

 ;; swap


Re: [patch] Fix PR101188 wrong code from postreload

2023-06-03 Thread Georg-Johann Lay




Am 03.06.23 um 17:53 schrieb Jeff Law:



On 6/2/23 02:46, Georg-Johann Lay wrote:

There is the following bug in postreload that can be traced back
to v5 at least:

In postreload.cc::reload_cse_move2add() there is a loop over all
insns.  If it encounters a SET, the next insn is analyzed if it
is a single_set.

After next has been analyzed, it continues with

   if (success)
 delete_insn (insn);
   changed |= success;
   insn = next; // This effectively skips analysis of next.
   move2add_record_mode (reg);
   reg_offset[regno]
 = trunc_int_for_mode (added_offset + base_offset,
   mode);
   continue; // for continues with insn = NEXT_INSN (insn).

So it records the effect of next, but not the clobbers that
next might have.  This is a problem if next clobbers a GPR
like it can happen for avr.  What then can happen is that in a
later round, it may use a value from a (partially) clobbered reg.

The patch records the effects of potential clobbers.

Bootstrapped and reg-tested on x86_64.  Also tested on avr where
the bug popped up.  The testcase discriminates on avr, and for now
I am not aware of any other target that's affected by the bug.

The change is not intrusive and fixes wrong code, so I'd like
to backport it.

Ok to apply?

Johann

rtl-optimization/101188: Don't bypass clobbers of some insns that are
optimized or are optimization candidates.

gcc/
 PR rtl-optimization/101188
 * postreload.cc (reload_cse_move2add): Record clobbers of next
 insn using move2add_note_store.

gcc/testsuite/
 PR rtl-optimization/101188
 * gcc.c-torture/execute/pr101188.c: New test.
If I understand the code correctly, isn't the core of the problem that 
we "continue" rather than executing the rest of the code in the loop. In 
particular the continue bypasses this chunk of code:



 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    {
  if (REG_NOTE_KIND (note) == REG_INC
  && REG_P (XEXP (note, 0)))
    {
  /* Reset the information about this register.  */
  int regno = REGNO (XEXP (note, 0));
  if (regno < FIRST_PSEUDO_REGISTER)
    {
  move2add_record_mode (XEXP (note, 0));
  reg_mode[regno] = VOIDmode;
    }
    }
    }

  /* There are no REG_INC notes for SP autoinc.  */
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), NONCONST)
    {
  rtx mem = *iter;
  if (mem
  && MEM_P (mem)
  && GET_RTX_CLASS (GET_CODE (XEXP (mem, 0))) == RTX_AUTOINC)
    {
  if (XEXP (XEXP (mem, 0), 0) == stack_pointer_rtx)
    reg_mode[STACK_POINTER_REGNUM] = VOIDmode;
    }
    }

  note_stores (insn, move2add_note_store, insn);


The point is that in the continue block, the effect of the insn is
recorded even if !success, it's just the computed effect of the code.

Moreover, "next" is REG = REG + CONST_INT, so there are no REG_INC
notes, no?

Also I don't have any testcases that break other than the one
that has a clobber of a GPR along with the pointer addition.

I tried some "smart" solutions before, but all failed for some
reason, so I resorted to something that fixes the bug, and
*only* fixes the bug, and which has clearly no other side
effects than fixing the bug (I have to do all remote on compile
farm).  If a more elaborate fix is needed that also catches other
PRs, then I would hand this over to a postreload maintainer please.

Of particular importance for your case would be the note_stores call. 
But I could well see other targets needing the search for REG_INC notes 
as well as stack pushes.


If I'm right, then wouldn't it be better to factor that blob of code 
above into its own function, then use it before the "continue" rather 
than implementing a custom scan for CLOBBERS?


I cannot answer that.  Maybe the authors of the code have some ideas.

Johann

It also begs the question if the other case immediately above the code I 
quoted needs similar adjustment.  It doesn't do the insn = next, but it 
does bypass the search for autoinc memory references and the note_stores 
call.


Jeff


Re: [patch] Fix PR101188 wrong code from postreload

2023-06-05 Thread Georg-Johann Lay




Am 03.06.23 um 17:53 schrieb Jeff Law:



On 6/2/23 02:46, Georg-Johann Lay wrote:

There is the following bug in postreload that can be traced back
to v5 at least:

In postreload.cc::reload_cse_move2add() there is a loop over all
insns.  If it encounters a SET, the next insn is analyzed if it
is a single_set.

After next has been analyzed, it continues with

   if (success)
 delete_insn (insn);
   changed |= success;
   insn = next; // This effectively skips analysis of next.
   move2add_record_mode (reg);
   reg_offset[regno]
 = trunc_int_for_mode (added_offset + base_offset,
   mode);
   continue; // for continues with insn = NEXT_INSN (insn).

So it records the effect of next, but not the clobbers that
next might have.  This is a problem if next clobbers a GPR
like it can happen for avr.  What then can happen is that in a
later round, it may use a value from a (partially) clobbered reg.

The patch records the effects of potential clobbers.

Bootstrapped and reg-tested on x86_64.  Also tested on avr where
the bug popped up.  The testcase discriminates on avr, and for now
I am not aware of any other target that's affected by the bug.

The change is not intrusive and fixes wrong code, so I'd like
to backport it.

Ok to apply?

Johann

rtl-optimization/101188: Don't bypass clobbers of some insns that are
optimized or are optimization candidates.

gcc/
 PR rtl-optimization/101188
 * postreload.cc (reload_cse_move2add): Record clobbers of next
 insn using move2add_note_store.

gcc/testsuite/
 PR rtl-optimization/101188
 * gcc.c-torture/execute/pr101188.c: New test.
If I understand the code correctly, isn't the core of the problem that 
we "continue" rather than executing the rest of the code in the loop. In 
particular the continue bypasses this chunk of code:



 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    {
  if (REG_NOTE_KIND (note) == REG_INC
  && REG_P (XEXP (note, 0)))
    {
  /* Reset the information about this register.  */
  int regno = REGNO (XEXP (note, 0));
  if (regno < FIRST_PSEUDO_REGISTER)
    {
  move2add_record_mode (XEXP (note, 0));
  reg_mode[regno] = VOIDmode;
    }
    }
    }

  /* There are no REG_INC notes for SP autoinc.  */
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), NONCONST)
    {
  rtx mem = *iter;
  if (mem
  && MEM_P (mem)
  && GET_RTX_CLASS (GET_CODE (XEXP (mem, 0))) == RTX_AUTOINC)
    {
  if (XEXP (XEXP (mem, 0), 0) == stack_pointer_rtx)
    reg_mode[STACK_POINTER_REGNUM] = VOIDmode;
    }
    }

  note_stores (insn, move2add_note_store, insn);


Of particular importance for your case would be the note_stores call. 
But I could well see other targets needing the search for REG_INC notes 
as well as stack pushes.


If I'm right, then wouldn't it be better to factor that blob of code 
above into its own function, then use it before the "continue" rather 
than implementing a custom scan for CLOBBERS?


It also begs the question if the other case immediately above the code I 
quoted needs similar adjustment.  It doesn't do the insn = next, but it 
does bypass the search for autoinc memory references and the note_stores 
call.



Jeff


So if I understand you correctly, this means that my patch is declined?

Johann


Ping #2: [patch,avr] Fix PR109650 wrong code

2023-06-07 Thread Georg-Johann Lay

Ping #2 for:

https://gcc.gnu.org/pipermail/gcc-patches/2023-May/618976.html

https://gcc.gnu.org/pipermail/gcc-patches/attachments/20230519/9536bf8c/attachment-0001.bin

Ping #1:
https://gcc.gnu.org/pipermail/gcc-patches/2023-May/620098.html

Johann

Am 19.05.23 um 10:49 schrieb Georg-Johann Lay:


Here is a revised version of the patch.  The difference to the
previous one is that it adds some combine patterns for *cbranch
insns that were lost in the PR92729 transition.  The post-reload
part of the patterns were still there.  The new patterns are
slightly more general in that they also handle fixed-point modes.

Apart from that, the patch behaves the same:

Am 15.05.23 um 20:05 schrieb Georg-Johann Lay:

This patch fixes a wrong-code bug in the wake of PR92729, the transition
that turned the AVR backend from cc0 to CCmode.  In cc0, the insn that
uses cc0 like a conditional branch always follows the cc0 setter, which
is no more the case with CCmode where set and use of REG_CC might be in
different basic blocks.

This patch removes the machine-dependent reorg pass in avr_reorg 
entirely.


It is replaced by a new, AVR specific mini-pass that runs prior to
split2. Canonicalization of comparisons away from the "difficult"
codes GT[U] and LE[U] is now mostly performed by implementing
TARGET_CANONICALIZE_COMPARISON.

Moreover:

* Text peephole conditions get "dead_or_set_regno_p (*, REG_CC)" as
needed.

* RTL peephole conditions get "peep2_regno_dead_p (*, REG_CC)" as
needed.

* Conditional branches no more clobber REG_CC.

* insn output for compares looks ahead to determine the branch mode in
use. This needs also "dead_or_set_regno_p (*, REG_CC)".

* Add RTL peepholes for decrement-and-branch detection.

Finally, it fixes some of the many indentation glitches left over from
PR92729.

Ok?

I'd also backport this one because all of v12+ is affected by the 
wrong code.


Johann

--

gcc/
 PR target/109650
 PR target/92729

 * config/avr/avr-passes.def (avr_pass_ifelse): Insert new pass.
 * config/avr/avr.cc (avr_pass_ifelse): New RTL pass.
 (avr_pass_data_ifelse): New pass_data for it.
 (make_avr_pass_ifelse, avr_redundant_compare, avr_cbranch_cost)
 (avr_canonicalize_comparison, avr_out_plus_set_ZN)
 (avr_out_cmp_ext): New functions.
 (compare_condtition): Make sure REG_CC dies in the branch insn.
 (avr_rtx_costs_1): Add computation of cbranch costs.
 (avr_adjust_insn_length) [ADJUST_LEN_ADD_SET_ZN, ADJUST_LEN_CMP_ZEXT]:
 [ADJUST_LEN_CMP_SEXT]Handle them.
 (TARGET_CANONICALIZE_COMPARISON): New define.
 (avr_simplify_comparison_p, compare_diff_p, avr_compare_pattern)
 (avr_reorg_remove_redundant_compare, avr_reorg): Remove functions.
 (TARGET_MACHINE_DEPENDENT_REORG): Remove define.

 * avr-protos.h (avr_simplify_comparison_p): Remove proto.
 (make_avr_pass_ifelse, avr_out_plus_set_ZN, cc_reg_rtx)
 (avr_out_cmp_zext): New Protos

 * config/avr/avr.md (branch, difficult_branch): Don't split insns.
 (*cbranchhi.zero-extend.0", *cbranchhi.zero-extend.1")
 (*swapped_tst, *add.for.eqne.): New insns.
 (*cbranch4): Rename to cbranch4_insn.
 (define_peephole): Add dead_or_set_regno_p(insn,REG_CC) as needed.
 (define_peephole2): Add peep2_regno_dead_p(*,REG_CC) as needed.
 Add new RTL peepholes for decrement-and-branch and *swapped_tst.
 Rework signtest-and-branch peepholes for *sbrx_branch.
 (adjust_len) [add_set_ZN, cmp_zext]: New.
 (QIPSI): New mode iterator.
 (ALLs1, ALLs2, ALLs4, ALLs234): New mode iterators.
 (gelt): New code iterator.
 (gelt_eqne): New code attribute.
 (rvbranch, *rvbranch, difficult_rvbranch, *difficult_rvbranch)
 (branch_unspec, *negated_tst, *reversed_tst)
 (*cmpqi_sign_extend): Remove insns.
 (define_c_enum "unspec") [UNSPEC_IDENTITY]: Remove.

 * config/avr/avr-dimode.md (cbranch4): Canonicalize comparisons.
 * config/avr/predicates.md (scratch_or_d_register_operand): New.
 * config/avr/constraints.md (Yxx): New constraint.

gcc/testsuite/
 PR target/109650
 * config/avr/torture/pr109650-1.c: New test.
 * config/avr/torture/pr109650-2.c: New test.


[patch,avr]: Improve bit-extractions as of PR109907.

2023-06-07 Thread Georg-Johann Lay

This patch improves bit-extractions on AVR.

Andrew added some patches so that more bit extractions are
recognized in the middle-end and rtl optimizers.

The patch adds a pattern for "extzv<mode>" and replaces the
deprecated "extzv".

There are still situations where expensive shifts are passed
down to the backend though, and in one situation the backend
uses better sequences for right-shift with an offset of MSB:

Instead of ROL/CLR/ROL sequence that needs constraint "0" for
operand $1, BST/CLR/BLD just requires "r" for $1 thus less
register pressure.  Moreover, no scratch is required.

Asm out for (inverted) bit-extraction was out-sourced to a
C function which is more convenient.

Ok for master?

Johann

--

target/109907: Overhaul bit extractions.

o Logical right shift that shifts the MSB to position 0 can be performed in
  such a way that the input operand constraint can be relaxed from "0" 
to "r".

  This results in less register pressure.  Moreover, no scratch register is
  required in that case.

o The deprecated "extzv" pattern is replaced by "extzv<mode>" that allows
  inputs of scalar integer modes of different sizes (1 up to 4 bytes).

o Existing patterns are adjusted to the more generic "extzv<mode>" pattern.
  Some patterns are added as the middle-end has been reworked to spot
  more bit-extraction opportunities.

o A C function is used to print the asm for bit extractions, which is more
  convenient for complex output logic.

gcc/
PR target/109907
* config/avr/avr.md (adjust_len) [extr, extr_not]: New elements.
(MSB, SIZE): New mode attributes.
(any_shift): New code iterator.
(*lshr3_split, *lshr3, lshr3)
(*lshr3_const_split): Add constraint alternative for
the case of shift-offset = MSB.  Ditch "length" attribute.
(extzv, *extzv..subreg, *extzv.xor)
(*extzv.ge, *neg.ashiftrt.msb, *extzv.io.lsr7): New.
* config/avr/constraints.md (C15, C23, C31, Yil): New
* config/avr/predicates.md (reg_or_low_io_operand)
(const7_operand, reg_or_low_io_operand)
(const15_operand, const_0_to_15_operand)
(const23_operand, const_0_to_23_operand)
(const31_operand, const_0_to_31_operand): New.
* config/avr/avr-protos.h (avr_out_extr, avr_out_extr_not): New.
* config/avr/avr.cc (avr_out_extr, avr_out_extr_not): New funcs.
(lshrqi3_out, lshrhi3_out, lshrpsi3_out, lshrsi3_out): Adjust
MSB case to new insn constraint "r" for operands[1].
(avr_adjust_insn_length) [ADJUST_LEN_EXTR_NOT, ADJUST_LEN_EXTR]:
Handle these cases.
(avr_rtx_costs_1): Adjust cost for a new pattern.
gcc/testsuite/
* gcc.target/avr/pr109907.c: New test.
* gcc.target/avr/torture/pr109907-1.c: New test.
* gcc.target/avr/torture/pr109907-2.c: New test.

diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index ec96fd45865..229854a19db 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -58,6 +58,8 @@ extern const char *ret_cond_branch (rtx x, int len, int reverse);
 extern const char *avr_out_movpsi (rtx_insn *, rtx*, int*);
 extern const char *avr_out_sign_extend (rtx_insn *, rtx*, int*);
 extern const char *avr_out_insert_notbit (rtx_insn *, rtx*, rtx, int*);
+extern const char *avr_out_extr (rtx_insn *, rtx*, int*);
+extern const char *avr_out_extr_not (rtx_insn *, rtx*, int*);
 
 extern const char *ashlqi3_out (rtx_insn *insn, rtx operands[], int *len);
 extern const char *ashlhi3_out (rtx_insn *insn, rtx operands[], int *len);
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index a90cade35c7..f69d79bf14e 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -7142,9 +7142,9 @@ lshrqi3_out (rtx_insn *insn, rtx operands[], int *len)
 
 	case 7:
 	  *len = 3;
-	  return ("rol %0" CR_TAB
-		  "clr %0" CR_TAB
-		  "rol %0");
+	  return ("bst %1,7" CR_TAB
+		  "clr %0"   CR_TAB
+		  "bld %0,0");
 	}
 }
   else if (CONSTANT_P (operands[2]))
@@ -7401,10 +7401,10 @@ lshrhi3_out (rtx_insn *insn, rtx operands[], int *len)
 
 	case 15:
 	  *len = 4;
-	  return ("clr %A0" CR_TAB
-		  "lsl %B0" CR_TAB
-		  "rol %A0" CR_TAB
-		  "clr %B0");
+	  return ("bst %B1,7" CR_TAB
+		  "clr %A0"   CR_TAB
+		  "clr %B0"   CR_TAB
+		  "bld %A0,0");
 	}
   len = t;
 }
@@ -7453,11 +7453,11 @@ avr_out_lshrpsi3 (rtx_insn *insn, rtx *op, int *plen)
   /* fall through */
 
 case 23:
-  return avr_asm_len ("clr %A0"CR_TAB
-  "sbrc %C0,7" CR_TAB
-  "inc %A0"CR_TAB
-  "clr %B0"CR_TAB
-  "clr %C0", op, plen, 5);
+  return avr_asm_len ("bst %C1,7" CR_TAB
+  "clr %A0"   CR_TAB
+  "clr %B0"   CR_TAB
+  "clr %C0"   CR_TAB
+  "bld %A0,0", op, plen, 5);
 } /* swit

[avr,committed] Tidy code for inverted bit insertions

2023-06-11 Thread Georg-Johann Lay
Applied this no-op change that tidies up the code for inverted bit 
insertions.


Johann

--

Use canonical form for reversed single-bit insertions after reload.

We now split almost all insns after reload in order to add clobber of 
REG_CC.

If insns are coming from insn combiner and there is no canonical form for
the respective arithmetic (like for reversed bit insertions), there is
no need to keep all these different representations after reload:
Instead of splitting such patterns to their clobber-REG_CC-analogon, we can
split to a canonical representation, which is insv_notbit for the 
present case.

This is a no-op change.

gcc/
* config/avr/avr.md (adjust_len) [insv_notbit_0, insv_notbit_7]:
Remove attribute values.
(insv_notbit): New post-reload insn.
(*insv.not-shiftrt_split, *insv.xor1-bit.0_split)
(*insv.not-bit.0_split, *insv.not-bit.7_split)
(*insv.xor-extract_split): Split to insv_notbit.
(*insv.not-shiftrt, *insv.xor1-bit.0, *insv.not-bit.0, *insv.not-bit.7)
(*insv.xor-extract): Remove post-reload insns.
* config/avr/avr.cc (avr_out_insert_notbit) [bitno]: Remove parameter.
(avr_adjust_insn_length): Adjust call of avr_out_insert_notbit.
[ADJUST_LEN_INSV_NOTBIT_0, ADJUST_LEN_INSV_NOTBIT_7]: Remove cases.
* config/avr/avr-protos.h (avr_out_insert_notbit): Adjust prototype.


diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index a10d91d186f..5c1343f0df8 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -57,7 +57,7 @@ extern const char *avr_out_compare64 (rtx_insn *, 
rtx*, int*);

 extern const char *ret_cond_branch (rtx x, int len, int reverse);
 extern const char *avr_out_movpsi (rtx_insn *, rtx*, int*);
 extern const char *avr_out_sign_extend (rtx_insn *, rtx*, int*);
-extern const char *avr_out_insert_notbit (rtx_insn *, rtx*, rtx, int*);
+extern const char *avr_out_insert_notbit (rtx_insn *, rtx*, int*);
 extern const char *avr_out_extr (rtx_insn *, rtx*, int*);
 extern const char *avr_out_extr_not (rtx_insn *, rtx*, int*);
 extern const char *avr_out_plus_set_ZN (rtx*, int*);
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index b02f5e2..ef6872a3f55 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -8995,20 +8995,15 @@ avr_out_addto_sp (rtx *op, int *plen)
 }


-/* Output instructions to insert an inverted bit into OPERANDS[0]:
-   $0.$1 = ~$2.$3  if XBITNO = NULL
-   $0.$1 = ~$2.XBITNO  if XBITNO != NULL.
+/* Output instructions to insert an inverted bit into OP[0]: $0.$1 = 
~$2.$3.

If PLEN = NULL then output the respective instruction sequence which
is a combination of BST / BLD and some instruction(s) to invert the 
bit.
If PLEN != NULL then store the length of the sequence (in words) in 
*PLEN.

Return "".  */

 const char*
-avr_out_insert_notbit (rtx_insn *insn, rtx operands[], rtx xbitno, int 
*plen)

+avr_out_insert_notbit (rtx_insn *insn, rtx op[], int *plen)
 {
-  rtx op[4] = { operands[0], operands[1], operands[2],
-xbitno == NULL_RTX ? operands [3] : xbitno };
-
   if (INTVAL (op[1]) == 7
   && test_hard_reg_class (LD_REGS, op[0]))
 {
@@ -10038,15 +10033,7 @@ avr_adjust_insn_length (rtx_insn *insn, int len)
 case ADJUST_LEN_INSERT_BITS: avr_out_insert_bits (op, &len); break;
 case ADJUST_LEN_ADD_SET_ZN: avr_out_plus_set_ZN (op, &len); break;

-case ADJUST_LEN_INSV_NOTBIT:
-  avr_out_insert_notbit (insn, op, NULL_RTX, &len);
-  break;
-case ADJUST_LEN_INSV_NOTBIT_0:
-  avr_out_insert_notbit (insn, op, const0_rtx, &len);
-  break;
-case ADJUST_LEN_INSV_NOTBIT_7:
-  avr_out_insert_notbit (insn, op, GEN_INT (7), &len);
-  break;
+case ADJUST_LEN_INSV_NOTBIT: avr_out_insert_notbit (insn, op, 
&len); break;


 default:
   gcc_unreachable();
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index eadc482da15..83dd15040b0 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -163,7 +163,7 @@ (define_attr "adjust_len"
ashlhi, ashrhi, lshrhi,
ashlsi, ashrsi, lshrsi,
ashlpsi, ashrpsi, lshrpsi,
-   insert_bits, insv_notbit, insv_notbit_0, insv_notbit_7,
+   insert_bits, insv_notbit,
add_set_ZN, cmp_uext, cmp_sext,
no"
   (const_string "no"))
@@ -9151,6 +9151,21 @@ (define_insn "*insv.shiftrt"
   [(set_attr "length" "2")])

 ;; Same, but with a NOT inverting the source bit.
+;; Insert bit ~$2.$3 into $0.$1
+(define_insn "insv_notbit"
+  [(set (zero_extract:QI (match_operand:QI 0 "register_operand" 
   "+r")

+ (const_int 1)
+ (match_operand:QI 1 "const_0_to_7_operand" 
"n"))
+(not:QI (zero_extract:QI (match_operand:QI 2 "register_operand" 
"r")

+ (const_int 1)
+ (match_operand:QI 3 
"const_0_to_7_operand" "n"

+   (clobber (reg:CC REG_CC))]
+  "re

[Patch,avr,committed] Fix PR target/110220: Set JUMP_LABEL as required.

2023-08-01 Thread Georg-Johann Lay
Committed as obvious.  An insn emitted by avr specific RTL optimization 
pass missed setting of its JUMP_LABEL.


Johann

target/110220: Set JUMP_LABEL and LABEL_NUSES of new branch insn generated by
target specific RTL optimization pass .avr-casesi.

gcc/
PR target/110220
* config/avr/avr.cc (avr_optimize_casesi): Set JUMP_LABEL and
LABEL_NUSES of new conditional branch instruction.

diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 0447641a8e9..25f3f4c22e0 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -644,9 +644,11 @@ avr_optimize_casesi (rtx_insn *insns[5], rtx *xop)
   emit_insn (gen_add (reg, reg, gen_int_mode (-low_idx, mode)));
   rtx op0 = reg; rtx op1 = gen_int_mode (num_idx, mode);
   rtx labelref = copy_rtx (xop[4]);
-  emit_jump_insn (gen_cbranch (gen_rtx_fmt_ee (GTU, VOIDmode, op0, op1),
-   op0, op1,
-   labelref));
+  rtx xbranch = gen_cbranch (gen_rtx_fmt_ee (GTU, VOIDmode, op0, op1),
+op0, op1, labelref);
+  rtx_insn *cbranch = emit_jump_insn (xbranch);
+  JUMP_LABEL (cbranch) = xop[4];
+  ++LABEL_NUSES (xop[4]);

   seq1 = get_insns();
   last1 = get_last_insn();


[avr,committed] Fix some typos in avr-mcus.def

2023-08-04 Thread Georg-Johann Lay

This fixes some minor typos in avr-mcus.def.

Johann


gcc/
* config/avr/avr-mcus.def (avr128d*, avr64d*): Fix their 
FLASH_SIZE

and PM_OFFSET entries.

diff --git a/gcc/config/avr/avr-mcus.def b/gcc/config/avr/avr-mcus.def
index ca99116adab..d0056c960ee 100644
--- a/gcc/config/avr/avr-mcus.def
+++ b/gcc/config/avr/avr-mcus.def
@@ -291,7 +291,7 @@ AVR_MCU ("atmega2560",   ARCH_AVR6, 
AVR_ISA_NONE, "__AVR_ATmega2560__",
 AVR_MCU ("atmega2561",   ARCH_AVR6, AVR_ISA_NONE, 
"__AVR_ATmega2561__",0x0200, 0x0, 0x4, 0)
 AVR_MCU ("atmega256rfr2",ARCH_AVR6, AVR_ISA_NONE, 
"__AVR_ATmega256RFR2__", 0x0200, 0x0, 0x4, 0)
 AVR_MCU ("atmega2564rfr2",   ARCH_AVR6, AVR_ISA_NONE, 
"__AVR_ATmega2564RFR2__",0x0200, 0x0, 0x4, 0)

-/* Xmega, 16K <= Flash < 64K, RAM <= 64K */
+/* Xmega, 16K <= Flash <= 64K, RAM <= 64K */
 AVR_MCU ("avrxmega2",ARCH_AVRXMEGA2, AVR_ISA_NONE, NULL, 
0x2000, 0x0, 0x9000, 0)
 AVR_MCU ("atxmega8e5",   ARCH_AVRXMEGA2, AVR_ISA_NONE, 
"__AVR_ATxmega8E5__",   0x2000, 0x0, 0x2800, 0)
 AVR_MCU ("atxmega16a4",  ARCH_AVRXMEGA2, AVR_ISA_NONE, 
"__AVR_ATxmega16A4__",  0x2000, 0x0, 0x5000, 0)
@@ -306,14 +306,14 @@ AVR_MCU ("atxmega16c4",  ARCH_AVRXMEGA2, 
AVR_ISA_RMW,  "__AVR_ATxmega16C4__"
 AVR_MCU ("atxmega32a4u", ARCH_AVRXMEGA2, AVR_ISA_RMW, 
"__AVR_ATxmega32A4U__", 0x2000, 0x0, 0x9000, 0)
 AVR_MCU ("atxmega32c4",  ARCH_AVRXMEGA2, AVR_ISA_RMW, 
"__AVR_ATxmega32C4__",  0x2000, 0x0, 0x9000, 0)
 AVR_MCU ("atxmega32e5",  ARCH_AVRXMEGA2, AVR_ISA_NONE, 
"__AVR_ATxmega32E5__",  0x2000, 0x0, 0x9000, 0)
-AVR_MCU ("avr64da28",ARCH_AVRXMEGA2, AVR_ISA_NONE, 
"__AVR_AVR64DA28__",0x6000, 0x0, 0x8000, 0x1)
-AVR_MCU ("avr64da32",ARCH_AVRXMEGA2, AVR_ISA_NONE, 
"__AVR_AVR64DA32__",0x6000, 0x0, 0x8000, 0x1)
-AVR_MCU ("avr64da48",ARCH_AVRXMEGA2, AVR_ISA_NONE, 
"__AVR_AVR64DA48__",0x6000, 0x0, 0x8000, 0x1)
-AVR_MCU ("avr64da64",ARCH_AVRXMEGA2, AVR_ISA_NONE, 
"__AVR_AVR64DA64__",0x6000, 0x0, 0x8000, 0x1)
-AVR_MCU ("avr64db28",ARCH_AVRXMEGA2, AVR_ISA_NONE, 
"__AVR_AVR64DB28__",0x6000, 0x0, 0x8000, 0x1)
-AVR_MCU ("avr64db32",ARCH_AVRXMEGA2, AVR_ISA_NONE, 
"__AVR_AVR64DB32__",0x6000, 0x0, 0x8000, 0x1)
-AVR_MCU ("avr64db48",ARCH_AVRXMEGA2, AVR_ISA_NONE, 
"__AVR_AVR64DB48__",0x6000, 0x0, 0x8000, 0x1)
-AVR_MCU ("avr64db64",ARCH_AVRXMEGA2, AVR_ISA_NONE, 
"__AVR_AVR64DB64__",0x6000, 0x0, 0x8000, 0x1)
+AVR_MCU ("avr64da28",ARCH_AVRXMEGA2, AVR_ISA_NONE, 
"__AVR_AVR64DA28__",0x6000, 0x0, 0x1, 0)
+AVR_MCU ("avr64da32",ARCH_AVRXMEGA2, AVR_ISA_NONE, 
"__AVR_AVR64DA32__",0x6000, 0x0, 0x1, 0)
+AVR_MCU ("avr64da48",ARCH_AVRXMEGA2, AVR_ISA_NONE, 
"__AVR_AVR64DA48__",0x6000, 0x0, 0x1, 0)
+AVR_MCU ("avr64da64",ARCH_AVRXMEGA2, AVR_ISA_NONE, 
"__AVR_AVR64DA64__",0x6000, 0x0, 0x1, 0)
+AVR_MCU ("avr64db28",ARCH_AVRXMEGA2, AVR_ISA_NONE, 
"__AVR_AVR64DB28__",0x6000, 0x0, 0x1, 0)
+AVR_MCU ("avr64db32",ARCH_AVRXMEGA2, AVR_ISA_NONE, 
"__AVR_AVR64DB32__",0x6000, 0x0, 0x1, 0)
+AVR_MCU ("avr64db48",ARCH_AVRXMEGA2, AVR_ISA_NONE, 
"__AVR_AVR64DB48__",0x6000, 0x0, 0x1, 0)
+AVR_MCU ("avr64db64",ARCH_AVRXMEGA2, AVR_ISA_NONE, 
"__AVR_AVR64DB64__",0x6000, 0x0, 0x1, 0)

 /* Xmega, Flash + RAM < 64K, flash visible in RAM address space */
 AVR_MCU ("avrxmega3",ARCH_AVRXMEGA3, AVR_ISA_NONE,  NULL, 
0x3f00, 0x0, 0x8000, 0)
 AVR_MCU ("attiny202",ARCH_AVRXMEGA3, AVR_ISA_RCALL, 
"__AVR_ATtiny202__",   0x3f80, 0x0, 0x800,  0x8000)
@@ -366,14 +366,14 @@ AVR_MCU ("atxmega64b1",  ARCH_AVRXMEGA4, 
AVR_ISA_RMW,  "__AVR_ATxmega64B1__"
 AVR_MCU ("atxmega64b3",  ARCH_AVRXMEGA4, AVR_ISA_RMW, 
"__AVR_ATxmega64B3__",  0x2000, 0x0, 0x11000, 0)
 AVR_MCU ("atxmega64c3",  ARCH_AVRXMEGA4, AVR_ISA_RMW, 
"__AVR_ATxmega64C3__",  0x2000, 0x0, 0x11000, 0)
 AVR_MCU ("atxmega64d4",  ARCH_AVRXMEGA4, AVR_ISA_NONE, 
"__AVR_ATxmega64D4__",  0x2000, 0x0, 0x11000, 0)
-AVR_MCU ("avr128da28",   ARCH_AVRXMEGA4, AVR_ISA_NONE, 
"__AVR_AVR128DA28__",   0x4000, 0x0, 0x8000,  0x2)
-AVR_MCU ("avr128da32",   ARCH_AVRXMEGA4, AVR_ISA_NONE, 
"__AVR_AVR128DA32__",   0x4000, 0x0, 0x8000,  0x2)
-AVR_MCU ("avr128da48",   ARCH_AVRXMEGA4, AVR_ISA_NONE, 
"__AVR_AVR128DA48__",   0x4000, 0x0, 0x8000,  0x2)
-AVR_MCU ("avr128da64",   ARCH_AVRXMEGA4, AVR_ISA_NONE, 
"__AVR_AVR128DA64__",   0x4000, 0x0, 0x8000,  0x2)
-AVR_MCU ("avr128db28",   ARCH_AVRXMEGA4, AVR_ISA_NONE, 
"__AVR_AVR128DB28__",   0x4000, 0x0, 0x8000,  0x2)
-AVR_MCU ("avr128db32",   ARCH_AVRXMEGA4, AVR_ISA_NONE, 
"__AVR_AVR128DB32__",   0x4000, 0x0, 0x8000,  0x2)
-AVR_MCU ("avr128db48",   ARCH_AVRXMEGA4, AVR_ISA_NONE, 
"__AVR_AVR128DB48__",   0x4000, 0x0, 0x8000,  0x2)
-AVR_MCU ("avr128db64",   ARCH_AVRXM

[avr,committed] Add some more devices to avr-mcus.def.

2023-08-04 Thread Georg-Johann Lay

This adds some more Xmega like devices to the avr backend.

Johann

AVR: Add some more devices: AVR16DD*, AVR32DD*, AVR64DD*, AVR64EA*, 
ATtiny42*, ATtiny82*, ATtiny162*, ATtiny322*, ATtiny10*.


gcc/
* config/avr/avr-mcus.def (avr64dd14, avr64dd20, avr64dd28, 
avr64dd32)
(avr64ea28, avr64ea32, avr64ea48, attiny424, attiny426, 
attiny427)
(attiny824, attiny826, attiny827, attiny1624, attiny1626, 
attiny1627)
(attiny3224, attiny3226, attiny3227, avr16dd14, avr16dd20, 
avr16dd28)

(avr16dd32, avr32dd14, avr32dd20, avr32dd28, avr32dd32)
(attiny102, attiny104): New devices.
* doc/avr-mmcu.texi: Regenerate.

AVR: Add some more devices: AVR16DD*, AVR32DD*, AVR64DD*, AVR64EA*, ATtiny42*, ATtiny82*, ATtiny162*, ATtiny322*, ATtiny10*.

gcc/
	* config/avr/avr-mcus.def (avr64dd14, avr64dd20, avr64dd28, avr64dd32)
	(avr64ea28, avr64ea32, avr64ea48, attiny424, attiny426, attiny427)
	(attiny824, attiny826, attiny827, attiny1624, attiny1626, attiny1627)
	(attiny3224, attiny3226, attiny3227, avr16dd14, avr16dd20, avr16dd28)
	(avr16dd32, avr32dd14, avr32dd20, avr32dd28, avr32dd32)
	(attiny102, attiny104): New devices.
	* doc/avr-mmcu.texi: Regenerate.

diff --git a/gcc/config/avr/avr-mcus.def b/gcc/config/avr/avr-mcus.def
index d0056c960ee..4c4269cd429 100644
--- a/gcc/config/avr/avr-mcus.def
+++ b/gcc/config/avr/avr-mcus.def
@@ -314,6 +314,13 @@ AVR_MCU ("avr64db28",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DB28__",
 AVR_MCU ("avr64db32",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DB32__",0x6000, 0x0, 0x1, 0)
 AVR_MCU ("avr64db48",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DB48__",0x6000, 0x0, 0x1, 0)
 AVR_MCU ("avr64db64",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DB64__",0x6000, 0x0, 0x1, 0)
+AVR_MCU ("avr64dd14",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DD14__",0x6000, 0x0, 0x1, 0)
+AVR_MCU ("avr64dd20",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DD20__",0x6000, 0x0, 0x1, 0)
+AVR_MCU ("avr64dd28",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DD28__",0x6000, 0x0, 0x1, 0)
+AVR_MCU ("avr64dd32",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64DD32__",0x6000, 0x0, 0x1, 0)
+AVR_MCU ("avr64ea28",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64EA28__",0x6800, 0x0, 0x1, 0)
+AVR_MCU ("avr64ea32",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64EA32__",0x6800, 0x0, 0x1, 0)
+AVR_MCU ("avr64ea48",ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_AVR64EA48__",0x6800, 0x0, 0x1, 0)
 /* Xmega, Flash + RAM < 64K, flash visible in RAM address space */
 AVR_MCU ("avrxmega3",ARCH_AVRXMEGA3, AVR_ISA_NONE,  NULL,  0x3f00, 0x0, 0x8000, 0)
 AVR_MCU ("attiny202",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny202__",   0x3f80, 0x0, 0x800,  0x8000)
@@ -342,6 +349,18 @@ AVR_MCU ("attiny1617",   ARCH_AVRXMEGA3, AVR_ISA_NONE,  "__AVR_ATtiny1617__"
 AVR_MCU ("attiny3214",   ARCH_AVRXMEGA3, AVR_ISA_NONE,  "__AVR_ATtiny3214__",  0x3800, 0x0, 0x8000, 0x8000)
 AVR_MCU ("attiny3216",   ARCH_AVRXMEGA3, AVR_ISA_NONE,  "__AVR_ATtiny3216__",  0x3800, 0x0, 0x8000, 0x8000)
 AVR_MCU ("attiny3217",   ARCH_AVRXMEGA3, AVR_ISA_NONE,  "__AVR_ATtiny3217__",  0x3800, 0x0, 0x8000, 0x8000)
+AVR_MCU ("attiny424",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny424__",   0x3e00, 0x0, 0x1000, 0x8000)
+AVR_MCU ("attiny426",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny426__",   0x3e00, 0x0, 0x1000, 0x8000)
+AVR_MCU ("attiny427",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny427__",   0x3e00, 0x0, 0x1000, 0x8000)
+AVR_MCU ("attiny824",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny824__",   0x3c00, 0x0, 0x2000, 0x8000)
+AVR_MCU ("attiny826",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny826__",   0x3c00, 0x0, 0x2000, 0x8000)
+AVR_MCU ("attiny827",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny827__",   0x3c00, 0x0, 0x2000, 0x8000)
+AVR_MCU ("attiny1624",   ARCH_AVRXMEGA3, AVR_ISA_NONE,  "__AVR_ATtiny1624__",  0x3800, 0x0, 0x4000, 0x8000)
+AVR_MCU ("attiny1626",   ARCH_AVRXMEGA3, AVR_ISA_NONE,  "__AVR_ATtiny1626__",  0x3800, 0x0, 0x4000, 0x8000)
+AVR_MCU ("attiny1627",   ARCH_AVRXMEGA3, AVR_ISA_NONE,  "__AVR_ATtiny1627__",  0x3800, 0x0, 0x4000, 0x8000)
+AVR_MCU ("attiny3224",   ARCH_AVRXMEGA3, AVR_ISA_NONE,  "__AVR_ATtiny3224__",  0x3400, 0x0, 0x8000, 0x8000)
+AVR_MCU ("attiny3226",   ARCH_AVRXMEGA3, AVR_ISA_NONE,  "__AVR_ATtiny3226__",  0x3400, 0x0, 0x8000, 0x8000)
+AVR_MCU ("attiny3227",   ARCH_AVRXMEGA3, AVR_ISA_NONE,  "__AVR_ATtiny3227__",  0x3400, 0x0, 0x8000, 0x8000)
 AVR_MCU ("atmega808",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATmega808__",   0x3c00, 0x0, 0x2000, 0x4000)
 AVR_MCU ("atmega809",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATmega809__",   0x3c00, 0x0, 0x2000, 0x4000)
 AVR_MCU ("atmega1608",   ARCH_AVRXMEGA3, AVR_ISA_NONE,  "__AVR_ATmega1608__",

Re: [AVR PATCH] Improvements to SImode and PSImode shifts by constants.

2023-11-03 Thread Georg-Johann Lay




Am 02.11.23 um 12:54 schrieb Roger Sayle:


This patch provides non-looping implementations for more SImode (32-bit)
and PSImode (24-bit) shifts on AVR.  For most cases, these are shorter
and faster than using a loop, but for a few (controlled by optimize_size)


Maybe this should also adjust the insn costs, like in avr_rtx_costs_1?

Depending on what you are outputting, avr_asm_len() might be more
convenient.

What I am not sure about are the test cases that expect exact sequences
which might be annoying in the future?

Johann



they are a little larger but significantly faster.  The approach is to
perform byte-based shifts by 1, 2 or 3 bytes, followed by bit-based shifts
(effectively in a narrower type) for the remaining bits, beyond 8, 16 or 24.

For example, the simple test case below (inspired by PR 112268):

unsigned long foo(unsigned long x)
{
   return x >> 26;
}

gcc -O2 currently generates:

foo:ldi r18,26
1:  lsr r25
 ror r24
 ror r23
 ror r22
 dec r18
 brne 1b
 ret

which is 8 instructions, and takes ~158 cycles.
With this patch, we now generate:

foo:mov r22,r25
 clr r23
 clr r24
 clr r25
 lsr r22
 lsr r22
 ret

which is 7 instructions, and takes ~7 cycles.

One complication is that the modified functions sometimes use spaces instead
of TABs, with occasional mistakes in GNU-style formatting, so I've fixed
these indentation/whitespace issues.  There's no change in the code for the
cases previously handled/special-cased, with the exception of ashrqi3 reg,5
where with -Os a (4-instruction) loop is shorter than the five single-bit
shifts of a fully unrolled implementation.

This patch has been (partially) tested with a cross-compiler to avr-elf
hosted on x86_64, without a simulator, where the compile-only tests in
the gcc testsuite show no regressions.  If someone could test this more
thoroughly that would be great.


2023-11-02  Roger Sayle  

gcc/ChangeLog
 * config/avr/avr.cc (ashlqi3_out): Fix indentation whitespace.
 (ashlhi3_out): Likewise.
 (avr_out_ashlpsi3): Likewise.  Handle shifts by 9 and 17-22.
 (ashlsi3_out): Fix formatting.  Handle shifts by 9 and 25-30.
 (ashrqi3_out): Use loop for shifts by 5 when optimizing for size.
 Fix indentation whitespace.
 (ashrhi3_out): Likewise.
 (avr_out_ashrpsi3): Likewise.  Handle shifts by 17.
 (ashrsi3_out): Fix indentation.  Handle shifts by 17 and 25.
 (lshrqi3_out): Fix whitespace.
 (lshrhi3_out): Likewise.
 (avr_out_lshrpsi3): Likewise.  Handle shifts by 9 and 17-22.
 (lshrsi3_out): Fix indentation.  Handle shifts by 9,17,18 and 25-30.

gcc/testsuite/ChangeLog
 * gcc.target/avr/ashlsi-1.c: New test case.
 * gcc.target/avr/ashlsi-2.c: Likewise.
 * gcc.target/avr/ashrsi-1.c: Likewise.
 * gcc.target/avr/ashrsi-2.c: Likewise.
 * gcc.target/avr/lshrsi-1.c: Likewise.
 * gcc.target/avr/lshrsi-2.c: Likewise.


Thanks in advance,
Roger
--



[patch,avr] Fix PR109650 wrong code

2023-05-15 Thread Georg-Johann Lay

This patch fixes a wrong-code bug in the wake of PR92729, the transition
that turned the AVR backend from cc0 to CCmode.  In cc0, the insn that
uses cc0 like a conditional branch always follows the cc0 setter, which
is no more the case with CCmode where set and use of REG_CC might be in
different basic blocks.

This patch removes the machine-dependent reorg pass in avr_reorg entirely.

It is replaced by a new, AVR specific mini-pass that runs prior to
split2. Canonicalization of comparisons away from the "difficult"
codes GT[U] and LE[U] is now mostly performed by implementing
TARGET_CANONICALIZE_COMPARISON.

Moreover:

* Text peephole conditions get "dead_or_set_regno_p (*, REG_CC)" as
needed.

* RTL peephole conditions get "peep2_regno_dead_p (*, REG_CC)" as
needed.

* Conditional branches no more clobber REG_CC.

* insn output for compares looks ahead to determine the branch mode in
use. This needs also "dead_or_set_regno_p (*, REG_CC)".

* Add RTL peepholes for decrement-and-branch detection.

Finally, it fixes some of the many indentation glitches left over from
PR92729.

Ok?

I'd also backport this one because all of v12+ is affected by the wrong 
code.


Johann

--

gcc/
PR target/109650
PR target/92729

* config/avr/avr-passes.def (avr_pass_ifelse): Insert new pass.
* config/avr/avr.cc (avr_pass_ifelse): New RTL pass.
(avr_pass_data_ifelse): New pass_data for it.
(make_avr_pass_ifelse, avr_redundant_compare, avr_cbranch_cost)
(avr_canonicalize_comparison, avr_out_plus_set_ZN): New functions.
(compare_condition): Make sure REG_CC dies in the branch insn.
(avr_rtx_costs_1): Add computation of cbranch costs.
(avr_adjust_insn_length) [ADJUST_LEN_ADD_SET_ZN]: Handle case.
(TARGET_CANONICALIZE_COMPARISON): New define.
(avr_simplify_comparison_p, compare_diff_p, avr_compare_pattern)
(avr_reorg_remove_redundant_compare, avr_reorg): Remove functions.
(TARGET_MACHINE_DEPENDENT_REORG): Remove define.

* avr-protos.h (avr_simplify_comparison_p): Remove proto.
(make_avr_pass_ifelse, avr_out_plus_set_ZN, cc_reg_rtx): New Protos

* config/avr/avr.md (branch, difficult_branch): Don't split insns.
(*swapped_tst, *add.for.eqne.): New insns.
(*cbranch4): Rename to cbranch4_insn.
(cbranch4): Try to canonicalize comparisons at expand.
(define_peephole): Add dead_or_set_regno_p(insn,REG_CC) as needed.
(define_peephole2): Add peep2_regno_dead_p(*,REG_CC) as needed.
Add new RTL peepholes for decrement-and-branch and *swapped_tst.
(adjust_len) [add_set_ZN]: New.
(rvbranch, *rvbranch, difficult_rvbranch, *difficult_rvbranch)
(branch_unspec, *negated_tst, *reversed_tst): Remove insns.
(define_c_enum "unspec") [UNSPEC_IDENTITY]: Remove.

* config/avr/avr-dimode.md (cbranch4): Canonicalize comparisons.
* config/avr/predicates.md (scratch_or_d_register_operand): New.
* config/avr/constraints.md (Yxx): New constraint.

gcc/testsuite/
PR target/109650
* config/avr/torture/pr109650-1.c: New test.

diff --git a/gcc/config/avr/avr-dimode.md b/gcc/config/avr/avr-dimode.md
index c0bb04ff9e0..91f0d395761 100644
--- a/gcc/config/avr/avr-dimode.md
+++ b/gcc/config/avr/avr-dimode.md
@@ -455,12 +455,18 @@ (define_expand "conditional_jump"
 (define_expand "cbranch4"
   [(set (pc)
 (if_then_else (match_operator 0 "ordered_comparison_operator"
-[(match_operand:ALL8 1 "register_operand"  "")
- (match_operand:ALL8 2 "nonmemory_operand" "")])
- (label_ref (match_operand 3 "" ""))
- (pc)))]
+[(match_operand:ALL8 1 "register_operand")
+ (match_operand:ALL8 2 "nonmemory_operand")])
+  (label_ref (match_operand 3))
+  (pc)))]
   "avr_have_dimode"
{
+int icode = (int) GET_CODE (operands[0]);
+
+targetm.canonicalize_comparison (&icode, &operands[1], &operands[2], false);
+operands[0] = gen_rtx_fmt_ee ((enum rtx_code) icode,
+  VOIDmode, operands[1], operands[2]);
+
 rtx acc_a = gen_rtx_REG (mode, ACC_A);
 
 avr_fix_inputs (operands, 1 << 2, regmask (mode, ACC_A));
@@ -490,8 +496,8 @@ (define_insn_and_split "cbranch_2_split"
 (if_then_else (match_operator 0 "ordered_comparison_operator"
 [(reg:ALL8 ACC_A)
  (reg:ALL8 ACC_B)])
- (label_ref (match_operand 1 "" ""))
- (pc)))]
+  (label_ref (match_operand 1))
+  (pc)))]
   "avr_have_dimode"
   "#"
   "&& reload_completed"
@@ -544,8 +550,8 @@ (define_insn_and_split "cbranch_const_2_split"
 (if_then_else (match_operator 0 "ordered_comparison_operator"
 [(reg:ALL8 ACC_A)
  (match_op

[patch,avr] PR105753: Fix ICE in add_clobbers.

2023-05-16 Thread Georg-Johann Lay

This patch removes the superfluous parallel in [u]divmod patterns
in the AVR backend.  Effect of extra parallel is that add_clobbers
reaches gcc_unreachable() because the clobbers for [u]divmod are
missing.  The parallel around the parts of an insn pattern is
implicit if it has multiple parts like clobbers, so extra parallel
should be removed.

Ok to apply?

Johann

--

gcc/
PR target/105753
* config/avr/avr.md (divmodpsi, udivmodpsi, divmodsi, udivmodsi):
Remove superfluous "parallel" in insn pattern.
([u]divmod4): Tidy code.  Use gcc_unreachable() instead of
printing error text to assembly.

gcc/testsuite/
PR target/105753
* gcc.target/avr/torture/pr105753.c: New test.

diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 43b75046384..a79c6824fad 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -3705,17 +3705,17 @@ (define_insn "*mulohisi3_call"
 ;;CSE has problems to operate on hard regs.
 ;;
 (define_insn_and_split "divmodqi4"
-  [(set (match_operand:QI 0 "pseudo_register_operand" "")
-(div:QI (match_operand:QI 1 "pseudo_register_operand" "")
-(match_operand:QI 2 "pseudo_register_operand" "")))
-   (set (match_operand:QI 3 "pseudo_register_operand" "")
+  [(set (match_operand:QI 0 "pseudo_register_operand")
+(div:QI (match_operand:QI 1 "pseudo_register_operand")
+(match_operand:QI 2 "pseudo_register_operand")))
+   (set (match_operand:QI 3 "pseudo_register_operand")
 (mod:QI (match_dup 1) (match_dup 2)))
(clobber (reg:QI 22))
(clobber (reg:QI 23))
(clobber (reg:QI 24))
(clobber (reg:QI 25))]
   ""
-  "this divmodqi4 pattern should have been splitted;"
+  { gcc_unreachable(); }
   ""
   [(set (reg:QI 24) (match_dup 1))
(set (reg:QI 22) (match_dup 2))
@@ -3751,17 +3751,17 @@ (define_insn "*divmodqi4_call"
   [(set_attr "type" "xcall")])
 
 (define_insn_and_split "udivmodqi4"
- [(set (match_operand:QI 0 "pseudo_register_operand" "")
-   (udiv:QI (match_operand:QI 1 "pseudo_register_operand" "")
-(match_operand:QI 2 "pseudo_register_operand" "")))
-   (set (match_operand:QI 3 "pseudo_register_operand" "")
-(umod:QI (match_dup 1) (match_dup 2)))
-   (clobber (reg:QI 22))
-   (clobber (reg:QI 23))
-   (clobber (reg:QI 24))
-   (clobber (reg:QI 25))]
-  ""
-  "this udivmodqi4 pattern should have been splitted;"
+ [(set (match_operand:QI 0 "pseudo_register_operand")
+   (udiv:QI (match_operand:QI 1 "pseudo_register_operand")
+(match_operand:QI 2 "pseudo_register_operand")))
+  (set (match_operand:QI 3 "pseudo_register_operand")
+   (umod:QI (match_dup 1) (match_dup 2)))
+  (clobber (reg:QI 22))
+  (clobber (reg:QI 23))
+  (clobber (reg:QI 24))
+  (clobber (reg:QI 25))]
+  ""
+  { gcc_unreachable(); }
   ""
   [(set (reg:QI 24) (match_dup 1))
(set (reg:QI 22) (match_dup 2))
@@ -3793,17 +3793,17 @@ (define_insn "*udivmodqi4_call"
   [(set_attr "type" "xcall")])
 
 (define_insn_and_split "divmodhi4"
-  [(set (match_operand:HI 0 "pseudo_register_operand" "")
-(div:HI (match_operand:HI 1 "pseudo_register_operand" "")
-(match_operand:HI 2 "pseudo_register_operand" "")))
-   (set (match_operand:HI 3 "pseudo_register_operand" "")
+  [(set (match_operand:HI 0 "pseudo_register_operand")
+(div:HI (match_operand:HI 1 "pseudo_register_operand")
+(match_operand:HI 2 "pseudo_register_operand")))
+   (set (match_operand:HI 3 "pseudo_register_operand")
 (mod:HI (match_dup 1) (match_dup 2)))
(clobber (reg:QI 21))
(clobber (reg:HI 22))
(clobber (reg:HI 24))
(clobber (reg:HI 26))]
   ""
-  "this should have been splitted;"
+  { gcc_unreachable(); }
   ""
   [(set (reg:HI 24) (match_dup 1))
(set (reg:HI 22) (match_dup 2))
@@ -3839,17 +3839,17 @@ (define_insn "*divmodhi4_call"
   [(set_attr "type" "xcall")])
 
 (define_insn_and_split "udivmodhi4"
-  [(set (match_operand:HI 0 "pseudo_register_operand" "")
-(udiv:HI (match_operand:HI 1 "pseudo_register_operand" "")
- (match_operand:HI 2 "pseudo_register_operand" "")))
-   (set (match_operand:HI 3 "pseudo_register_operand" "")
+  [(set (match_operand:HI 0 "pseudo_register_operand")
+(udiv:HI (match_operand:HI 1 "pseudo_register_operand")
+ (match_operand:HI 2 "pseudo_register_operand")))
+   (set (match_operand:HI 3 "pseudo_register_operand")
 (umod:HI (match_dup 1) (match_dup 2)))
(clobber (reg:QI 21))
(clobber (reg:HI 22))
(clobber (reg:HI 24))
(clobber (reg:HI 26))]
   ""
-  "this udivmodhi4 pattern should have been splitted.;"
+  { gcc_unreachable(); }
   ""
   [(set (reg:HI 24) (match_dup 1))
(set (reg:HI 22) (match_dup 2))
@@ -4090,14 +4090,14 @@ (define_insn "*mulpsi3.libgcc"
 ;; implementation works the other way round.
 
 (define_insn_and_split "divmodpsi4"
-  [(parallel [(set (match_ope

[avr,committed] Fix a trivial typo in gen-avr-mmcu-specs.cc.

2023-05-18 Thread Georg-Johann Lay

Applied as obvious, there was a trailing */ in a 1-line // comment.

https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=a726d007f197d13ec80b9d625bf8bab97c96384c

Johann


gcc/ChangeLog
* config/avr/gen-avr-mmcu-specs.cc: Remove stale */ after // comment.

--

diff --git a/gcc/config/avr/gen-avr-mmcu-specs.cc 
b/gcc/config/avr/gen-avr-mmcu-specs.cc
index 
9344246cb7203a665db575a2bf7c0e8a29521963..b9a5ad44e4e5c350fbcc45d468684ff6d873574e 
100644 (file)

--- a/gcc/config/avr/gen-avr-mmcu-specs.cc
+++ b/gcc/config/avr/gen-avr-mmcu-specs.cc
@@ -30,7 +30,7 @@
 #include "avr-devices.cc"

 // Get rid of "defaults.h".  We just need tm.h for `WITH_AVRLIBC' and
-// and `WITH_RTEMS'.  */
+// and `WITH_RTEMS'.
 #define GCC_DEFAULTS_H

 #include "tm.h"


Re: [patch,avr] Fix PR109650 wrong code

2023-05-19 Thread Georg-Johann Lay

Here is a revised version of the patch.  The difference to the
previous one is that it adds some combine patterns for *cbranch
insns that were lost in the PR92729 transition.  The post-reload
part of the patterns were still there.  The new patterns are
slightly more general in that they also handle fixed-point modes.

Apart from that, the patch behaves the same:

Am 15.05.23 um 20:05 schrieb Georg-Johann Lay:

This patch fixes a wrong-code bug in the wake of PR92729, the transition
that turned the AVR backend from cc0 to CCmode.  In cc0, the insn that
uses cc0 like a conditional branch always follows the cc0 setter, which
is no more the case with CCmode where set and use of REG_CC might be in
different basic blocks.

This patch removes the machine-dependent reorg pass in avr_reorg entirely.

It is replaced by a new, AVR specific mini-pass that runs prior to
split2. Canonicalization of comparisons away from the "difficult"
codes GT[U] and LE[U] is now mostly performed by implementing
TARGET_CANONICALIZE_COMPARISON.

Moreover:

* Text peephole conditions get "dead_or_set_regno_p (*, REG_CC)" as
needed.

* RTL peephole conditions get "peep2_regno_dead_p (*, REG_CC)" as
needed.

* Conditional branches no more clobber REG_CC.

* insn output for compares looks ahead to determine the branch mode in
use. This needs also "dead_or_set_regno_p (*, REG_CC)".

* Add RTL peepholes for decrement-and-branch detection.

Finally, it fixes some of the many indentation glitches left over from
PR92729.

Ok?

I'd also backport this one because all of v12+ is affected by the wrong 
code.


Johann

--

gcc/
PR target/109650
PR target/92729

* config/avr/avr-passes.def (avr_pass_ifelse): Insert new pass.
* config/avr/avr.cc (avr_pass_ifelse): New RTL pass.
(avr_pass_data_ifelse): New pass_data for it.
(make_avr_pass_ifelse, avr_redundant_compare, avr_cbranch_cost)
(avr_canonicalize_comparison, avr_out_plus_set_ZN)
(avr_out_cmp_ext): New functions.
(compare_condition): Make sure REG_CC dies in the branch insn.
(avr_rtx_costs_1): Add computation of cbranch costs.
(avr_adjust_insn_length) [ADJUST_LEN_ADD_SET_ZN, ADJUST_LEN_CMP_ZEXT]
[ADJUST_LEN_CMP_SEXT]: Handle them.
(TARGET_CANONICALIZE_COMPARISON): New define.
(avr_simplify_comparison_p, compare_diff_p, avr_compare_pattern)
(avr_reorg_remove_redundant_compare, avr_reorg): Remove functions.
(TARGET_MACHINE_DEPENDENT_REORG): Remove define.

* avr-protos.h (avr_simplify_comparison_p): Remove proto.
(make_avr_pass_ifelse, avr_out_plus_set_ZN, cc_reg_rtx)
(avr_out_cmp_zext): New Protos

* config/avr/avr.md (branch, difficult_branch): Don't split insns.
(*cbranchhi.zero-extend.0", *cbranchhi.zero-extend.1")
(*swapped_tst, *add.for.eqne.): New insns.
(*cbranch4): Rename to cbranch4_insn.
(define_peephole): Add dead_or_set_regno_p(insn,REG_CC) as needed.
(define_peephole2): Add peep2_regno_dead_p(*,REG_CC) as needed.
Add new RTL peepholes for decrement-and-branch and *swapped_tst.
Rework signtest-and-branch peepholes for *sbrx_branch.
(adjust_len) [add_set_ZN, cmp_zext]: New.
(QIPSI): New mode iterator.
(ALLs1, ALLs2, ALLs4, ALLs234): New mode iterators.
(gelt): New code iterator.
(gelt_eqne): New code attribute.
(rvbranch, *rvbranch, difficult_rvbranch, *difficult_rvbranch)
(branch_unspec, *negated_tst, *reversed_tst)
(*cmpqi_sign_extend): Remove insns.
(define_c_enum "unspec") [UNSPEC_IDENTITY]: Remove.

* config/avr/avr-dimode.md (cbranch4): Canonicalize comparisons.
* config/avr/predicates.md (scratch_or_d_register_operand): New.
* config/avr/constraints.md (Yxx): New constraint.

gcc/testsuite/
PR target/109650
* config/avr/torture/pr109650-1.c: New test.
* config/avr/torture/pr109650-2.c: New test.


Re: [patch,avr] Fix PR109650 wrong code

2023-05-19 Thread Georg-Johann Lay

...Ok, and now with the patch attached...

Here is a revised version of the patch.  The difference to the
previous one is that it adds some combine patterns for *cbranch
insns that were lost in the PR92729 transition.  The post-reload
part of the patterns were still there.  The new patterns are
slightly more general in that they also handle fixed-point modes.

Apart from that, the patch behaves the same:

Am 15.05.23 um 20:05 schrieb Georg-Johann Lay:

This patch fixes a wrong-code bug in the wake of PR92729, the transition
that turned the AVR backend from cc0 to CCmode.  In cc0, the insn that
uses cc0 like a conditional branch always follows the cc0 setter, which
is no more the case with CCmode where set and use of REG_CC might be in
different basic blocks.

This patch removes the machine-dependent reorg pass in avr_reorg entirely.

It is replaced by a new, AVR specific mini-pass that runs prior to
split2. Canonicalization of comparisons away from the "difficult"
codes GT[U] and LE[U] is now mostly performed by implementing
TARGET_CANONICALIZE_COMPARISON.

Moreover:

* Text peephole conditions get "dead_or_set_regno_p (*, REG_CC)" as
needed.

* RTL peephole conditions get "peep2_regno_dead_p (*, REG_CC)" as
needed.

* Conditional branches no more clobber REG_CC.

* insn output for compares looks ahead to determine the branch mode in
use. This needs also "dead_or_set_regno_p (*, REG_CC)".

* Add RTL peepholes for decrement-and-branch detection.

Finally, it fixes some of the many indentation glitches left over from
PR92729.

Ok?

I'd also backport this one because all of v12+ is affected by the wrong 
code.


Johann

--

gcc/
PR target/109650
PR target/92729

* config/avr/avr-passes.def (avr_pass_ifelse): Insert new pass.
* config/avr/avr.cc (avr_pass_ifelse): New RTL pass.
(avr_pass_data_ifelse): New pass_data for it.
(make_avr_pass_ifelse, avr_redundant_compare, avr_cbranch_cost)
(avr_canonicalize_comparison, avr_out_plus_set_ZN)
(avr_out_cmp_ext): New functions.
(compare_condition): Make sure REG_CC dies in the branch insn.
(avr_rtx_costs_1): Add computation of cbranch costs.
(avr_adjust_insn_length) [ADJUST_LEN_ADD_SET_ZN, ADJUST_LEN_CMP_ZEXT]
[ADJUST_LEN_CMP_SEXT]: Handle them.
(TARGET_CANONICALIZE_COMPARISON): New define.
(avr_simplify_comparison_p, compare_diff_p, avr_compare_pattern)
(avr_reorg_remove_redundant_compare, avr_reorg): Remove functions.
(TARGET_MACHINE_DEPENDENT_REORG): Remove define.

* avr-protos.h (avr_simplify_comparison_p): Remove proto.
(make_avr_pass_ifelse, avr_out_plus_set_ZN, cc_reg_rtx)
(avr_out_cmp_zext): New Protos

* config/avr/avr.md (branch, difficult_branch): Don't split insns.
(*cbranchhi.zero-extend.0", *cbranchhi.zero-extend.1")
(*swapped_tst, *add.for.eqne.): New insns.
(*cbranch4): Rename to cbranch4_insn.
(define_peephole): Add dead_or_set_regno_p(insn,REG_CC) as needed.
(define_peephole2): Add peep2_regno_dead_p(*,REG_CC) as needed.
Add new RTL peepholes for decrement-and-branch and *swapped_tst.
Rework signtest-and-branch peepholes for *sbrx_branch.
(adjust_len) [add_set_ZN, cmp_zext]: New.
(QIPSI): New mode iterator.
(ALLs1, ALLs2, ALLs4, ALLs234): New mode iterators.
(gelt): New code iterator.
(gelt_eqne): New code attribute.
(rvbranch, *rvbranch, difficult_rvbranch, *difficult_rvbranch)
(branch_unspec, *negated_tst, *reversed_tst)
(*cmpqi_sign_extend): Remove insns.
(define_c_enum "unspec") [UNSPEC_IDENTITY]: Remove.

* config/avr/avr-dimode.md (cbranch4): Canonicalize comparisons.
* config/avr/predicates.md (scratch_or_d_register_operand): New.
* config/avr/constraints.md (Yxx): New constraint.

gcc/testsuite/
PR target/109650
* config/avr/torture/pr109650-1.c: New test.
* config/avr/torture/pr109650-2.c: New test.

diff --git a/gcc/config/avr/avr-dimode.md b/gcc/config/avr/avr-dimode.md
index c0bb04ff9e0..91f0d395761 100644
--- a/gcc/config/avr/avr-dimode.md
+++ b/gcc/config/avr/avr-dimode.md
@@ -455,12 +455,18 @@ (define_expand "conditional_jump"
 (define_expand "cbranch4"
   [(set (pc)
 (if_then_else (match_operator 0 "ordered_comparison_operator"
-[(match_operand:ALL8 1 "register_operand"  "")
- (match_operand:ALL8 2 "nonmemory_operand" "")])
- (label_ref (match_operand 3 "" ""))
- (pc)))]
+[(match_operand:ALL8 1 "register_operand")
+ (match_operand:ALL8 2 "nonmemory_operand")])
+  (label_ref (match_operand 3)

[avr,committed] Fix PR90622

2023-05-21 Thread Georg-Johann Lay

This patch fixes a minor optimization issue for an avr specific builtin.
Applied as obvious.

https://gcc.gnu.org/r14-1025

Johann

--


target/90622: __builtin_avr_insert bits: Use BLD/BST for one bit in place.

If just one bit is inserted in the same position like with:
__builtin_avr_insert_bits (0xF2FF, src, dst);
a BLD/BST sequence is better than XOR/AND/XOR.  Thus, don't fold that
case to the latter sequence.

gcc/
PR target/90622
* config/avr/avr.cc (avr_fold_builtin) [AVR_BUILTIN_INSERT_BITS]:
Don't fold to XOR / AND / XOR if just one bit is copied to the
same position.

diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index d5af40f7091..9fa50ca230d 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -14425,10 +14425,13 @@ avr_fold_builtin (tree fndecl, int n_args 
ATTRIBUTE_UNUSED, tree *arg,

 if (changed)
   return build_call_expr (fndecl, 3, tmap, tbits, tval);

-/* If bits don't change their position we can use vanilla logic
-   to merge the two arguments.  */
+/* If bits don't change their position, we can use vanilla logic
+   to merge the two arguments...  */

-   if (avr_map_metric (map, MAP_NONFIXED_0_7) == 0)
+if (avr_map_metric (map, MAP_NONFIXED_0_7) == 0
+// ...except when we are copying just one bit. In that
+// case, BLD/BST is better than XOR/AND/XOR, see PR90622.
+&& avr_map_metric (map, MAP_FIXED_0_7) != 1)
   {
 int mask_f = avr_map_metric (map, MAP_MASK_PREIMAGE_F);
 tree tres, tmask = build_int_cst (val_type, mask_f ^ 0xff);


[avr,testsuite,committed] Skip tests that fail for avr for this or that reason.

2023-05-22 Thread Georg-Johann Lay

This annotates some tests that won't work for AVR like:

* asm goto with output reload (AVR is not lra).

* Using a program address as a ram address.

* Float related stuff: AVR double is 32-bit, and long double
  is incomplete (some functions missing, no signed zeros, etc.)

Applied as obvious.

Johann

--

Skip some tests that won't work for target AVR.

gcc/testsuite/
* lib/target-supports.exp (check_effective_target_lra) 
[avr]: Return 0.

* gcc.dg/pr19402-2.c: Skip for avr.
* gcc.dg/pr86124.c: Same.
* gcc.dg/pr94291.c: Same.
* gcc.dg/torture/builtin-complex-1.c: Same.
* gcc.dg/torture/fp-int-convert-float32x-timode.c: Same.
* gcc.dg/torture/fp-int-convert-float32x.c: Same.
* gcc.dg/torture/fp-int-convert-float64-timode.c: Same.
* gcc.dg/torture/fp-int-convert-float64.c: Same.
* gcc.dg/torture/fp-int-convert-long-double.c: Same.
* gcc.dg/torture/fp-int-convert-timode.c: Same.
* c-c++-common/torture/builtin-convertvector-1.c: Same.
* c-c++-common/torture/complex-sign-add.c: Same.
* c-c++-common/torture/complex-sign-mixed-add.c: Same.
* c-c++-common/torture/complex-sign-mixed-div.c: Same.
* c-c++-common/torture/complex-sign-mixed-mul.c: Same.
* c-c++-common/torture/complex-sign-mixed-sub.c: Same.
* c-c++-common/torture/complex-sign-mul-minus-one.c: Same.
* c-c++-common/torture/complex-sign-mul-one.c: Same.
* c-c++-common/torture/complex-sign-mul.c: Same.
* c-c++-common/torture/complex-sign-sub.c: Same.

diff --git 
a/gcc/testsuite/c-c++-common/torture/builtin-convertvector-1.c 
b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-1.c

index 347dda7692d..fababf1a9eb 100644
--- a/gcc/testsuite/c-c++-common/torture/builtin-convertvector-1.c
+++ b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-1.c
@@ -1,3 +1,5 @@
+/* { dg-skip-if "double support is incomplete" { "avr-*-*" } } */
+
 extern
 #ifdef __cplusplus
 "C"
diff --git a/gcc/testsuite/c-c++-common/torture/complex-sign-add.c 
b/gcc/testsuite/c-c++-common/torture/complex-sign-add.c

index e81223224dc..c1e7886a0df 100644
--- a/gcc/testsuite/c-c++-common/torture/complex-sign-add.c
+++ b/gcc/testsuite/c-c++-common/torture/complex-sign-add.c
@@ -2,6 +2,7 @@
addition.  */
 /* { dg-do run } */
 /* { dg-options "-std=gnu99" { target c } } */
+/* { dg-skip-if "double support is incomplete" { "avr-*-*" } } */

 #include "complex-sign.h"

diff --git a/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-add.c 
b/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-add.c

index a209161e157..36d305baf53 100644
--- a/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-add.c
+++ b/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-add.c
@@ -3,6 +3,7 @@
 /* { dg-do run } */
 /* { dg-options "-std=gnu99" { target c } } */
 /* { dg-skip-if "ptx can elide zero additions" { "nvptx-*-*" } { "-O0" 
} { "" } } */

+/* { dg-skip-if "double support is incomplete" { "avr-*-*" } } */

 #include "complex-sign.h"

diff --git a/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-div.c 
b/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-div.c

index f7ee48341c0..a37074bb3b9 100644
--- a/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-div.c
+++ b/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-div.c
@@ -2,6 +2,7 @@
division.  */
 /* { dg-do run } */
 /* { dg-options "-std=gnu99" { target c } } */
+/* { dg-skip-if "double support is incomplete" { "avr-*-*" } } */

 #include "complex-sign.h"

diff --git a/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-mul.c 
b/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-mul.c

index 02f936b75bd..1e528b986c5 100644
--- a/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-mul.c
+++ b/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-mul.c
@@ -2,6 +2,7 @@
multiplication.  */
 /* { dg-do run } */
 /* { dg-options "-std=gnu99" { target c } } */
+/* { dg-skip-if "double support is incomplete" { "avr-*-*" } } */

 #include "complex-sign.h"

diff --git a/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-sub.c 
b/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-sub.c

index 02ab4db247c..63c75dfdff2 100644
--- a/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-sub.c
+++ b/gcc/testsuite/c-c++-common/torture/complex-sign-mixed-sub.c
@@ -3,6 +3,7 @@
 /* { dg-do run } */
 /* { dg-options "-std=gnu99" { target c } } */
 /* { dg-skip-if "ptx can elide zero additions" { "nvptx-*-*" } { "-O0" 
} { "" } } */

+/* { dg-skip-if "double support is incomplete" { "avr-*-*" } } */

 #include "complex-sign.h"

diff --git 
a/gcc/testsuite/c-c++-common/torture/complex-sign-mul-minus-one.c 
b/gcc/testsuite/c-c++-common/torture/complex-sign-mul-minus-one.c

index 05cc4fabea4..f8abdd00e2e 100644
--- a/gcc/testsuite/c-c++-common/torture/co

[testsuite,committed] PR testsuite/52641

2023-05-22 Thread Georg-Johann Lay
Applied more annotations to reduce testsuite fallout for 16-bit int / 
pointer targets.


https://gcc.gnu.org/r14-1074

Most of the affected tests use constants not suitable for 16-bit int, 
bit-fields wider than 16 bits, etc.


Johann

--

commit 9f5065094c9632a50bea604d5896a139609e50cf
Author: Georg-Johann Lay 
Date:   Mon May 22 16:47:56 2023 +0200

testsuite/52641: Fix tests that fail for 16-bit int / pointer targets.

gcc/testsuite/
PR testsuite/52641
* c-c++-common/pr19807-2.c: Use __SIZEOF_INT__ instead of 4.
* gcc.c-torture/compile/pr103813.c: Require size32plus.
* gcc.c-torture/execute/pr108498-2.c: Same.
* gcc.c-torture/compile/pr96426.c: Condition on
__SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__.
* gcc.c-torture/execute/pr103417.c: Require int32plus.
* gcc.dg/pr104198.c: Same.
* gcc.dg/pr21137.c: Same.
* gcc.dg/pr88905.c: Same.
* gcc.dg/pr90838.c: Same.
* gcc.dg/pr97317.c: Same.
* gcc.dg/pr100292.c: Require int32.
* gcc.dg/pr101008.c: Same.
* gcc.dg/pr96542.c: Same.
* gcc.dg/pr96674.c: Same.
* gcc.dg/pr97750.c: Require ptr_eq_long.

diff --git a/gcc/testsuite/c-c++-common/pr19807-2.c 
b/gcc/testsuite/c-c++-common/pr19807-2.c

index 529b9c97322..29a370304d3 100644
--- a/gcc/testsuite/c-c++-common/pr19807-2.c
+++ b/gcc/testsuite/c-c++-common/pr19807-2.c
@@ -6,7 +6,7 @@ int i;
 int main()
 {
   int a[4];
-  if ((char*)&a[1] + 4*i + 4 != (char*)&a[i+2])
+  if ((char*)&a[1] + __SIZEOF_INT__*i + __SIZEOF_INT__ != (char*)&a[i+2])
 link_error();
   return 0;
 }
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr103813.c 
b/gcc/testsuite/gcc.c-torture/compile/pr103813.c

index b3fc066beed..0aa64fb3152 100644
--- a/gcc/testsuite/gcc.c-torture/compile/pr103813.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr103813.c
@@ -1,4 +1,5 @@
 /* PR middle-end/103813 */
+/* { dg-require-effective-target size32plus } */

 struct A { char b; char c[0x2100]; };
 struct A d;
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr96426.c 
b/gcc/testsuite/gcc.c-torture/compile/pr96426.c

index bd573fe5366..fdb441efc10 100644
--- a/gcc/testsuite/gcc.c-torture/compile/pr96426.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr96426.c
@@ -1,5 +1,7 @@
 /* PR middle-end/96426 */

+#if __SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__
+
 typedef long long V __attribute__((vector_size(16)));
 typedef double W __attribute__((vector_size(16)));

@@ -8,3 +10,5 @@ foo (V *v)
 {
   __builtin_convertvector (*v, W);
 }
+
+#endif
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr103417.c 
b/gcc/testsuite/gcc.c-torture/execute/pr103417.c

index 0fef8908036..ea4b99030a5 100644
--- a/gcc/testsuite/gcc.c-torture/execute/pr103417.c
+++ b/gcc/testsuite/gcc.c-torture/execute/pr103417.c
@@ -1,4 +1,5 @@
 /* PR tree-optimization/103417 */
+/* { dg-require-effective-target int32plus } */

 struct { int a : 8; int b : 24; } c = { 0, 1 };

diff --git a/gcc/testsuite/gcc.c-torture/execute/pr108498-2.c 
b/gcc/testsuite/gcc.c-torture/execute/pr108498-2.c

index ad930488c33..fdd628cbc86 100644
--- a/gcc/testsuite/gcc.c-torture/execute/pr108498-2.c
+++ b/gcc/testsuite/gcc.c-torture/execute/pr108498-2.c
@@ -1,4 +1,5 @@
 /* PR tree-optimization/108498 */
+/* { dg-require-effective-target int32plus } */

 struct U { char c[16]; };
 struct V { char c[16]; };
diff --git a/gcc/testsuite/gcc.dg/pr100292.c 
b/gcc/testsuite/gcc.dg/pr100292.c

index 675a60c3412..147c9324d81 100644
--- a/gcc/testsuite/gcc.dg/pr100292.c
+++ b/gcc/testsuite/gcc.dg/pr100292.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target int32 } */

 typedef unsigned char __attribute__((__vector_size__ (4))) V;

diff --git a/gcc/testsuite/gcc.dg/pr101008.c 
b/gcc/testsuite/gcc.dg/pr101008.c

index c06208d3425..8229769c6ac 100644
--- a/gcc/testsuite/gcc.dg/pr101008.c
+++ b/gcc/testsuite/gcc.dg/pr101008.c
@@ -1,6 +1,7 @@
 /* PR rtl-optimization/101008 */
 /* { dg-do compile } */
 /* { dg-options "-O2 -g" } */
+/* { dg-require-effective-target int32 } */

 typedef unsigned __attribute__((__vector_size__(32))) U;
 typedef unsigned __attribute__((__vector_size__(16))) V;
diff --git a/gcc/testsuite/gcc.dg/pr104198.c 
b/gcc/testsuite/gcc.dg/pr104198.c

index bfc7a777184..de86f49c9dc 100644
--- a/gcc/testsuite/gcc.dg/pr104198.c
+++ b/gcc/testsuite/gcc.dg/pr104198.c
@@ -3,6 +3,7 @@

 /* { dg-do run } */
 /* { dg-options "-O2 -std=c99" } */
+/* { dg-require-effective-target int32plus } */

 #include 
 #include 
diff --git a/gcc/testsuite/gcc.dg/pr21137.c b/gcc/testsuite/gcc.dg/pr21137.c
index 6d73deaee6c..199555a5017 100644
--- a/gcc/testsuite/gcc.dg/pr21137.c
+++ b/gcc/testsuite/gcc.dg/pr21137.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-require-effective-target int32plus } */

 

[testsuite,committed]: PR52641: Fix more of the int=32 assumption fallout.

2023-05-22 Thread Georg-Johann Lay

Applied more fixes for the int=32 assumption fallout.

Johann

--

testsuite/52641: Fix more of implicit int=32 assumption fallout.

gcc/testsuite/
PR testsuite/52641
* gcc.c-torture/compile/pr108892.c: Require int32.
* gcc.c-torture/compile/pr98199.c: Require int32plus.
* gcc.dg/analyzer/call-summaries-pr107072.c: Same.
* gcc.dg/analyzer/null-deref-pr105755.c: Same.
* gcc.dg/tree-ssa/pr102232.c: Same.
* gcc.dg/tree-ssa/pr105860.c: Same.
* gcc.dg/tree-ssa/pr96730.c: Same.
* gcc.dg/tree-ssa/pr96779-disabled.c: Same.
* gcc.dg/tree-ssa/pr96779.c: Same.
* gcc.dg/tree-ssa/pr98513.c: Same.
* gcc.dg/tree-ssa/ssa-sink-18.c: Same.
* gcc.dg/analyzer/coreutils-cksum-pr108664.c: Require int32plus,
size24plus.
* gcc.dg/analyzer/doom-s_sound-pr108867.c: Require size32plus.
* gcc.dg/analyzer/malloc-CWE-590-examples.c: Same.
* gcc.dg/debug/btf/btf-bitfields-4.c: Same.
* gcc.dg/tree-ssa/pr93435.c: Same.
* gcc.dg/analyzer/null-deref-pr102671-1.c: Require ptr_eq_long.
* gcc.dg/analyzer/null-deref-pr102671-2.c: Same.
* gcc.dg/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early-O2.c:
Same.
* gcc.dg/analyzer/null-deref-pr108251-smp_fetch_ssl_fc_has_early.c:
Same.
* gcc.dg/tree-ssa/pr103345.c: Use uint32_t.
* gcc.dg/tree-ssa/ssa-ccp-41.c [sizeof(int)==2]: Same.
* gcc.dg/tree-ssa/pr109031-1.c: Use uint16_t, uint32_t.
* gcc.dg/tree-ssa/pr109031-2.c: Same.
* gcc.dg/Warray-bounds-49.c (dg-warning): Discriminate int != short.
* gcc.dg/Warray-bounds-52.c (dg-warning): Discriminate avr.
* gcc.dg/Warray-bounds-33.c: Skip target avr.
* gcc.dg/analyzer/fd-access-mode-target-headers.c: Same.
* gcc.dg/analyzer/flex-with-call-summaries.c: Same.
* gcc.dg/analyzer/isatty-1.c: Same.
* gcc.dg/analyzer/pipe-glibc.c: Same.

diff --git a/gcc/testsuite/gcc.c-torture/compile/pr108892.c 
b/gcc/testsuite/gcc.c-torture/compile/pr108892.c

index d7fecd54ecf..fb0a258cdba 100644
--- a/gcc/testsuite/gcc.c-torture/compile/pr108892.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr108892.c
@@ -1,3 +1,5 @@
+/* { dg-require-effective-target int32 } */
+
 typedef char __attribute__((__vector_size__ (64))) U;
 typedef int __attribute__((__vector_size__ (64))) V;

diff --git a/gcc/testsuite/gcc.c-torture/compile/pr98199.c 
b/gcc/testsuite/gcc.c-torture/compile/pr98199.c

index b5c8d204f0e..6605d38788c 100644
--- a/gcc/testsuite/gcc.c-torture/compile/pr98199.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr98199.c
@@ -1,4 +1,5 @@
 /* PR tree-optimization/98199 */
+/* { dg-require-effective-target int32plus } */

 struct A { long a; short d; int c, f, e, g; };
 struct B { int a, i; short j; struct A k; signed : 20; int e, g; } 
__attribute__((packed));
diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-33.c 
b/gcc/testsuite/gcc.dg/Warray-bounds-33.c

index 28f14b4722c..13efabe33b6 100644
--- a/gcc/testsuite/gcc.dg/Warray-bounds-33.c
+++ b/gcc/testsuite/gcc.dg/Warray-bounds-33.c
@@ -2,6 +2,7 @@
an object of incomplete type
{ dg-do compile }
{ dg-options "-O2 -Wall" }  */
+/* { dg-skip-if "acessing data memory with program memory address" { 
"avr-*-*" } } */


 struct S
 {
diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-49.c 
b/gcc/testsuite/gcc.dg/Warray-bounds-49.c

index f271dd526b8..9335f1507e8 100644
--- a/gcc/testsuite/gcc.dg/Warray-bounds-49.c
+++ b/gcc/testsuite/gcc.dg/Warray-bounds-49.c
@@ -17,7 +17,8 @@ void test_a0 (void)
   // The first three elements fit in the tail padding.
   a0.a2[0] = 0; a0.a2[1] = 1; a0.a2[2] = 2;

-  a0.a2[3] = 3; // { dg-warning "array subscript 3 is above array 
bounds of 'short int\\\[]'" }
+  a0.a2[3] = 3; // { dg-warning "array subscript 3 is above array 
bounds of 'short int\\\[]'" "" { target { ! short_eq_int } } }
+  // { dg-warning "array subscript 3 is above array bounds of 
'int\\\[]'" "" { target { short_eq_int } } .-1 }

 }


@@ -27,7 +28,8 @@ void test_a1 (void)
 {
   a1.a2[0] = 0; a1.a2[1] = 1; a1.a2[2] = 2;

-  a1.a2[3] = 3; // { dg-warning "array subscript 3 is above array 
bounds of 'short int\\\[]'" }
+  a1.a2[3] = 3; // { dg-warning "array subscript 3 is above array 
bounds of 'short int\\\[]'" "" { target { ! short_eq_int } } }
+  // { dg-warning "array subscript 3 is above array bounds of 
'int\\\[]'" "" { target { short_eq_int } } .-1 }

 }


@@ -37,7 +39,8 @@ void test_a2 (void)
 {
   a2.a2[0] = 0; a2.a2[1] = 1; a2.a2[2] = 2;

-  a2.a2[3] = 3; // { dg-warning "array subscript 3 is above array 
bounds of 'short int\\\[]'" }
+  a2.a2[3] = 3; // { dg-warning "array subscript 3 is above array 
bounds of 'short int\\\[]'" "" { target { ! short_eq_int } } }
+  // { dg-warning "array subscript 3 is above array bounds of 
'int\\\[]'" "" { target { short_eq_int } } .-1 }

 }


@@ -47,7 +50,8 @@ void test_a3 (void)
 {
   a3.a2[

[patch]: Implement PR104327 for avr

2023-05-23 Thread Georg-Johann Lay

PR target/104327 not only affects s390 but also avr:
The avr backend pre-sets some options depending on optimization level.
The inliner then thinks that always_inline functions are not eligible
for inlining and terminates with an error.

Proposing the following patch that implements TARGET_CAN_INLINE_P.

Ok to apply?

Johann

--

target/104327: Allow more inlining between different optimization levels.

avr-common.cc introduces the following options that are set depending
on optimization level: -mgas-isr-prologues, -mmain-is-OS-task and
-fsplit-wide-types-early.  The inliner thinks that different options
disallow cross-optimization inlining, so provide can_inline_p.

gcc/
PR target/104327
* config/avr/avr.cc (avr_can_inline_p): New static function.
(TARGET_CAN_INLINE_P): Define to that function.
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 9fa50ca230d..55b48f63865 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -1018,6 +1018,22 @@ avr_no_gccisr_function_p (tree func)
   return avr_lookup_function_attribute1 (func, "no_gccisr");
 }

+
+/* Implement `TARGET_CAN_INLINE_P'.  */
+/* Some options like -mgas_isr_prologues depend on optimization level,
+   and the inliner might think that due to different options, inlining
+   is not permitted; see PR104327.  */
+
+static bool
+avr_can_inline_p (tree /* caller */, tree callee)
+{
+  // For now, dont't allow to inline ISRs.  If the user actually wants
+  // to inline ISR code, they have to turn the body of the ISR into an
+  // ordinary function.
+
+  return ! avr_interrupt_function_p (callee);
+}
+
 /* Implement `TARGET_SET_CURRENT_FUNCTION'.  */
 /* Sanity cheching for above function attributes.  */

@@ -14713,6 +14729,9 @@ avr_float_lib_compare_returns_bool (machine_mode 
mode, enum rtx_code)

 #undef  TARGET_MD_ASM_ADJUST
 #define TARGET_MD_ASM_ADJUST avr_md_asm_adjust

+#undef  TARGET_CAN_INLINE_P
+#define TARGET_CAN_INLINE_P avr_can_inline_p
+
 struct gcc_target targetm = TARGET_INITIALIZER;

 



[avr,committed] Fix cost computation for bit insertions.

2023-05-23 Thread Georg-Johann Lay

Applied this patchlet that implements proper cost computation for patterns of the kind

(set (zero_extract (...) ...))

that perform single-bit (possibly inverted) bit insertions.


Johann

--

Improve cost computation for single-bit bit insertions.

Some miscomputation of rtx_costs led to sub-optimal code for
single-bit bit insertions.  This patch implements TARGET_INSN_COST,
which has a chance to see the whole insn during insn combination;
in particular the SET_DEST of (set (zero_extract (...) ...)).

gcc/
* config/avr/avr.cc (avr_insn_cost): New static function.
(TARGET_INSN_COST): Define to that function.

diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 9fa50ca230d..4fa6f5309b2 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -11514,6 +11514,52 @@ avr_rtx_costs (rtx x, machine_mode mode, int 
outer_code,

 }


+/* Implement `TARGET_INSN_COST'.  */
+/* For some insns, it is not enough to look at the cost of the SET_SRC.
+   In that case, have a look at the entire insn, e.g. during insn 
combine.  */

+
+static int
+avr_insn_cost (rtx_insn *insn, bool speed)
+{
+  const int unknown_cost = -1;
+  int cost = unknown_cost;
+
+  rtx set = single_set (insn);
+
+  if (set
+  && ZERO_EXTRACT == GET_CODE (SET_DEST (set)))
+{
+  // Try find anything that would flip the extracted bit.
+  bool not_bit_p = false;
+
+  subrtx_iterator::array_type array;
+  FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
+   {
+ enum rtx_code code = GET_CODE (*iter);
+ not_bit_p |= code == NOT || code == XOR || code == GE;
+   }
+
+  // Don't go too deep into the analysis.  In almost all cases,
+  // using BLD/BST is the best we can do for single-bit moves,
+  // even considering CSE.
+  cost = COSTS_N_INSNS (2 + not_bit_p);
+}
+
+  if (cost != unknown_cost)
+{
+  if (avr_log.rtx_costs)
+   avr_edump ("\n%? (%s) insn_cost=%d\n%r\n",
+  speed ? "speed" : "size", cost, insn);
+  return cost;
+}
+
+  // Resort to what rtlanal.cc::insn_cost() implements as a default
+  // when targetm.insn_cost() is not implemented.
+
+  return pattern_cost (PATTERN (insn), speed);
+}
+
+
 /* Implement `TARGET_ADDRESS_COST'.  */

 static int
@@ -14574,6 +14620,8 @@ avr_float_lib_compare_returns_bool (machine_mode 
mode, enum rtx_code)

 #undef  TARGET_ASM_FINAL_POSTSCAN_INSN
 #define TARGET_ASM_FINAL_POSTSCAN_INSN avr_asm_final_postscan_insn

+#undef  TARGET_INSN_COST
+#define TARGET_INSN_COST avr_insn_cost
 #undef  TARGET_REGISTER_MOVE_COST
 #define TARGET_REGISTER_MOVE_COST avr_register_move_cost
 #undef  TARGET_MEMORY_MOVE_COST


Re: [patch]: Implement PR104327 for avr

2023-05-24 Thread Georg-Johann Lay




Am 24.05.23 um 11:38 schrieb Richard Biener:

On Tue, May 23, 2023 at 2:56 PM Georg-Johann Lay  wrote:


PR target/104327 not only affects s390 but also avr:
The avr backend pre-sets some options depending on optimization level.
The inliner then thinks that always_inline functions are not eligible
for inlining and terminates with an error.

Proposing the following patch that implements TARGET_CAN_INLINE_P.

Ok to apply?

Johann

--

target/104327: Allow more inlining between different optimization levels.

avr-common.cc introduces the following options that are set depending
on optimization level: -mgas-isr-prologues, -mmain-is-OS-task and
-fsplit-wide-types-early.  The inliner thinks that different options
disallow cross-optimization inlining, so provide can_inline_p.

gcc/
 PR target/104327
 * config/avr/avr.cc (avr_can_inline_p): New static function.
 (TARGET_CAN_INLINE_P): Define to that function.
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 9fa50ca230d..55b48f63865 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -1018,6 +1018,22 @@ avr_no_gccisr_function_p (tree func)
 return avr_lookup_function_attribute1 (func, "no_gccisr");
   }

+
+/* Implement `TARGET_CAN_INLINE_P'.  */
+/* Some options like -mgas_isr_prologues depend on optimization level,
+   and the inliner might think that due to different options, inlining
+   is not permitted; see PR104327.  */
+
+static bool
+avr_can_inline_p (tree /* caller */, tree callee)
+{
+  // For now, dont't allow to inline ISRs.  If the user actually wants
+  // to inline ISR code, they have to turn the body of the ISR into an
+  // ordinary function.
+
+  return ! avr_interrupt_function_p (callee);


I'm not sure if AVR has ISA extensions but the above will likely break
things like

void __attribute__((target("-mX"))) foo () { asm ("isa X opcode");
stmt-that-generates-X-ISA; }


This yields

warning: target attribute is not supported on this machine [-Wattributes]

avr has -mmcu= target options, but switching them in mid-air
won't work because the file prologue might already be different
and incompatible across different architectures.  And I never
saw any user requesting such a thing, and I can't imagine
any reasonable use case...  If the warning is not strong enough,
maybe it can be turned into an error, but -Wattributes is not
specific enough for that.


void bar ()
{
   if (cpu-has-X)
 foo ();
}

if always-inlines are the concern you can use

   bool always_inline
 = (DECL_DISREGARD_INLINE_LIMITS (callee)
&& lookup_attribute ("always_inline",
 DECL_ATTRIBUTES (callee)));
   /* Do what the user says.  */
   if (always_inline)
 return true;

   return default_target_can_inline_p (caller, callee);


The default implementation of can_inline_p worked fine for avr.
As far as I understand, the new behavior is due to clean-up
of global states for options?

So I need to take into account inlining costs and decide on that
whether it's preferred to inline a function or not?

Johann


+}
+
   /* Implement `TARGET_SET_CURRENT_FUNCTION'.  */
   /* Sanity cheching for above function attributes.  */

@@ -14713,6 +14729,9 @@ avr_float_lib_compare_returns_bool (machine_mode
mode, enum rtx_code)
   #undef  TARGET_MD_ASM_ADJUST
   #define TARGET_MD_ASM_ADJUST avr_md_asm_adjust

+#undef  TARGET_CAN_INLINE_P
+#define TARGET_CAN_INLINE_P avr_can_inline_p
+
   struct gcc_target targetm = TARGET_INITIALIZER;


Re: [patch]: Implement PR104327 for avr

2023-05-25 Thread Georg-Johann Lay




Am 25.05.23 um 08:35 schrieb Richard Biener:

On Wed, May 24, 2023 at 5:44 PM Georg-Johann Lay  wrote:

Am 24.05.23 um 11:38 schrieb Richard Biener:

On Tue, May 23, 2023 at 2:56 PM Georg-Johann Lay  wrote:


PR target/104327 not only affects s390 but also avr:
The avr backend pre-sets some options depending on optimization level.
The inliner then thinks that always_inline functions are not eligible
for inlining and terminates with an error.

Proposing the following patch that implements TARGET_CAN_INLINE_P.

Ok to apply?

Johann

target/104327: Allow more inlining between different optimization levels.

avr-common.cc introduces the following options that are set depending
on optimization level: -mgas-isr-prologues, -mmain-is-OS-task and
-fsplit-wide-types-early.  The inliner thinks that different options
disallow cross-optimization inlining, so provide can_inline_p.

gcc/
  PR target/104327
  * config/avr/avr.cc (avr_can_inline_p): New static function.
  (TARGET_CAN_INLINE_P): Define to that function.
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 9fa50ca230d..55b48f63865 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -1018,6 +1018,22 @@ avr_no_gccisr_function_p (tree func)
  return avr_lookup_function_attribute1 (func, "no_gccisr");
}

+
+/* Implement `TARGET_CAN_INLINE_P'.  */
+/* Some options like -mgas_isr_prologues depend on optimization level,
+   and the inliner might think that due to different options, inlining
+   is not permitted; see PR104327.  */
+
+static bool
+avr_can_inline_p (tree /* caller */, tree callee)
+{
+  // For now, dont't allow to inline ISRs.  If the user actually wants
+  // to inline ISR code, they have to turn the body of the ISR into an
+  // ordinary function.
+
+  return ! avr_interrupt_function_p (callee);


I'm not sure if AVR has ISA extensions but the above will likely break
things like

void __attribute__((target("-mX"))) foo () { asm ("isa X opcode");
stmt-that-generates-X-ISA; }


This yields

warning: target attribute is not supported on this machine [-Wattributes]


Ah, that's an interesting fact.  So that indeed leaves
__attribute__((optimize(...)))
influencing the set of active target attributes via the generic option target
hooks like in your case the different defaults.


avr has -mmcu= target options, but switching them in mid-air
won't work because the file prologue might already be different
and incompatible across different architectures.  And I never
saw any user requesting such a thing, and I can't imagine
any reasonable use case...  If the warning is not strong enough,
maybe it can be turned into an error, but -Wattributes is not
specific enough for that.


Note the target attribute is then simply ignored.


void bar ()
{
if (cpu-has-X)
  foo ();
}

if always-inlines are the concern you can use

bool always_inline
  = (DECL_DISREGARD_INLINE_LIMITS (callee)
 && lookup_attribute ("always_inline",
  DECL_ATTRIBUTES (callee)));
/* Do what the user says.  */
if (always_inline)
  return true;

return default_target_can_inline_p (caller, callee);


The default implementation of can_inline_p worked fine for avr.
As far as I understand, the new behavior is due to clean-up
of global states for options?


I think the last change was r8-2658-g9b25e12d2d940a which
for targets without target attribute support made it more likely
to run into the default hook actually comparing the options.
Previously the "default" was oddly special-cased but you
could have still run into compares with two different sets of
defaults when there's another "default" default.  Say, compile
with -O2 and have one optimize(0) and one optimize(Os)
function it would compare the optimize(0) and optimize(Os)
set if they were distinct from the -O2 set.  That probably never
happened for AVR.


So I need to take into account inlining costs and decide on that
whether it's preferred to inline a function or not?


No, the hook isn't about cost, it's about full incompatibility.  So
if the different -m options that could be in effect for AVR in
a single TU for different functions never should prevent inlining
then simply make the hook return true.  If there's a specific
option (that can differ from what specified on the compiler
command line!) that should, then you should compare the
setting of that option from the DECL_FUNCTION_SPECIFIC_TARGET
of the caller and the callee.

But as far as I can see simply returning true should be correct
for AVR, or like your patch handle interrupts differently (though
the -Winline diagnostic will tell the user there's a mismatch in
target options which might be confusing).


Ok, simply "true" sounds reasonable.  Is that change ok then?

Johann



Richard.


Johann

[avr,committed] PR82931: Improve single-bit transfers between registers.

2023-05-25 Thread Georg-Johann Lay

Applied this patch that makes one insn more generic so it can handle
more bit positions than just 0.

Johann

--

target/82931: Make a pattern more generic to match more bit-transfers.

There is already a pattern in avr.md that matches single-bit transfers
from one register to another one, but it only handled bit 0 of 8-bit
registers.  This change makes that pattern more generic so it matches
more of similar single-bit transfers.

gcc/
PR target/82931
* config/avr/avr.md (*movbitqi.0): Rename to *movbit.0-6.
Handle any bit position and use mode QISI.
* config/avr/avr.cc (avr_rtx_costs_1) [IOR]: Return a cost
of 2 insns for bit-transfer of respective style.

gcc/testsuite/
PR target/82931
* gcc.target/avr/pr82931.c: New test.

diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 4fa6f5309b2..31706964eb1 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -10843,6 +10843,15 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int 
outer_code,

 *total += COSTS_N_INSNS (1);
   return true;
 }
+  if (IOR == code
+  && AND == GET_CODE (XEXP (x, 0))
+  && AND == GET_CODE (XEXP (x, 1))
+  && single_zero_operand (XEXP (XEXP (x, 0), 1), mode))
+{
+  // Open-coded bit transfer.
+  *total = COSTS_N_INSNS (2);
+  return true;
+}
   *total = COSTS_N_INSNS (GET_MODE_SIZE (mode));
   *total += avr_operand_rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   if (!CONST_INT_P (XEXP (x, 1)))
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index a79c6824fad..371965938a6 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -9096,16 +9096,20 @@ (define_insn "*movbitqi.1-6.b"
   "bst %3,0\;bld %0,%4"
   [(set_attr "length" "2")])

-;; Move bit $3.0 into bit $0.0.
-;; For bit 0, combiner generates slightly different pattern.
-(define_insn "*movbitqi.0"
-  [(set (match_operand:QI 0 "register_operand" "=r")
-(ior:QI (and:QI (match_operand:QI 1 "register_operand"  "0")
-(match_operand:QI 2 "single_zero_operand"   "n"))
-(and:QI (match_operand:QI 3 "register_operand"  "r")
-(const_int 1]
-  "0 == exact_log2 (~INTVAL(operands[2]) & GET_MODE_MASK (QImode))"
-  "bst %3,0\;bld %0,0"
+;; Move bit $3.x into bit $0.x.
+(define_insn "*movbit.0-6"
+  [(set (match_operand:QISI 0 "register_operand" 
"=r")
+(ior:QISI (and:QISI (match_operand:QISI 1 "register_operand" 
"0")
+(match_operand:QISI 2 "single_zero_operand" 
"n"))
+  (and:QISI (match_operand:QISI 3 "register_operand" 
"r")
+(match_operand:QISI 4 "single_one_operand" 
"n"]

+  "GET_MODE_MASK(mode)
+   == (GET_MODE_MASK(mode) & (INTVAL(operands[2]) ^ 
INTVAL(operands[4])))"

+  {
+auto bitmask = GET_MODE_MASK (mode) & UINTVAL (operands[4]);
+operands[4] = GEN_INT (exact_log2 (bitmask));
+return "bst %T3%T4" CR_TAB "bld %T0%T4";
+  }
   [(set_attr "length" "2")])

 ;; Move bit $2.0 into bit $0.7.
diff --git a/gcc/testsuite/gcc.target/avr/pr82931.c 
b/gcc/testsuite/gcc.target/avr/pr82931.c

new file mode 100644
index 000..477284fa127
--- /dev/null
+++ b/gcc/testsuite/gcc.target/avr/pr82931.c
@@ -0,0 +1,29 @@
+/* { dg-options "-Os" } */
+/* { dg-final { scan-assembler-times "bst" 4 } } */
+/* { dg-final { scan-assembler-times "bld" 4 } } */
+
+typedef __UINT8_TYPE__ uint8_t;
+typedef __UINT16_TYPE__ uint16_t;
+
+#define BitMask (1u << 14)
+#define Bit8Mask ((uint8_t) (1u << 4))
+
+void merge1_8 (uint8_t *dst, const uint8_t *src)
+{
+*dst = (*src & Bit8Mask) | (*dst & ~ Bit8Mask);
+}
+
+void merge2_8 (uint8_t *dst, const uint8_t *src)
+{
+*dst ^= (*dst ^ *src) & Bit8Mask;
+}
+
+void merge1_16 (uint16_t *dst, const uint16_t *src)
+{
+*dst = (*src & BitMask) | (*dst & ~ BitMask);
+}
+
+void merge2_16 (uint16_t *dst, const uint16_t *src)
+{
+*dst ^= (*dst ^ *src) & BitMask;
+}


[avr,committed]: Implement PR104327 for avr

2023-05-25 Thread Georg-Johann Lay




Am 25.05.23 um 17:07 schrieb Richard Biener:




Am 25.05.2023 um 16:22 schrieb Georg-Johann Lay :




Am 25.05.23 um 08:35 schrieb Richard Biener:

On Wed, May 24, 2023 at 5:44 PM Georg-Johann Lay  wrote:
Am 24.05.23 um 11:38 schrieb Richard Biener:

On Tue, May 23, 2023 at 2:56 PM Georg-Johann Lay  wrote:


PR target/104327 not only affects s390 but also avr:
The avr backend pre-sets some options depending on optimization level.
The inliner then thinks that always_inline functions are not eligible
for inlining and terminates with an error.

Proposing the following patch that implements TARGET_CAN_INLINE_P.

Ok to apply?

Johann

target/104327: Allow more inlining between different optimization levels.

avr-common.cc introduces the following options that are set depending
on optimization level: -mgas-isr-prologues, -mmain-is-OS-task and
-fsplit-wide-types-early.  The inliner thinks that different options
disallow cross-optimization inlining, so provide can_inline_p.

gcc/
  PR target/104327
  * config/avr/avr.cc (avr_can_inline_p): New static function.
  (TARGET_CAN_INLINE_P): Define to that function.
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 9fa50ca230d..55b48f63865 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -1018,6 +1018,22 @@ avr_no_gccisr_function_p (tree func)
  return avr_lookup_function_attribute1 (func, "no_gccisr");
}

+
+/* Implement `TARGET_CAN_INLINE_P'.  */
+/* Some options like -mgas_isr_prologues depend on optimization level,
+   and the inliner might think that due to different options, inlining
+   is not permitted; see PR104327.  */
+
+static bool
+avr_can_inline_p (tree /* caller */, tree callee)
+{
+  // For now, dont't allow to inline ISRs.  If the user actually wants
+  // to inline ISR code, they have to turn the body of the ISR into an
+  // ordinary function.
+
+  return ! avr_interrupt_function_p (callee);


I'm not sure if AVR has ISA extensions but the above will likely break
things like

void __attribute__((target("-mX"))) foo () { asm ("isa X opcode");
stmt-that-generates-X-ISA; }


This yields

warning: target attribute is not supported on this machine [-Wattributes]

Ah, that's an interesting fact.  So that indeed leaves
__attribute__((optimize(...)))
influencing the set of active target attributes via the generic option target
hooks like in your case the different defaults.

avr has -mmcu= target options, but switching them in mid-air
won't work because the file prologue might already be different
and incompatible across different architectures.  And I never
saw any user requesting such a thing, and I can't imagine
any reasonable use case...  If the warning is not strong enough,
maybe it can be turned into an error, but -Wattributes is not
specific enough for that.

Note the target attribute is then simply ignored.

void bar ()
{
if (cpu-has-X)
  foo ();
}

if always-inlines are the concern you can use

bool always_inline
  = (DECL_DISREGARD_INLINE_LIMITS (callee)
 && lookup_attribute ("always_inline",
  DECL_ATTRIBUTES (callee)));
/* Do what the user says.  */
if (always_inline)
  return true;

return default_target_can_inline_p (caller, callee);


The default implementation of can_inline_p worked fine for avr.
As far as I understand, the new behavior is due to clean-up
of global states for options?

I think the last change was r8-2658-g9b25e12d2d940a which
for targets without target attribute support made it more likely
to run into the default hook actually comparing the options.
Previously the "default" was oddly special-cased but you
could have still run into compares with two different sets of
defaults when there's another "default" default.  Say, compile
with -O2 and have one optimize(0) and one optimize(Os)
function it would compare the optimize(0) and optimize(Os)
set if they were distinct from the -O2 set.  That probably never
happened for AVR.

So I need to take into account inlining costs and decide on that
whether it's preferred to inline a function or not?

No, the hook isn't about cost, it's about full incompatibility.  So
if the different -m options that could be in effect for AVR in
a single TU for different functions never should prevent inlining
then simply make the hook return true.  If there's a specific
option (that can differ from what specified on the compiler
command line!) that should, then you should compare the
setting of that option from the DECL_FUNCTION_SPECIFIC_TARGET
of the caller and the callee.
But as far as I can see simply returning true should be correct
for AVR, or like your patch handle interrupts differently (though
the -Winline diagnostic will tell the user there's a mismatch in
target options which might be confusing).


Ok, simply "true" sou

[patch, avr] Fix PR target/99184: Wrong cast from double to 16-bit and 32-bit ints.

2022-09-18 Thread Georg Johann Lay

Hello,

this patch fixes PR target/99184, where conversions from 64-bit
(long) double to 16-bit and 32-bit integers were incorrectly rounded.


The patch just removes the respective roundings from 
libf7-asm.sx::to_integer and ::to_unsigned.  Luckily, LibF7 hardly uses
the respective functions internally; the only user is in libf7.c::f7_exp


which reads

  f7_round (qq, qq);
  int16_t q = f7_get_s16 (qq);

so that f7_get_s16() operates on an already rounded value, and therefore 
this code works unaltered with or without rounding in to_integer.


The patch applies to directory

./libgcc/config/avr/libf7/

and is the same for all GCC versions v10+.

Please someone with write permissions commit it to trunk and backport to 
v12, v11, and v10 as it is a wrong-code issue.


The patch will fit without problems (except for ChangeLog) because there 
is no traffic on that folder.



Thanks, Johann


libgcc/config/avr/libf7/
PR target/99184
Remove rounding from double to [u]int16 and [u]int32 casts.

* libf7-asm.sx (to_integer, to_unsigned): Don't round 16-bit
and 32-bit integers.

diff --git a/ChangeLog b/ChangeLog
index 7e06f52..3ec0082 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+
+	PR target/99184
+	Remove rounding from double to [u]int16 and [u]int32 casts.
+
+	* libf7-asm.sx (to_integer, to_unsigned): Don't round 16-bit
+	and 32-bit integers.
+
 2022-04-21  Release Manager
 
 	* GCC 11.3.0 released.
diff --git a/libf7-asm.sx b/libf7-asm.sx
index 7629e23..9d701f2 100644
--- a/libf7-asm.sx
+++ b/libf7-asm.sx
@@ -601,9 +601,6 @@ DEFUN to_integer
 tst C6
 brmi.Lsaturate.T;   > INTxx_MAX  =>  saturate
 
-rcall   .Lround
-brmi.Lsaturate.T;   > INTxx_MAX  =>  saturate
-
 brtc 9f ;   >= 0 =>  return
 sbrcMask,   5
 .global __negdi2
@@ -658,30 +655,6 @@ DEFUN to_integer
 .global __clr_8
 XJMP__clr_8
 
-.Lround:
-;; C6.7 is known to be 0 here.
-;; Return N = 1 iff we have to saturate.
-cpi Mask,   0xf
-breq .Lround16
-cpi Mask,   0x1f
-breq .Lround32
-
-;; For now, no rounding in the 64-bit case.  This rounding
-;; would have to be integrated into the right-shift.
-cln
-ret
-
-.Lround32:
-rol C2
-adc C3, ZERO
-adc C4, ZERO
-rjmp 2f
-
-.Lround16:
-rol C4
-2:  adc C5, ZERO
-adc C6, ZERO
-ret
 ENDF to_integer
 #endif /* F7MOD_to_integer_ */
 
@@ -725,29 +698,6 @@ DEFUN to_unsigned
 clr CA
 F7call  lshrdi3
 POP r16
-
-;; Rounding
-;; ??? C6.7 is known to be 0 here.
-cpi Mask,   0xf
-breq .Lround16
-cpi Mask,   0x1f
-breq .Lround32
-
-;; For now, no rounding in the 64-bit case.  This rounding
-;; would have to be integrated into the right-shift.
-ret
-
-.Lround32:
-rol C2
-adc C3, ZERO
-adc C4, ZERO
-rjmp 2f
-
-.Lround16:
-rol C4
-2:  adc C5, ZERO
-adc C6, ZERO
-brcs.Lset_0x; Rounding overflow  =>  saturate
 ret
 
 .Lset_0x:


Re: [patch, avr] Fix PR target/99184: Wrong cast from double to 16-bit and 32-bit ints.

2022-09-19 Thread Georg Johann Lay




Am 19.09.22 um 09:51 schrieb Richard Biener:

On Sun, Sep 18, 2022 at 7:40 PM Georg Johann Lay  wrote:


Hello,

this patch fixed PR target/99184 which incorrectly rounded during 64-bit
(long) double to 16-bit and 32-bit integers.

The patch just removes the respective roundings from
libf7-asm.sx::to_integer and ::to_unsigned.  Luckily, LibF7 does nowhere
use respective functions internally, the only user is in libf7.c::f7_exp

which reads

f7_round (qq, qq);
int16_t q = f7_get_s16 (qq);

so that f7_get_s16() operates on an already rounded value, and therefore
this code works unaltered with or without rounding in to_integer.

The patch applies to directory

./libgcc/config/avr/libf7/

and is the same for all GCC versions v10+.

Please someone with write permissions commit it to trunk and backport to
v12, v11, and v10 as it is a wrong-code issue.

The patch will fit without problems (except for ChangeLog) because there
is no traffic on that folder.


Thanks, I've pushed the change.  Please in future try to send patches
that can be applied with git am, thus use git format-patch

Richard.


Thank you so much.  I generated the patch with "git diff > file.diff", 
so is that not correct? The only change is that I defined extra hunks 
for asm so that one can see the function, like in


 @@ -601,9 +601,6 @@ DEFUN to_integer

So git is not prepared for such hunks? Could you point me to some 
documentation on how to do it properly?


Thanks,

Johann


[PATCH] improved const shifts for AVR targets

2022-10-15 Thread Georg Johann Lay

Hi,
recently I used some arduino uno for a project and realized some areas
which do not output optimal asm code. Especially around shifts and function
calls.
With this as motivation and hacktoberfest I started patching things.
Since patch files do not provide a good overview and I hope for a
"hacktoberfest-accepted" label on the PR on github I also opened it there:
https://github.com/gcc-mirror/gcc/pull/73

This patch improves shifts with a const right-hand operand. While 8bit and
16bit shifts were mostly fine, 24bit and 32bit were not handled well.

Testing
I checked output with a local installation of compiler explorer in asm and
a tiny unit test comparing shifts with mul/div by 2.
I however did not write any testcases in gcc for it.


Hi, for such large changes, IMO it's a good idea to run the testsuite 
against the changes and make sure that there are no regressions.  Maybe 
even add new runtime tests in gcc.target/avr/torture to cover 
a significant amount of the changes?


For example a test could go like:

__attribute__((__always_inline__))
static inline void shr (long x, int off)
{
long y = x >> off;
__asm ("" : "+r" (x));
if (x >> off != y)
__builtin_abort();
}

void test_shr (void)
{
long x = 0x76543215;
shr (x, 13);
shr (x, 14);
shr (x, 15);
shr (x, 16);
}

One shift is folded away by the compiler, and the other one has to be 
carried out.


However, the insn output also depends on available register classes like 
"ldi_ok" and whether a "d" class scratch is available, so it will be 
hard to achieve full coverage.  It appears that testing for the lower 
registers can be forced as follows (this won't work for AVR_TINY, of course):


static inline void shr (long x, int off)
{
long y = x >> off;
__asm ("" : "+l" (x));
x >>= off;
__asm ("" : "+l" (x));
if (x != y)
__builtin_abort();
}


Target
This patch is only targeting atmel avr family of chips.

Changelog
improved const shifts for AVR targets


You can have a look at existing ChangeLog files to see the format and style.



Patch
-
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 4ed390e4cf9..c7b70812d5c 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -6043,9 +6043,6 @@ out_shift_with_cnt (const char *templ, rtx_insn
*insn, rtx operands[],
   op[2] = operands[2];
   op[3] = operands[3];

-  if (plen)
-*plen = 0;
-


This looks wrong.  These functions are used in two different contexts:

One is computing the instructions lengths (in words) which is needed for 
jump offset computations for relative jumps that are crossing the insn. 
This is done for plen != NULL, and the length must be returned in *plen.


The second is the actual output of the instruction sequence, resp. returning 
the respective string (depending on context), which must have a length no 
longer than computed.  This is performed if plen == NULL.


Not initializing *plen means that you get garbage for instruction 
lengths.  Runtime errors will occur but just not very frequently, e.g. 
if an instruction sequence is longer than anticipated, a jump target 
might be out of reach which results in a linker error.



   if (CONST_INT_P (operands[2]))
 {
   /* Operand 3 is a scratch register if this is a
@@ -6150,96 +6147,68 @@ out_shift_with_cnt (const char *templ, rtx_insn
*insn, rtx operands[],
 /* 8bit shift left ((char)x << i)   */

 const char *
-ashlqi3_out (rtx_insn *insn, rtx operands[], int *len)
+ashlqi3_out (rtx_insn *insn, rtx operands[], int *plen)
 {
   if (CONST_INT_P (operands[2]))
 {
-  int k;
-
-  if (!len)
- len = &k;
-
   switch (INTVAL (operands[2]))
  {
  default:
   if (INTVAL (operands[2]) < 8)
 break;

-  *len = 1;
-  return "clr %0";
-
- case 1:
-  *len = 1;
-  return "lsl %0";
-
- case 2:
-  *len = 2;
-  return ("lsl %0" CR_TAB
-  "lsl %0");
-
- case 3:
-  *len = 3;
-  return ("lsl %0" CR_TAB
-  "lsl %0" CR_TAB
-  "lsl %0");
+return avr_asm_len ("clr %0", operands, plen, 1);


I don't get it.  This prints *one* CLR instruction for all shift offsets 
1...3?




  case 4:
   if (test_hard_reg_class (LD_REGS, operands[0]))
 {
-  *len = 2;
-  return ("swap %0" CR_TAB
-  "andi %0,0xf0");
+return avr_asm_len ("swap %0" CR_TAB
+  "andi %0,0xf0", operands, plen, 2);


Glitch of coding-rules (GNU style it is), similar in many places down 
the line which seem to have incorrect indentations.  It's not always 
easy to tell this just from looking at a patch, so better double-check 
your indentations.



 }
-  *len = 4;
-  return ("lsl %0" CR_TAB
+return avr_asm_len ("lsl %0" CR_TAB
   "lsl %0" CR_TAB
   "lsl %0" CR_TAB
-  "lsl %0");
+  "lsl %0", operands, plen, 4);

  case 5:
   if (test_hard_reg_class (LD_REGS, operands[0]))
 {
-  *len = 3;
-  return ("swap %0" CR_TAB
+return avr_asm_len ("swap %0" CR_TAB
   "lsl %0"  CR_TAB
-  "andi %0,0xe0");
+  "andi %0,0xe0", operands, plen, 

Re: [PATCH v3] c++: parser - Support for target address spaces in C++

2022-11-03 Thread Georg-Johann Lay

[PATCH v3] c++: parser - Support for target address spaces in C++


First of all, it is great news that GCC is going to implement named 
address spaces for C++.


I have some questions:

1. How is name-mangling going to work?
==

Clang supports address spaces in C++, and for address-space 1 it does 
generate code like the following:


#define __flash __attribute__((__address_space__(1)))

char get_p (const __flash char *p)
{
return *p;
}


_Z5get_pPU3AS1Kc:
   ...

I.e. address-space 1 is mangled as "AS1".

(Notice that Clang's attribute actually works like a qualifier here, one 
could not get this to work with GCC attributes.)



2. Will it work with compound literals?
===

Currently, the following C code works for target avr:

const __flash char *pHallo = (const __flash char[]) { "Hallo" };

This is a pointer in RAM (AS0) that holds the address of a string in 
flash (AS1) and is initialized with that address. Unfortunately, this 
does not work locally:


const __flash char* get_hallo (void)
{
[static] const __flash char *p2 = (const __flash char[]) { "Hallo2" };
return p2;
}

foo.c: In function 'get_hallo':
foo.c: error: compound literal qualified by address-space qualifier

Is there any way to make this work now? Would be great!


3. Will TARGET_ADDR_SPACE_DIAGNOSE_USAGE still work?


Currently there is target hook TARGET_ADDR_SPACE_DIAGNOSE_USAGE.
I did not see it in your patches, so maybe I just missed it? See
https://gcc.gnu.org/onlinedocs/gcc-12.2.0/gccint/Named-Address-Spaces.html#index-TARGET_005fADDR_005fSPACE_005fDIAGNOSE_005fUSAGE


4. Will it be possible to put C++ virtual tables in ASs, and how?
=

One big complaint about avr-g++ is that there is no way to put vtables 
in flash (address-space 1) and to access them accordingly.  How can this 
be achieved with C++ address spaces?


Background: The AVR architecture has a non-linear address space, and you 
cannot tell from the numeric value of an address whether it's in RAM or 
flash. You will have to use different instructions depending on the 
location.


This means that .rodata must be located in RAM, because otherwise one 
would not know whether const char* pointed to RAM or flash, but to 
de-reference you'd need different instructions.


One way out is named address spaces, so we could finally fix

https://gcc.gnu.org/PR43745


Regards,

Johann



Re: [PATCH] PR85678: Change default to -fno-common

2019-10-25 Thread Georg-Johann Lay

Wilco Dijkstra schrieb:

GCC currently defaults to -fcommon.  As discussed in the PR, this is an ancient
C feature which is not conforming with the latest C standards.  On many targets
this means global variable accesses have a codesize and performance penalty.
This applies to C code only, C++ code is not affected by -fcommon.  It is about
time to change the default.

OK for commit?


IIRC using -fno-common might lead to some testsuite fallout because
some optimizations / test cases are sensitive to -f[no-]common.
So I wonder: are no adjustments to test cases needed?


ChangeLog
2019-10-25  Wilco Dijkstra  

PR85678
* common.opt (fcommon): Change init to 1.

doc/
* invoke.texi (-fcommon): Update documentation.
---

diff --git a/gcc/common.opt b/gcc/common.opt
index 
0195b0cb85a06dd043fd0412b42dfffddfa2495b..b0840f41a5e480f4428bd62724b0dc3d54c68c0b
 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1131,7 +1131,7 @@ Common Report Var(flag_combine_stack_adjustments) 
Optimization
 Looks for opportunities to reduce stack adjustments and stack references.
 
 fcommon

-Common Report Var(flag_no_common,0)
+Common Report Var(flag_no_common,0) Init(1)
 Put uninitialized globals in the common section.
 
 fcompare-debug

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 
857d9692729e503657d0d0f44f1f6252ec90d49a..5b4ff66015f5f94a5bd89e4dc3d2d53553cc091e
 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -568,7 +568,7 @@ Objective-C and Objective-C++ Dialects}.
 -fnon-call-exceptions  -fdelete-dead-exceptions  -funwind-tables @gol
 -fasynchronous-unwind-tables @gol
 -fno-gnu-unique @gol
--finhibit-size-directive  -fno-common  -fno-ident @gol
+-finhibit-size-directive  -fcommon  -fno-ident @gol
 -fpcc-struct-return  -fpic  -fPIC  -fpie  -fPIE  -fno-plt @gol
 -fno-jump-tables @gol
 -frecord-gcc-switches @gol
@@ -14050,35 +14050,27 @@ useful for building programs to run under WINE@.
 code that is not binary compatible with code generated without that switch.
 Use it to conform to a non-default application binary interface.
 
-@item -fno-common

-@opindex fno-common
+@item -fcommon
 @opindex fcommon
+@opindex fno-common
 @cindex tentative definitions
-In C code, this option controls the placement of global variables 
-defined without an initializer, known as @dfn{tentative definitions} 
-in the C standard.  Tentative definitions are distinct from declarations 
+In C code, this option controls the placement of global variables

+defined without an initializer, known as @dfn{tentative definitions}
+in the C standard.  Tentative definitions are distinct from declarations
 of a variable with the @code{extern} keyword, which do not allocate storage.
 
-Unix C compilers have traditionally allocated storage for

-uninitialized global variables in a common block.  This allows the
-linker to resolve all tentative definitions of the same variable
+The default is @option{-fno-common}, which specifies that the compiler places
+uninitialized global variables in the BSS section of the object file.


IMO "uninitialized" is confusing because the variables actually
*are* initialized: with zero.  It's just that the variables don't have
explicit initializers.  Ditto for "uninitialized" in the --help message.

Johann



+This inhibits the merging of tentative definitions by the linker so you get a
+multiple-definition error if the same variable is accidentally defined in more
+than one compilation unit.
+
+The @option{-fcommon} places uninitialized global variables in a common block.
+This allows the linker to resolve all tentative definitions of the same 
variable
 in different compilation units to the same object, or to a non-tentative
-definition.  
-This is the behavior specified by @option{-fcommon}, and is the default for 
-GCC on most targets.  
-On the other hand, this behavior is not required by ISO

-C, and on some targets may carry a speed or code size penalty on
-variable references.
-
-The @option{-fno-common} option specifies that the compiler should instead
-place uninitialized global variables in the BSS section of the object file.
-This inhibits the merging of tentative definitions by the linker so
-you get a multiple-definition error if the same 
-variable is defined in more than one compilation unit.

-Compiling with @option{-fno-common} is useful on targets for which
-it provides better performance, or if you wish to verify that the
-program will work on other systems that always treat uninitialized
-variable definitions this way.
+definition.  This behavior does not conform to ISO C, is inconsistent with C++,
+and on many targets implies a speed and code size penalty on global variable
+references.  It is mainly useful to enable legacy code to link without errors.
 
 @item -fno-ident

 @opindex fno-ident





[patch,avr,committed] Remove an unused function (PR85969)

2019-10-27 Thread Georg-Johann Lay

Applied as obvious

Johann

PR target/85969
* config/avr/gen-avr-mmcu-specs.c (str_prefix_p): Remove unused
static function.
--- trunk/gcc/config/avr/gen-avr-mmcu-specs.c   2019/10/25 14:39:06 277454
+++ trunk/gcc/config/avr/gen-avr-mmcu-specs.c   2019/10/25 15:13:23 277455
@@ -50,14 +50,6 @@
 #define SPECFILE_USAGE_URL  \
   "https://gcc.gnu.org/gcc-5/changes.html";
 
-/* Return true iff STR starts with PREFIX.  */
-
-static bool
-str_prefix_p (const char *str, const char *prefix)
-{
-  return strncmp (str, prefix, strlen (prefix)) == 0;
-}
-
 
 static const char header[] =
   "#\n"


[patch][avr] PR92055: Add switches to enable 64-bit [long] double.

2019-10-31 Thread Georg-Johann Lay

Hi, this adds the possibility to enable IEEE compatible double
and long double support in avr-gcc.

It supports 2 configure options

--with-double={32|64|32,64|64,32}
--with-long-double={32|64|32,64|64,32|double}

which select the default layout of these types and also choose
which multilib variants are built and available.

These two config options map to the new compiler options
-mdouble= and -mlong-double= which are new multilib options.

The patch only deals with option handling and multilib bits,
it does not add any double functionality.  The double support
functions are supposed to be provided by avr-libc which also hosts
all the float stuff, including __addsf3 etc.

Ok for trunk?

Johann


gcc/
Support 64-bit double and 64-bit long double configurations.

PR target/92055
* config.gcc (tm_defines) [avr]: Set from --with-double=,
--with-long-double=.
* config/avr/t-multilib: Remove.
* config/avr/t-avr: Output of genmultilib.awk is now fully
dynamically generated and no more part of the repo.
(HAVE_DOUBLE_MULTILIB, HAVE_LONG_DOUBLE_MULTILIB): New variables.
Pass them down to...
* config/avr/genmultilib.awk: ...here and handle them.
* gcc/config/avr/avr.opt (-mdouble=, avr_double). New option and var.
(-mlong-double=, avr_long_double). New option and var.
* common/config/avr/avr-common.c (opts.h): Include.
(diagnostic.h): Include.
(TARGET_OPTION_OPTIMIZATION_TABLE) <-mdouble=>: Set default as
requested by --with-double=.
<-mlong-double=>: Set default as requested by --with-long-double=.
(TARGET_OPTION_OPTIMIZATION_TABLE) <-mdouble=, -mlong-double=>:
Set default as requested by --with-double=
(TARGET_HANDLE_OPTION): Define to this...
(avr_handle_option): ...new hook worker.
* config/avr/avr.h (DOUBLE_TYPE_SIZE): Define to avr_double.
(LONG_DOUBLE_TYPE_SIZE): Define to avr_long_double.
(avr_double_lib): New proto for spec function.
(EXTRA_SPEC_FUNCTIONS) <double-lib>: Add.
(DRIVER_SELF_SPECS): Call %:double-lib.
* config/avr/avr.c (avr_option_override): Assert
sizeof(long double) >= sizeof(double) for the target.
* config/avr/avr-c.c (avr_cpu_cpp_builtins)
[__HAVE_DOUBLE_MULTILIB__, __HAVE_LONG_DOUBLE_MULTILIB__]
[__HAVE_DOUBLE64__, __HAVE_DOUBLE32__, __DEFAULT_DOUBLE__=]
[__HAVE_LONG_DOUBLE64__, __HAVE_LONG_DOUBLE32__]
[__HAVE_LONG_DOUBLE_IS_DOUBLE__, __DEFAULT_LONG_DOUBLE__=]:
New built-in defined depending on --with-double=, --with-long-double=.
* config/avr/driver-avr.c (avr_double_lib): New spec function.
* doc/invoke.tex (AVR Options) <-mdouble=,-mlong-double=>: Doc.

libgcc/
Support 64-bit double and 64-bit long double configurations.

PR target/92055
* config/avr/t-avr (HOST_LIBGCC2_CFLAGS): Only add -DF=SF if
long double is a 32-bit type.
* config/avr/t-avrlibc: Copy double64 and long-double64
multilib(s) from the vanilla one.
* config/avr/t-copy-libgcc: New Makefile snip.




Index: gcc/common/config/avr/avr-common.c
===
--- gcc/common/config/avr/avr-common.c  (revision 277236)
+++ gcc/common/config/avr/avr-common.c  (working copy)
@@ -23,6 +23,8 @@
 #include "tm.h"
 #include "common/common-target.h"
 #include "common/common-target-def.h"
+#include "opts.h"
+#include "diagnostic.h"
 
 /* Implement TARGET_OPTION_OPTIMIZATION_TABLE.  */
 static const struct default_options avr_option_optimization_table[] =
@@ -43,9 +45,97 @@ static const struct default_options avr_
performance decrease. For the AVR though, disallowing data races
introduces additional code in LIM and increases reg pressure.  */
 { OPT_LEVELS_ALL, OPT_fallow_store_data_races, NULL, 1 },
+
+#if defined (WITH_DOUBLE64)
+{ OPT_LEVELS_ALL, OPT_mdouble_, NULL, 64 },
+#elif defined (WITH_DOUBLE32)
+{ OPT_LEVELS_ALL, OPT_mdouble_, NULL, 32 },
+#else
+#error "align this with config.gcc"
+#endif
+
+#if defined (WITH_LONG_DOUBLE64)
+{ OPT_LEVELS_ALL, OPT_mlong_double_, NULL, 64 },
+#elif defined (WITH_LONG_DOUBLE32)
+{ OPT_LEVELS_ALL, OPT_mlong_double_, NULL, 32 },
+#else
+#error "align this with config.gcc"
+#endif
+
 { OPT_LEVELS_NONE, 0, NULL, 0 }
   };
 
+
+/* Implement `TARGET_HANDLE_OPTION'.  */
+
+static bool
+avr_handle_option (struct gcc_options *opts, struct gcc_options*,
+   const struct cl_decoded_option *decoded, location_t loc)
+{
+  int value = decoded->value;
+
+  switch (decoded->opt_index)
+{
+case OPT_mdouble_:
+  if (value == 64)
+{
+#if !defined (HAVE_DOUBLE64)
+  error_at (loc, "option %<-mdouble=64%> is only available if "
+"configured %<--with-double={64|64,32|32,64}%>");
+#endif
+  opts->x_avr_long_double = 6

Re: [PATCH] Support multiple registers for the frame pointer

2019-11-04 Thread Georg-Johann Lay

Am 04.11.19 um 16:22 schrieb Vladimir Makarov:


On 2019-11-02 1:28 p.m., Kwok Cheung Yeung wrote:
The AMD GCN architecture uses 64-bit pointers, but the scalar 
registers are 32-bit wide, so pointers must reside in a pair of 
registers.


The two hard registers holding the frame pointer are currently fixed, 
but if they are changed to unfixed (so that the FP can be eliminated), 
GCC would sometimes allocate the second register to a pseudo while the 
frame pointer was in use, clobbering the value of the FP and crashing 
the program.


GCC currently does not handle multi-register hard frame pointers 
properly - no_unit_alloc_regs, regs_ever_live, eliminable_regset and 
ira_no_alloc_regs (which gets copied to lra_no_alloc_regs) are only 
set for HARD_FRAME_POINTER_REGNUM and not for any subsequent registers 
that may be used, which means that the register allocators consider 
HARD_FRAME_POINTER_REGNUM+1 free. This patch determines the number of 
registers needed to store the frame pointer using hard_regno_nregs, 
and sets the required variables for HARD_FRAME_POINTER_REGNUM and 
however many adjacent registers are needed (which on most 
architectures should be zero).


Bootstrapped on x86_64 and tested with no regressions, which is not 
surprising as nothing different happens when the FP fits into a single 
register. I believe this is true for the 64-bit variants of the more 
popular architectures as well (ARM, RS6000, MIPS, Sparc). Are there 
any other architectures similar to GCN (i.e. 64-bit pointers with 
32-bit GPRs)?


I have not included any specific testcases for this issue as it can 
affect pretty much everything not using -fomit-frame-pointer on AMD GCN.


Okay for trunk?



Yes.  You can commit the patch to the trunk.

Thank you.


The avr port has always used a frame pointer that spans 2 hard registers.

Does this patch have an impact on the avr port and its handling of
the frame pointer?

Johann




Re: [PATCH] Support multiple registers for the frame pointer

2019-11-04 Thread Georg-Johann Lay

Kwok Cheung Yeung schrieb:
The AMD GCN architecture uses 64-bit pointers, but the scalar registers 
are 32-bit wide, so pointers must reside in a pair of registers.


The two hard registers holding the frame pointer are currently fixed, 
but if they are changed to unfixed (so that the FP can be eliminated), 
GCC would sometimes allocate the second register to a pseudo while the 
frame pointer was in use, clobbering the value of the FP and crashing 
the program.


GCC currently does not handle multi-register hard frame pointers 
properly - no_unit_alloc_regs, regs_ever_live, eliminable_regset and 
ira_no_alloc_regs (which gets copied to lra_no_alloc_regs) are only set 
for HARD_FRAME_POINTER_REGNUM and not for any subsequent registers that 
may be used, which means that the register allocators consider 
HARD_FRAME_POINTER_REGNUM+1 free. This patch determines the number of 
registers needed to store the frame pointer using hard_regno_nregs, and 
sets the required variables for HARD_FRAME_POINTER_REGNUM and however 
many adjacent registers are needed (which on most architectures should 
be zero).


Bootstrapped on x86_64 and tested with no regressions, which is not 
surprising as nothing different happens when the FP fits into a single 
register. I believe this is true for the 64-bit variants of the more 
popular architectures as well (ARM, RS6000, MIPS, Sparc). Are there any 
other architectures similar to GCN (i.e. 64-bit pointers with 32-bit GPRs)?


If 16-bit pointers with 8-bit GPRs is similar enough: The avr port.

Johann

I have not included any specific testcases for this issue as it can 
affect pretty much everything not using -fomit-frame-pointer on AMD GCN.


Okay for trunk?

Kwok Yeung


Ping^1 [patch][avr] PR92055: Add switches to enable 64-bit [long] double.

2019-11-06 Thread Georg-Johann Lay

Ping #1

Am 31.10.19 um 22:55 schrieb Georg-Johann Lay:

Hi, this adds the possibility to enable IEEE compatible double
and long double support in avr-gcc.

It supports 2 configure options

--with-double={32|64|32,64|64,32}
--with-long-double={32|64|32,64|64,32|double}

which select the default layout of these types and also choose
which multilib variants are built and available.

These two config options map to the new compiler options
-mdouble= and -mlong-double= which are new multilib options.

The patch only deals with option handling and multilib bits,
it does not add any double functionality.  The double support
functions are supposed to be provided by avr-libc which also hosts
all the float stuff, including __addsf3 etc.

Ok for trunk?

Johann


gcc/
 Support 64-bit double and 64-bit long double configurations.

 PR target/92055
 * config.gcc (tm_defines) [avr]: Set from --with-double=,
 --with-long-double=.
 * config/avr/t-multilib: Remove.
 * config/avr/t-avr: Output of genmultilib.awk is now fully
 dynamically generated and no more part of the repo.
 (HAVE_DOUBLE_MULTILIB, HAVE_LONG_DOUBLE_MULTILIB): New variables.
 Pass them down to...
 * config/avr/genmultilib.awk: ...here and handle them.
 * gcc/config/avr/avr.opt (-mdouble=, avr_double). New option and var.
 (-mlong-double=, avr_long_double). New option and var.
 * common/config/avr/avr-common.c (opts.h): Include.
 (diagnostic.h): Include.
 (TARGET_OPTION_OPTIMIZATION_TABLE) <-mdouble=>: Set default as
 requested by --with-double=.
 <-mlong-double=>: Set default as requested by 
--with-long-double=.

 (TARGET_OPTION_OPTIMIZATION_TABLE) <-mdouble=, -mlong-double=>:
 Set default as requested by --with-double=
 (TARGET_HANDLE_OPTION): Define to this...
 (avr_handle_option): ...new hook worker.
 * config/avr/avr.h (DOUBLE_TYPE_SIZE): Define to avr_double.
 (LONG_DOUBLE_TYPE_SIZE): Define to avr_long_double.
 (avr_double_lib): New proto for spec function.
 (EXTRA_SPEC_FUNCTIONS) <double-lib>: Add.
 (DRIVER_SELF_SPECS): Call %:double-lib.
 * config/avr/avr.c (avr_option_override): Assert
 sizeof(long double) >= sizeof(double) for the target.
 * config/avr/avr-c.c (avr_cpu_cpp_builtins)
 [__HAVE_DOUBLE_MULTILIB__, __HAVE_LONG_DOUBLE_MULTILIB__]
 [__HAVE_DOUBLE64__, __HAVE_DOUBLE32__, __DEFAULT_DOUBLE__=]
 [__HAVE_LONG_DOUBLE64__, __HAVE_LONG_DOUBLE32__]
 [__HAVE_LONG_DOUBLE_IS_DOUBLE__, __DEFAULT_LONG_DOUBLE__=]:
 New built-in defined depending on --with-double=, --with-long-double=.
 * config/avr/driver-avr.c (avr_double_lib): New spec function.
 * doc/invoke.tex (AVR Options) <-mdouble=,-mlong-double=>: Doc.

libgcc/
 Support 64-bit double and 64-bit long double configurations.

 PR target/92055
 * config/avr/t-avr (HOST_LIBGCC2_CFLAGS): Only add -DF=SF if
 long double is a 32-bit type.
 * config/avr/t-avrlibc: Copy double64 and long-double64
 multilib(s) from the vanilla one.
 * config/avr/t-copy-libgcc: New Makefile snip.




Re: GCC wwwdocs move to git done

2019-11-06 Thread Georg-Johann Lay

Am 09.10.19 um 02:27 schrieb Joseph Myers:

I've done the move of GCC wwwdocs to git (using the previously posted and
discussed scripts), including setting up the post-receive hook to do the
same things previously covered by the old CVS hooks, and minimal updates
to the web pages dealing with the CVS setup for wwwdocs.


Hi,

May it be the case that some parts are missing?  In particular, I cannot
find the source of

https://gcc.gnu.org/install/configure.html

Johann


Re: GCC wwwdocs move to git done

2019-11-06 Thread Georg-Johann Lay

Am 06.11.19 um 15:03 schrieb Georg-Johann Lay:

Am 09.10.19 um 02:27 schrieb Joseph Myers:

I've done the move of GCC wwwdocs to git (using the previously posted and
discussed scripts), including setting up the post-receive hook to do the
same things previously covered by the old CVS hooks, and minimal updates
to the web pages dealing with the CVS setup for wwwdocs.


Hi,

May it be the case that some parts are missing?  In particular, I cannot
find the source of

https://gcc.gnu.org/install/configure.html

Johann



Ok, found it in install/README.  I knew it had something special about it...

Johann






Re: Ping^1 [patch][avr] PR92055: Add switches to enable 64-bit [long] double.

2019-11-06 Thread Georg-Johann Lay

Am 06.11.19 um 11:39 schrieb Georg-Johann Lay:

Ping #1

Am 31.10.19 um 22:55 schrieb Georg-Johann Lay:

Hi, this adds the possibility to enable IEEE compatible double
and long double support in avr-gcc.

It supports 2 configure options

--with-double={32|64|32,64|64,32}
--with-long-double={32|64|32,64|64,32|double}

which select the default layout of these types and also choose
which multilib variants are built and available.

These two config options map to the new compiler options
-mdouble= and -mlong-double= which are new multilib options.

The patch only deals with option handling and multilib bits,
it does not add any double functionality.  The double support
functions are supposed to be provided by avr-libc which also hosts
all the float stuff, including __addsf3 etc.

Ok for trunk?

Johann


..and here is the addendum that documents the new configure options.

Index: gcc/doc/install.texi
===
--- gcc/doc/install.texi(revision 277236)
+++ gcc/doc/install.texi(working copy)
@@ -2277,15 +2277,45 @@ omitted from @file{libgcc.a} on the assu
 @samp{newlib}.

 @item --with-avrlibc
-Specifies that @samp{AVR-Libc} is
-being used as the target C library.  This causes float support
+Only supported for the AVR target. Specifies that @samp{AVR-Libc} is
+being used as the target C@tie{} library.  This causes float support
 functions like @code{__addsf3} to be omitted from @file{libgcc.a} on
 the assumption that it will be provided by @file{libm.a}.  For more
 technical details, cf. @uref{http://gcc.gnu.org/PR54461,,PR54461}.
-This option is only supported for the AVR target.  It is not supported for
+It is not supported for
 RTEMS configurations, which currently use newlib.  The option is
 supported since version 4.7.2 and is the default in 4.8.0 and newer.

+@item --with-double=@{32|64|32,64|64,32@}
+@itemx --with-long-double=@{32|64|32,64|64,32|double@}
+Only supported for the AVR target since version@tie{}10.
+Specify the default layout available for the C/C++ @samp{double}
+and @samp{long double} type, respectively. The following rules apply:
+@itemize
+@item
+The first value after the @samp{=} specifies the default layout (in bits)
+of the type and also the default for the @option{-mdouble=} resp.
+@option{-mlong-double=} compiler option.
+@item
+If more than one value is specified, respective multilib variants are
+available, and  @option{-mdouble=} resp. @option{-mlong-double=} acts
+as a multilib option.
+@item
+If @option{--with-long-double=double} is specified, @samp{double} and
+@samp{long double} will have the same layout.
+@item
+If the configure option is not set, it defaults to @samp{32} which
+is compatible with older versions of the compiler that use non-standard
+32-bit types for @samp{double} and @samp{long double}.
+@end itemize
+Not all combinations of @option{--with-double=} and
+@option{--with-long-double=} are valid.  For example, the combination
+@option{--with-double=32,64} @option{--with-long-double=32} will be
+rejected because the first option specifies the availability of
+multilibs for @samp{double}, whereas the second option implies
+that @samp{long double} --- and hence also @samp{double} --- is always
+32@tie{}bits wide.
+
 @item --with-nds32-lib=@var{library}
 Specifies that @var{library} setting is used for building @file{libgcc.a}.
 Currently, the valid @var{library} is @samp{newlib} or @samp{mculib}.


Re: [patch][avr] PR92055: Add switches to enable 64-bit [long] double.

2019-11-07 Thread Georg-Johann Lay

Am 07.11.19 um 10:41 schrieb Martin Liška:

Hello.

I've noticed quite some GNU coding style violations with your patch.
Please next time, use something like:

$ git diff HEAD~ > /tmp/patch && ./contrib/check_GNU_style.py /tmp/patch

Thanks,
Martin



hm, I am actually using GNU style with Emacs...

You mean the lines > 80 chars in config.gcc?

I assumed that is no issue because there are already quite some lines 
that don't follow the < 80 rule.


Johann



Re: [patch][avr] PR92055: Add switches to enable 64-bit [long] double.

2019-11-07 Thread Georg-Johann Lay

Am 07.11.19 um 13:49 schrieb Martin Liška:

On 11/7/19 1:39 PM, Georg-Johann Lay wrote:

Am 07.11.19 um 10:41 schrieb Martin Liška:

Hello.

I've noticed quite some GNU coding style violations with your patch.
Please next time, use something like:

$ git diff HEAD~ > /tmp/patch && ./contrib/check_GNU_style.py /tmp/patch

Thanks,
Martin



hm, I am actually using GNU style with Emacs...

You mean the lines > 80 chars in config.gcc?

I assumed that is no issue because there are already quite some lines 
that don't follow the < 80 rule.


That's fine. I'm mainly talking about:

=== ERROR type #1: blocks of 8 spaces should be replaced with tabs (45 
error(s)) ===
gcc/common/config/avr/avr-common.c:78:0:   const struct 
cl_decoded_option *decoded, location_t loc)

gcc/common/config/avr/avr-common.c:86:0:{
gcc/common/config/avr/avr-common.c:88:0:  error_at (loc, "option 
%<-mdouble=64%> is only available if "
gcc/common/config/avr/avr-common.c:89:0:    "configured 
%<--with-double={64|64,32|32,64}%>");
gcc/common/config/avr/avr-common.c:91:0:  
opts->x_avr_long_double = 64;

gcc/common/config/avr/avr-common.c:92:0:}
gcc/common/config/avr/avr-common.c:94:0:{
...

Martin


My intention was to avoid a mixup of TABs and spaces mode, because
the avr backend is indented with spaces. So the indentation picks
up the style from the context (just like you would do it in Python
to avoid dreaded mixing of tabs and spaces). Tabifying the complete
sources is also something which I didn't consider, because that
makes porting much harder...

Johann



[patch,avr] Add support for devices from the 0-series.

2019-11-08 Thread Georg-Johann Lay

Hi,

this patch adds support for a few more AVR devices.  Because the offset 
where flash is seen in RAM deviates from the settings for the family 
(and hence also from the linker script defaults), a new field in 
avr_mcu_t is needed to express this so that specs can be generated 
appropriately.


The AVR_MCU lines in avr-mcus.def are longer than 80 chars because it's 
easier to maintain 1 device = 1 line entries.  And it's easier to scan 
them with the awk scripts.


Ok for trunk?

Johann

Add support for AVR devices from the 0-series.

* config/avr/avr-arch.h (avr_mcu_t) : New field.
* config/avr/avr-devices.c (avr_mcu_types): Adjust initializers.
* config/avr/avr-mcus.def (AVR_MCU): Add respective field.
* config/avr/specs.h (LINK_SPEC) <%(link_pm_base_address)>: Add.
* config/avr/gen-avr-mmcu-specs.c (print_mcu)
<*cpp, *cpp_mcu, *cpp_avrlibc, *link_pm_base_address>: Emit code
for spec definitions.
* doc/avr-mmcu.texi: Regenerate.
Index: config/avr/avr-arch.h
===
--- config/avr/avr-arch.h	(revision 277953)
+++ config/avr/avr-arch.h	(working copy)
@@ -126,6 +126,9 @@ const char *const macro;
 
   /* Flash size in bytes.  */
   int flash_size;
+
+  /* Offset where flash is seen in the RAM address space.  */
+  int flash_pm_offset;
 } avr_mcu_t;
 
 /* AVR device specific features.
Index: config/avr/avr-devices.c
===
--- config/avr/avr-devices.c	(revision 277953)
+++ config/avr/avr-devices.c	(working copy)
@@ -117,12 +117,12 @@ avr_texinfo[] =
 const avr_mcu_t
 avr_mcu_types[] =
 {
-#define AVR_MCU(NAME, ARCH, DEV_ATTRIBUTE, MACRO, DATA_SEC, TEXT_SEC, FLASH_SIZE)\
-  { NAME, ARCH, DEV_ATTRIBUTE, MACRO, DATA_SEC, TEXT_SEC, FLASH_SIZE },
+#define AVR_MCU(NAME, ARCH, DEV_ATTRIBUTE, MACRO, DATA_SEC, TEXT_SEC, FLASH_SIZE, PMOFF) \
+  { NAME, ARCH, DEV_ATTRIBUTE, MACRO, DATA_SEC, TEXT_SEC, FLASH_SIZE, PMOFF },
 #include "avr-mcus.def"
 #undef AVR_MCU
 /* End of list.  */
-  { NULL, ARCH_UNKNOWN, AVR_ISA_NONE, NULL, 0, 0, 0 }
+  { NULL, ARCH_UNKNOWN, AVR_ISA_NONE, NULL, 0, 0, 0, 0 }
 };
 
 
Index: config/avr/avr-mcus.def
===
--- config/avr/avr-mcus.def	(revision 277953)
+++ config/avr/avr-mcus.def	(working copy)
@@ -61,313 +61,327 @@ supply respective built-in macro.
 
FLASH_SIZEFlash size in bytes.
 
+   RODATA_PM_OFFSET
+		 Either 0x0 or the offset where flash memory is mirrored
+		 into the RAM address space accessible by LD and LDS.
+		 This is only needed if that value deviates from the
+		 value for the respective family.
+
"avr2" must be first for the "0" default to work as intended.  */
 
 /* Classic, <= 8K.  */
-AVR_MCU ("avr2", ARCH_AVR2, AVR_ERRATA_SKIP, NULL, 0x0060, 0x0, 0x6)
+AVR_MCU ("avr2", ARCH_AVR2, AVR_ERRATA_SKIP, NULL, 0x0060, 0x0, 0x6, 0)
 
-AVR_MCU ("at90s2313",ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2313__", 0x0060, 0x0, 0x800)
-AVR_MCU ("at90s2323",ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2323__", 0x0060, 0x0, 0x800)
-AVR_MCU ("at90s2333",ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2333__", 0x0060, 0x0, 0x800)
-AVR_MCU ("at90s2343",ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2343__", 0x0060, 0x0, 0x800)
-AVR_MCU ("attiny22", ARCH_AVR2, AVR_SHORT_SP, "__AVR_ATtiny22__",  0x0060, 0x0, 0x800)
-AVR_MCU ("attiny26", ARCH_AVR2, AVR_SHORT_SP, "__AVR_ATtiny26__",  0x0060, 0x0, 0x800)
-AVR_MCU ("at90s4414",ARCH_AVR2, AVR_ISA_NONE, "__AVR_AT90S4414__", 0x0060, 0x0, 0x1000)
-AVR_MCU ("at90s4433",ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S4433__", 0x0060, 0x0, 0x1000)
-AVR_MCU ("at90s4434",ARCH_AVR2, AVR_ISA_NONE, "__AVR_AT90S4434__", 0x0060, 0x0, 0x1000)
-AVR_MCU ("at90s8515",ARCH_AVR2, AVR_ERRATA_SKIP, "__AVR_AT90S8515__",  0x0060, 0x0, 0x2000)
-AVR_MCU ("at90c8534",ARCH_AVR2, AVR_ISA_NONE, "__AVR_AT90C8534__", 0x0060, 0x0, 0x2000)
-AVR_MCU ("at90s8535",ARCH_AVR2, AVR_ISA_NONE, "__AVR_AT90S8535__", 0x0060, 0x0, 0x2000)
+AVR_MCU ("at90s2313",ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2313__", 0x0060, 0x0, 0x800, 0)
+AVR_MCU ("at90s2323",ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2323__", 0x0060, 0x0, 0x800, 0)
+AVR_MCU ("at90s2333",ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2333__", 0x0060, 0x0, 0x800, 0)
+AVR_MCU ("at90s2343",ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2343__", 0x0060, 0x0, 0x800, 0)
+AVR_MCU ("attiny22", ARCH_AVR2, AVR_SHORT_SP, "__AVR_ATtiny22__",  0x0060, 0x0, 0x800, 0)
+AVR_MCU ("attiny26", ARCH_AVR2, AVR_SHORT_SP, "__AVR_ATtiny26__",  0x0060, 0x0, 0x800, 0)
+AVR_MCU ("at90s4414", 

Ping^1: [patch,avr] Add support for devices from the 0-series.

2019-11-13 Thread Georg-Johann Lay

Ping ?

Am 08.11.19 um 17:19 schrieb Georg-Johann Lay:

Hi,

this patch adds support for a few more AVR devices.  Because the offset 
where flash is seen in RAM deviates from the settings for the family 
(and hence also from the linker script defaults), a new field in 
avr_mcu_t is needed to express this so that specs can be generated 
appropriately.


The AVR_MCU lines in avr-mcus.def are longer than 80 chars because it's 
easier to maintain 1 device = 1 line entries.  And it's easier to scan 
them with the awk scripts.


Ok for trunk?

Johann

 Add support for AVR devices from the 0-series.

 * config/avr/avr-arch.h (avr_mcu_t) : New field.
 * config/avr/avr-devices.c (avr_mcu_types): Adjust initializers.
 * config/avr/avr-mcus.def (AVR_MCU): Add respective field.
 * config/avr/specs.h (LINK_SPEC) <%(link_pm_base_address)>: Add.
 * config/avr/gen-avr-mmcu-specs.c (print_mcu)
 <*cpp, *cpp_mcu, *cpp_avrlibc, *link_pm_base_address>: Emit code
 for spec definitions.
 * doc/avr-mmcu.texi: Regenerate.




[wwwdocs] Add AVR news.

2020-01-10 Thread Georg-Johann Lay

Added the following change to the v10 changes site.

Johann


diff --git a/htdocs/gcc-10/changes.html b/htdocs/gcc-10/changes.html
index d6108269..7d96bc66 100644
--- a/htdocs/gcc-10/changes.html
+++ b/htdocs/gcc-10/changes.html
@@ -334,7 +334,54 @@ a work-in-progress.
   arm-uclinuxfdpiceabi, and the C library is uclibc-ng.
   
 
-
+
+AVR
+
+  Support for the XMEGA-like devices
+
+  ATtiny202, ATtiny204, ATtiny402, ATtiny404, ATtiny406, ATtiny804,
+  ATtiny806, ATtiny807, ATtiny1604, ATtiny1606, ATtiny1607, ATmega808,
+  ATmega809, ATmega1608, ATmega1609, ATmega3208, ATmega3209,
+  ATmega4808, ATmega4809
+
+has been added.
+  
+  
+A new command line option -nodevicespecs has been added.
+It allows to provide a custom device-specs file by means of
+
+   avr-gcc -nodevicespecs -specs=my-spec-file 
+
+and without the need to provide options -B and
+-mmcu=.
+See
+href="https://gcc.gnu.org/onlinedocs/gcc/AVR-Options.html#index-nodevicespecs";>AVR

+  command line options for details.
+This feature is also available in v9.3+ and v8.4+.
+  
+  
+New command line options -mdouble=[32,64] and
+-mlong-double=[32,64] have been added.  They allow
+to chose the size (in bits) of the double and
+long double types, respectively.  Whether or not the
+mentioned layouts are available, whether the options act
+as a multilib option, and what is the default for either option
+is controlled by the new
+https://gcc.gnu.org/install/configure.html#avr";>AVR configure
+  options
+--with-double= and --with-long-double=.
+  
+  
+A new configure option --with-libf7= has been added.
+It controls to which level avr-libgcc provides 64-bit floating point
+support by means of
+https://gcc.gnu.org/wiki/avr-gcc#LibF7";>LibF7.
+  
+  
+A new configure option --with-double-comparison= has been
+added. It's unlikely you need to set this option by hand.
+  
+

 



[patch,avr,applied]: Simplify asm macro skip.

2020-01-14 Thread Georg-Johann Lay

Applied the following trivial and obvious patch to the avr back.

Johann

libgcc/
* config/avr/lib1funcs.S (skip): Simplify.


diff --git a/libgcc/config/avr/lib1funcs.S b/libgcc/config/avr/lib1funcs.S
index 8ebdc01c88c..2ffa2090b25 100644
--- a/libgcc/config/avr/lib1funcs.S
+++ b/libgcc/config/avr/lib1funcs.S
@@ -169,11 +169,7 @@ see the files COPYING3 and COPYING.RUNTIME 
respectively.  If not, see

 .endm

 ;; Skip next instruction, typically a jump target
-#if defined(__AVR_TINY__)
-#define skip cpse 0,0
-#else
 #define skip cpse 16,16
-#endif

 ;; Negate a 2-byte value held in consecutive registers
 .macro NEG2  reg


[avr,applied]: Remove a no more existing file from contrib/gcc_update::files_and_dependencies.

2020-01-14 Thread Georg-Johann Lay

Hi,

gcc/config/avr/t-multilib no longer exists, hence removed from the 
files to touch.  Applied addendum to PR92055 (which removed that file) 
as obvious.


Johann


The mentioned auto-generated file is no longer part of the
GCC sources, it's auto-generated in $(builddir) during build.

PR target/92055
* contrib/gcc_update (files_and_dependencies): Remove
entry for gcc/config/avr/t-multilib.


diff --git a/contrib/gcc_update b/contrib/gcc_update
index c04b5dfb0a3..5df3297f7f8 100755
--- a/contrib/gcc_update
+++ b/contrib/gcc_update
@@ -82,7 +82,6 @@ gcc/fixinc/fixincl.x: gcc/fixinc/fixincl.tpl 
gcc/fixinc/inclhack.def
 gcc/config/aarch64/aarch64-tune.md: 
gcc/config/aarch64/aarch64-cores.def gcc/config/aarch64/gentune.sh
 gcc/config/arm/arm-tune.md: gcc/config/arm/arm-cpus.in 
gcc/config/arm/parsecpu.awk
 gcc/config/arm/arm-tables.opt: gcc/config/arm/arm-cpus.in 
gcc/config/arm/parsecpu.awk
-gcc/config/avr/t-multilib: gcc/config/avr/avr-mcus.def 
gcc/config/avr/genmultilib.awk
 gcc/config/c6x/c6x-tables.opt: gcc/config/c6x/c6x-isas.def 
gcc/config/c6x/genopt.sh
 gcc/config/c6x/c6x-sched.md: gcc/config/c6x/c6x-sched.md.in 
gcc/config/c6x/gensched.sh
 gcc/config/c6x/c6x-mult.md: gcc/config/c6x/c6x-mult.md.in 
gcc/config/c6x/genmult.sh


[avr,committed] Add support for some avrxmega3 devices.

2019-12-12 Thread Georg-Johann Lay

Applied this patchlet to add support for:

ATtiny1604, ATtiny1606, ATtiny1607, ATtiny402, ATtiny404, ATtiny406, 
ATtiny804, ATtiny806, ATtiny807, ATtiny202, ATtiny204.


Johann

Add support for some more AVR devices from avrxmega3 family.

* config/avr/avr-mcus.def (attiny1604, attiny1606, attiny1607)
(attiny402, attiny404, attiny406)
(attiny804, attiny806, attiny807)
(attiny202, attiny204): Add AVR_MCU lines to support them.
* doc/avr-mmcu.texi: Regenerate.

Index: config/avr/avr-mcus.def
===
--- config/avr/avr-mcus.def	(revision 279308)
+++ config/avr/avr-mcus.def	(revision 279309)
@@ -307,6 +307,17 @@ AVR_MCU ("atxmega32c4",  ARCH_AVRXME
 AVR_MCU ("atxmega32e5",  ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_ATxmega32E5__",  0x2000, 0x0, 0x9000, 0)
 /* Xmega, Flash + RAM < 64K, flash visible in RAM address space */
 AVR_MCU ("avrxmega3",ARCH_AVRXMEGA3, AVR_ISA_NONE,  NULL,  0x3f00, 0x0, 0x8000, 0)
+AVR_MCU ("attiny202",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny202__",   0x3f80, 0x0, 0x800,  0x8000)
+AVR_MCU ("attiny204",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny204__",   0x3f80, 0x0, 0x800,  0x8000)
+AVR_MCU ("attiny402",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny402__",   0x3f00, 0x0, 0x1000, 0x8000)
+AVR_MCU ("attiny404",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny404__",   0x3f00, 0x0, 0x1000, 0x8000)
+AVR_MCU ("attiny406",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny406__",   0x3f00, 0x0, 0x1000, 0x8000)
+AVR_MCU ("attiny804",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny804__",   0x3e00, 0x0, 0x2000, 0x8000)
+AVR_MCU ("attiny806",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny806__",   0x3e00, 0x0, 0x2000, 0x8000)
+AVR_MCU ("attiny807",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny807__",   0x3e00, 0x0, 0x2000, 0x8000)
+AVR_MCU ("attiny1604",   ARCH_AVRXMEGA3, AVR_ISA_NONE,  "__AVR_ATtiny1604__",  0x3c00, 0x0, 0x4000, 0x8000)
+AVR_MCU ("attiny1606",   ARCH_AVRXMEGA3, AVR_ISA_NONE,  "__AVR_ATtiny1606__",  0x3c00, 0x0, 0x4000, 0x8000)
+AVR_MCU ("attiny1607",   ARCH_AVRXMEGA3, AVR_ISA_NONE,  "__AVR_ATtiny1607__",  0x3c00, 0x0, 0x4000, 0x8000)
 AVR_MCU ("attiny212",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny212__",   0x3f80, 0x0, 0x800,  0x8000)
 AVR_MCU ("attiny214",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny214__",   0x3f80, 0x0, 0x800,  0x8000)
 AVR_MCU ("attiny412",ARCH_AVRXMEGA3, AVR_ISA_RCALL, "__AVR_ATtiny412__",   0x3f00, 0x0, 0x1000, 0x8000)
Index: doc/avr-mmcu.texi
===
--- doc/avr-mmcu.texi	(revision 279308)
+++ doc/avr-mmcu.texi	(revision 279309)
@@ -54,7 +54,7 @@
 
 @item avrxmega3
 ``XMEGA'' devices with up to 64@tie{}KiB of combined program memory and RAM, and with program memory visible in the RAM address space.
-@*@var{mcu}@tie{}= @code{attiny212}, @code{attiny214}, @code{attiny412}, @code{attiny414}, @code{attiny416}, @code{attiny417}, @code{attiny814}, @code{attiny816}, @code{attiny817}, @code{attiny1614}, @code{attiny1616}, @code{attiny1617}, @code{attiny3214}, @code{attiny3216}, @code{attiny3217}, @code{atmega808}, @code{atmega809}, @code{atmega1608}, @code{atmega1609}, @code{atmega3208}, @code{atmega3209}, @code{atmega4808}, @code{atmega4809}.
+@*@var{mcu}@tie{}= @code{attiny202}, @code{attiny204}, @code{attiny212}, @code{attiny214}, @code{attiny402}, @code{attiny404}, @code{attiny406}, @code{attiny412}, @code{attiny414}, @code{attiny416}, @code{attiny417}, @code{attiny804}, @code{attiny806}, @code{attiny807}, @code{attiny814}, @code{attiny816}, @code{attiny817}, @code{attiny1604}, @code{attiny1606}, @code{attiny1607}, @code{attiny1614}, @code{attiny1616}, @code{attiny1617}, @code{attiny3214}, @code{attiny3216}, @code{attiny3217}, @code{atmega808}, @code{atmega809}, @code{atmega1608}, @code{atmega1609}, @code{atmega3208}, @code{atmega3209}, @code{atmega4808}, @code{atmega4809}.
 
 @item avrxmega4
 ``XMEGA'' devices with more than 64@tie{}KiB and up to 128@tie{}KiB of program memory.


[patch,avr, 0/3] Support 64-bit (long) double.

2019-12-16 Thread Georg-Johann Lay
Now that the avr backend can support 64-bit floats by means of 
configure-options --with-double= and --with-long-double=, this patch 
series adds some routines to support it.


It's an ad-hoc, avr-specific implementation in assembly and GNU-C which 
is added as a new subfolder in libgcc/config/avr/libf7.


Patch 1/3 is the GCC changes: Documentation and new avr-specific 
configure options:


--with-libf7 selects to which level double support from libf7 is added 
to libgcc.


--with-double-comparison selects what FLOAT_LIB_COMPARE_RETURNS_BOOL returns.

I wrote the libf7 code from scratch and put it under GPL v3 + library 
exception, so it should be no problem to have it as part of libgcc.


Patch 2/3 is the libgcc additions:

--with-libf7 selects which makefile-snips from libf7 to use.

Patch 3/3 is the actual libf7 implementation.  A great deal of which is 
assembly, together with C + inline assembly for higher routines.


Ok for trunk?

Johann


[patch,avr, 1/3] Support 64-bit (long) double: The gcc part.

2019-12-16 Thread Georg-Johann Lay

Am 16.12.19 um 17:40 schrieb Georg-Johann Lay:
Patch 1/3 is the GCC changes: Documentation and new avr-specific
configure options:

--with-libf7 selects to which level double support from libf7 is added
to libgcc.

--with-double-comparison selects what FLOAT_LIB_COMPARE_RETURNS_BOOL
returns.

Johann

gcc/
* config.gcc (tm_defines) [target=avr]: Support --with-libf7,
--with-double-comparison.
* doc/install.texi: Document them.
* config/avr/avr-c.c (avr_cpu_cpp_builtins)

: New built-in defines.
* doc/invoke.texi (AVR Built-in Macros): Document them.
* config/avr/avr-protos.h (avr_float_lib_compare_returns_bool): New.
* config/avr/avr.c (avr_float_lib_compare_returns_bool): New function.
* config/avr/avr.h (FLOAT_LIB_COMPARE_RETURNS_BOOL): New macro.
Index: gcc/config/avr/avr-c.c
===
--- gcc/config/avr/avr-c.c	(revision 278667)
+++ gcc/config/avr/avr-c.c	(working copy)
@@ -390,6 +390,20 @@ start address.  This macro shall be used
   cpp_define (pfile, "__WITH_AVRLIBC__");
 #endif /* WITH_AVRLIBC */
 
+  // From configure --with-libf7={|libgcc|math|math-symbols|yes|no}
+
+#ifdef WITH_LIBF7_LIBGCC
+  cpp_define (pfile, "__WITH_LIBF7_LIBGCC__");
+#endif /* WITH_LIBF7_LIBGCC */
+
+#ifdef WITH_LIBF7_MATH
+  cpp_define (pfile, "__WITH_LIBF7_MATH__");
+#endif /* WITH_LIBF7_MATH */
+
+#ifdef WITH_LIBF7_MATH_SYMBOLS
+  cpp_define (pfile, "__WITH_LIBF7_MATH_SYMBOLS__");
+#endif /* WITH_LIBF7_MATH_SYMBOLS */
+
   // From configure --with-double={|32|32,64|64,32|64}
 
 #ifdef HAVE_DOUBLE_MULTILIB
@@ -438,7 +452,23 @@ start address.  This macro shall be used
 #error "align this with config.gcc"
 #endif
 
-  
+  // From configure --with-double-comparison={2|3} --with-libf7.
+
+#if defined (WITH_DOUBLE_COMPARISON)
+#if WITH_DOUBLE_COMPARISON == 2 || WITH_DOUBLE_COMPARISON == 3
+  /* The number of states a DFmode comparison libcall might take and
+ reflects what avr.c:FLOAT_LIB_COMPARE_RETURNS_BOOL returns for
+ DFmode.  GCC's default is 3-state, but some libraries like libf7
+ implement true / false (2-state).  */
+  cpp_define_formatted (pfile, "__WITH_DOUBLE_COMPARISON__=%d",
+			WITH_DOUBLE_COMPARISON);
+#else
+#error "align this with config.gcc"
+#endif
+#else
+#error "align this with config.gcc"
+#endif
+
   /* Define builtin macros so that the user can easily query whether
  non-generic address spaces (and which) are supported or not.
  This is only supported for C.  For C++, a language extension is needed
Index: gcc/config/avr/avr-protos.h
===
--- gcc/config/avr/avr-protos.h	(revision 278667)
+++ gcc/config/avr/avr-protos.h	(working copy)
@@ -128,6 +128,8 @@ extern bool avr_xload_libgcc_p (machine_
 extern rtx avr_eval_addr_attrib (rtx x);
 extern bool avr_casei_sequence_check_operands (rtx *xop);
 
+extern bool avr_float_lib_compare_returns_bool (machine_mode, enum rtx_code);
+
 static inline unsigned
 regmask (machine_mode mode, unsigned regno)
 {
Index: gcc/config/avr/avr.c
===
--- gcc/config/avr/avr.c	(revision 278667)
+++ gcc/config/avr/avr.c	(working copy)
@@ -14575,6 +14575,23 @@ avr_fold_builtin (tree fndecl, int n_arg
   return NULL_TREE;
 }
 
+
+/* Worker function for `FLOAT_LIB_COMPARE_RETURNS_BOOL'.  */
+
+bool
+avr_float_lib_compare_returns_bool (machine_mode mode, enum rtx_code)
+{
+  if (mode == DFmode)
+{
+#if WITH_DOUBLE_COMPARISON == 2
+  return true;
+#endif
+}
+
+  // This is the GCC default and also what AVR-LibC implements.
+  return false;
+}
+
 
 
 /* Initialize the GCC target structure.  */
Index: gcc/config/avr/avr.h
===
--- gcc/config/avr/avr.h	(revision 278667)
+++ gcc/config/avr/avr.h	(working copy)
@@ -107,6 +107,9 @@ These two properties are reflected by bu
 #define BYTES_BIG_ENDIAN 0
 #define WORDS_BIG_ENDIAN 0
 
+#define FLOAT_LIB_COMPARE_RETURNS_BOOL(mode, comparison) \
+  avr_float_lib_compare_returns_bool (mode, comparison)
+
 #ifdef IN_LIBGCC2
 /* This is to get correct SI and DI modes in libgcc2.c (32 and 64 bits).  */
 #define UNITS_PER_WORD 4
Index: gcc/config.gcc
===
--- gcc/config.gcc	(revision 278552)
+++ gcc/config.gcc	(working copy)
@@ -1303,6 +1303,46 @@ avr-*-*)
 	tm_file="${tm_file} ${cpu_type}/avrlibc.h"
 	tm_defines="${tm_defines} WITH_AVRLIBC"
 	fi
+	# Work out avr_double_comparison which is 2 or 3 and is used in
+	# target hook FLOAT_LIB_COMPARE_RETURNS_BOOL to determine whether
+	# DFmode comparisons return 3-state or 2-state results.
+	case y${with_double_comparison} in
+	y | ytristate)
+		avr_double_comparison=3

Re: [patch,avr, 2/3] Support 64-bit (long) double: The libgcc changes.

2019-12-16 Thread Georg-Johann Lay

Am 16.12.19 um 17:40 schrieb Georg-Johann Lay:

Patch 2/3 is the libgcc additions:

--with-libf7 selects which makefile-snips from libf7 to use.

libgcc/
* config.host (tmake_file) [target=avr]: Add t-libf7,
t-libf7-math, t-libf7-math-symbols as specified by --with-libf7=.
* config/avr/t-avrlibc: Don't copy libgcc.a if there are modules
depending on sizeof (double) or sizeof (long double).
* config/avr/libf7: New folder.
Index: libgcc/config.host
===
--- libgcc/config.host	(revision 278552)
+++ libgcc/config.host	(working copy)
@@ -514,6 +514,29 @@ arm*-*-eabi* | arm*-*-symbianelf* | arm*
 avr-*-*)
 	# Make HImode functions for AVR
 	tmake_file="${cpu_type}/t-avr t-fpbit"
+	# Make some DFmode functions from libf7, part of avr-libgcc.
+	# This must be prior to adding t-avrlibc.
+	case "y${with_libf7}" in
+	yno)
+	# No libf7 support.
+	;;
+	ylibgcc)
+		tmake_file="$tmake_file ${cpu_type}/libf7/t-libf7"
+		;;
+	ymath)
+		tmake_file="$tmake_file ${cpu_type}/libf7/t-libf7-math"
+		tmake_file="$tmake_file ${cpu_type}/libf7/t-libf7"
+		;;
+	ymath-symbols | yyes | y)
+		tmake_file="$tmake_file ${cpu_type}/libf7/t-libf7-math-symbols"
+		tmake_file="$tmake_file ${cpu_type}/libf7/t-libf7-math"
+		tmake_file="$tmake_file ${cpu_type}/libf7/t-libf7"
+		;;
+	*)
+		echo "Error: --with-libf7=${with_libf7} but can only be used with: 'libgcc', 'math', 'math-symbols', 'yes', 'no'" 1>&2
+		exit 1
+		;;
+	esac
 	if test x${with_avrlibc} != xno; then
 	tmake_file="$tmake_file ${cpu_type}/t-avrlibc"
 	fi
Index: libgcc/config/avr/t-avrlibc
===
--- libgcc/config/avr/t-avrlibc	(revision 278992)
+++ libgcc/config/avr/t-avrlibc	(working copy)
@@ -65,6 +65,12 @@ LIB2FUNCS_EXCLUDE += \
 	_fixunssfdi \
 	_floatdisf _floatundisf
 
+ifeq (,$(WITH_LIBF7_MATH_SYMBOLS))
+
+# No modules depend on __SIZEOF_LONG_DOUBLE__ or __SIZEOF_DOUBLE__
+# which means we might have an opportunity to copy libgcc.a.
+# WITH_LIBF7_MATH_SYMBOLS is set by libf7/t-libf7-math-symbols.
+
 ifneq (,$(findstring avr,$(MULTISUBDIR)))
 
 # We are not in the avr2 (default) subdir, hence copying will work.
@@ -95,3 +101,4 @@ Makefile: t-copy-libgcc.dep
 
 endif
 endif
+endif


[patch][avr] PR92606: Disable -fipa-icf-variables because it generates wrong code.

2019-12-18 Thread Georg-Johann Lay
Hi, this patch turns off -fipa-icf-variables because it generates wrong 
code like for PR92606.  As there is no target hook that could decide 
whether such optimizations are obsolete, disable such optimizations 
altogether until PR92932 (target hook to disable such optimizations 
depending on object attributes and address-space) is available.


Ok to apply?

Johann


Work around PR ipa/92932 by disabling -fipa-icf-variables until
PR92932 will have been solved.

PR ipa/92932
PR target/92606
* common/config/avr/avr-common.c (avr_option_optimization_table)
<-fipa-icf-variables>: Disable.
Index: common/config/avr/avr-common.c
===
--- common/config/avr/avr-common.c	(revision 279522)
+++ common/config/avr/avr-common.c	(working copy)
@@ -38,6 +38,14 @@ static const struct default_options avr_
 { OPT_LEVELS_ALL, OPT_fcaller_saves, NULL, 0 },
 { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_mgas_isr_prologues, NULL, 1 },
 { OPT_LEVELS_1_PLUS, OPT_mmain_is_OS_task, NULL, 1 },
+	// FIXME: IPA incorrectly identifies variables in .progmem.data (accessed
+	// via LPM) with variables in .rodata (accessed via LD, LDD, LDS) like
+	// in PR92606.  As there is no target hook to disable such optimizations
+	// depending on target attributes and / or address-spaces of the involved
+	// objects (filed as PR92932), ditch such malicious optimizations now until
+	// PR92932 is implemented and we can use that target hook to solve PR92606
+	// properly.
+{ OPT_LEVELS_ALL, OPT_fipa_icf_variables, NULL, 0 },
 { OPT_LEVELS_NONE, 0, NULL, 0 }
   };
 


[patch][avr] New option -nodevicespecs to omit -specs=... in self specs.

2019-12-18 Thread Georg-Johann Lay
Hi, currently device support in avr-gcc is accomplished by injecting a 
specs file by means of -specs=... in driver self specs.


This patch adds a new avr driver option to omit the addition of 
respective -specs option to give the user more freedom.


Ok to apply?

Johann

* config/avr/avr.opt (-nodevicespecs): New driver option.
* config/avr/driver-avr.c (avr_devicespecs_file): Only issue
"-specs=device-specs/..." if that option is not set.
* doc/invoke.texi (AVR Options) <-nodevicespecs>: Document.
Index: config/avr/avr.opt
===
--- config/avr/avr.opt	(revision 279522)
+++ config/avr/avr.opt	(working copy)
@@ -118,3 +118,7 @@ Assume that all data in static storage c
 nodevicelib
 Driver Target Report RejectNegative
 Do not link against the device-specific library lib.a.
+
+nodevicespecs
+Driver Target Report RejectNegative
+Do not use the device-specific specs file device-specs/specs-.
Index: config/avr/driver-avr.c
===
--- config/avr/driver-avr.c	(revision 279522)
+++ config/avr/driver-avr.c	(working copy)
@@ -26,8 +26,8 @@ along with GCC; see the file COPYING3.
 #include "diagnostic.h"
 #include "tm.h"
 
-// Remove -nodevicelib from the command line if not needed
-#define X_NODEVLIB "%

[PING^1][patch][avr] PR92606: Disable -fipa-icf-variables because it generates wrong code.

2019-12-28 Thread Georg-Johann Lay

Ping #1.

Hi, this patch turns off -fipa-icf-variables because it generates wrong 
code like for PR92606.  As there is no target hook that could decide 
whether such optimizations are obsolete, disable such optimizations 
altogether until PR92932 (target hook to disable such optimizations 
depending on object attributes and address-space) is available.


Ok to apply?

Johann


Work around PR ipa/92932 by disabling -fipa-icf-variables until
PR92932 will have been solved.

PR ipa/92932
PR target/92606
* common/config/avr/avr-common.c (avr_option_optimization_table)
<-fipa-icf-variables>: Disable.





[PING^1][patch][avr] New option -nodevicespecs to omit -specs=... in self specs.

2019-12-28 Thread Georg-Johann Lay

Ping #1

Hi, currently device support in avr-gcc is accomplished by injecting a 
specs file by means of -specs=... in driver self specs.


This patch adds a new avr driver option to omit the addition of the 
respective -specs option to give the user more freedom.


Ok to apply?

Johann

* config/avr/avr.opt (-nodevicespecs): New driver option.
* config/avr/driver-avr.c (avr_devicespecs_file): Only issue
"-specs=device-specs/..." if that option is not set.
* doc/invoke.texi (AVR Options) <-nodevicespecs>: Document.





[PING^1][patch,avr, 0/3] Support 64-bit (long) double.

2019-12-28 Thread Georg-Johann Lay

Ping #1

Now that the avr backend can support 64-bit floats by means of 
configure-options --with-double= and --with-long-double=, this patch 
series adds some routines to support it.


It's an ad-hoc, avr-specific implementation in assembly and GNU-C which 
is added as a new subfolder in libgcc/config/avr/libf7.


Patch 1/3 is the GCC changes: Documentation and new avr-specific 
configure options:


--with-libf7 selects to which level double support from libf7 is added 
to libgcc.


--with-double-comparison selects what FLOAT_LIB_COMPARE_RETURNS_BOOL 
returns.


I wrote the libf7 code from scratch and put it under GPL v3 + library 
exception, so it should be no problem to have it as part of libgcc.


Patch 2/3 is the libgcc additions:

--with-libf7 selects which makefile-snips from libf7 to use.

Patch 3/3 is the actual libf7 implementation.  A great deal of which is 
assembly, together with C + inline assembly for higher routines.


Ok for trunk?

Johann





[PING^1][patch,avr, 0/3] Support 64-bit (long) double.

2019-12-28 Thread Georg-Johann Lay

Ping #1

Now that the avr backend can support 64-bit floats by means of 
configure-options --with-double= and --with-long-double=, this patch 
series adds some routines to support it.


It's an ad-hoc, avr-specific implementation in assembly and GNU-C which 
is added as a new subfolder in libgcc/config/avr/libf7.


Patch 1/3 is the GCC changes: Documentation and new avr-specific 
configure options:


--with-libf7 selects to which level double support from libf7 is added 
to libgcc.


--with-double-comparison selects what FLOAT_LIB_COMPARE_RETURNS_BOOL 
returns.


I wrote the libf7 code from scratch and put it under GPL v3 + library 
exception, so it should be no problem to have it as part of libgcc.


Patch 2/3 is the libgcc additions:

--with-libf7 selects which makefile-snips from libf7 to use.

Patch 3/3 is the actual libf7 implementation.  A great deal of which is 
assembly, together with C + inline assembly for higher routines.


Ok for trunk?

Johann





[PING^1][patch,avr, 2/3] Support 64-bit (long) double: The libgcc changes.

2019-12-28 Thread Georg-Johann Lay

Ping #1


Am 16.12.19 um 17:40 schrieb Georg-Johann Lay:

Patch 2/3 is the libgcc additions:

--with-libf7 selects which makefile-snips from libf7 to use.

libgcc/
* config.host (tmake_file) [target=avr]: Add t-libf7,
t-libf7-math, t-libf7-math-symbols as specified by --with-libf7=.
* config/avr/t-avrlibc: Don't copy libgcc.a if there are modules
depending on sizeof (double) or sizeof (long double).
* config/avr/libf7: New folder.





[PING^1][patch,avr, 3/3] Support 64-bit (long) double: libf7.

2019-12-28 Thread Georg-Johann Lay

Ping #1


Am 16.12.19 um 17:40 schrieb Georg-Johann Lay:
Patch 3/3 is the actual libf7 implementation.  A great deal of which is
assembly, together with C + inline assembly for higher routines.

Johann

libgcc/config/avr/libf7/
* t-libf7: New file.
* t-libf7-math: New file.
* t-libf7-math-symbols: New file.
* libf7-common.mk: New file.
* libf7-asm-object.mk: New file.
* libf7-c-object.mk: New file.
* asm-defs.h: New file.
* libf7.h: New file.
* libf7.c: New file.
* libf7-asm.sx: New file.
* libf7-array.def: New file.
* libf7-const.def: New file.
* libf7-constdef.h: New file.
* f7renames.sh: New script.
* f7wraps.sh: New script.
* f7-renames.h: New generated file.
* f7-wraps.h: New generated file.





[PING^1][patch,avr, 1/3] Support 64-bit (long) double: The gcc part.

2019-12-28 Thread Georg-Johann Lay

Ping #1


Am 16.12.19 um 17:40 schrieb Georg-Johann Lay:
Patch 1/3 is the GCC changes: Documentation and new avr-specific
configure options:

--with-libf7 selects to which level double support from libf7 is added
to libgcc.

--with-double-comparison selects what FLOAT_LIB_COMPARE_RETURNS_BOOL
returns.

Johann

gcc/
* config.gcc (tm_defines) [target=avr]: Support --with-libf7,
--with-double-comparison.
* doc/install.texi: Document them.
* config/avr/avr-c.c (avr_cpu_cpp_builtins)

: New built-in defines.
* doc/invoke.texi (AVR Built-in Macros): Document them.
* config/avr/avr-protos.h (avr_float_lib_compare_returns_bool): New.
* config/avr/avr.c (avr_float_lib_compare_returns_bool): New function.
* config/avr/avr.h (FLOAT_LIB_COMPARE_RETURNS_BOOL): New macro.





[PING^2][patch][avr] PR92606: Disable -fipa-icf-variables because it generates wrong code.

2020-01-06 Thread Georg-Johann Lay

Ping #2.

Hi, this patch turns off -fipa-icf-variables because it generates wrong 
code like for PR92606.  As there is no target hook that could decide 
whether such optimizations are obsolete, disable such optimizations 
altogether until PR92932 (target hook to disable such optimizations 
depending on object attributes and address-space) is available.


Ok to apply?

Johann


Work around PR ipa/92932 by disabling -fipa-icf-variables until
PR92932 will have been solved.

PR ipa/92932
PR target/92606
* common/config/avr/avr-common.c (avr_option_optimization_table)
<-fipa-icf-variables>: Disable.






[PING^2][patch][avr] New option -nodevicespecs to omit -specs=... in self specs.

2020-01-06 Thread Georg-Johann Lay

Ping #2

Hi, currently device support in avr-gcc is accomplished by injecting a 
specs file by means of -specs=... in driver self specs.


This patch adds a new avr driver option to omit the addition of the 
respective -specs option to give the user more freedom.


Ok to apply?

Johann

* config/avr/avr.opt (-nodevicespecs): New driver option.
* config/avr/driver-avr.c (avr_devicespecs_file): Only issue
"-specs=device-specs/..." if that option is not set.
* doc/invoke.texi (AVR Options) <-nodevicespecs>: Document.






[Ping^2][patch,avr, 2/3] Support 64-bit (long) double: The libgcc changes.

2020-01-06 Thread Georg-Johann Lay

Ping #2

Georg-Johann Lay schrieb:

Am 16.12.19 um 17:40 schrieb Georg-Johann Lay:

Patch 2/3 is the libgcc additions:

--with-libf7 selects which makefile-snips from libf7 to use.

libgcc/
* config.host (tmake_file) [target=avr]: Add t-libf7,
t-libf7-math, t-libf7-math-symbols as specified by --with-libf7=.
* config/avr/t-avrlibc: Don't copy libgcc.a if there are modules
depending on sizeof (double) or sizeof (long double).
* config/avr/libf7: New folder.



Index: libgcc/config.host
===
--- libgcc/config.host  (revision 278552)
+++ libgcc/config.host  (working copy)
@@ -514,6 +514,29 @@ arm*-*-eabi* | arm*-*-symbianelf* | arm*
 avr-*-*)
# Make HImode functions for AVR
tmake_file="${cpu_type}/t-avr t-fpbit"
+   # Make some DFmode functions from libf7, part of avr-libgcc.
+   # This must be prior to adding t-avrlibc.
+   case "y${with_libf7}" in
+   yno)
+   # No libf7 support.
+   ;;
+   ylibgcc)
+   tmake_file="$tmake_file ${cpu_type}/libf7/t-libf7"
+   ;;
+   ymath)
+   tmake_file="$tmake_file ${cpu_type}/libf7/t-libf7-math"
+   tmake_file="$tmake_file ${cpu_type}/libf7/t-libf7"
+   ;;
+   ymath-symbols | yyes | y)
+   tmake_file="$tmake_file ${cpu_type}/libf7/t-libf7-math-symbols"
+   tmake_file="$tmake_file ${cpu_type}/libf7/t-libf7-math"
+   tmake_file="$tmake_file ${cpu_type}/libf7/t-libf7"
+   ;;
+   *)
+   echo "Error: --with-libf7=${with_libf7} but can only be used 
with: 'libgcc', 'math', 'math-symbols', 'yes', 'no'" 1>&2
+   exit 1
+   ;;
+   esac
if test x${with_avrlibc} != xno; then
tmake_file="$tmake_file ${cpu_type}/t-avrlibc"
fi
Index: libgcc/config/avr/t-avrlibc
===
--- libgcc/config/avr/t-avrlibc (revision 278992)
+++ libgcc/config/avr/t-avrlibc (working copy)
@@ -65,6 +65,12 @@ LIB2FUNCS_EXCLUDE += \
_fixunssfdi \
_floatdisf _floatundisf
 
+ifeq (,$(WITH_LIBF7_MATH_SYMBOLS))
+
+# No modules depend on __SIZEOF_LONG_DOUBLE__ or __SIZEOF_DOUBLE__
+# which means we might have an opportunity to copy libgcc.a.
+# WITH_LIBF7_MATH_SYMBOLS is set by libf7/t-libf7-math-symbols.
+
 ifneq (,$(findstring avr,$(MULTISUBDIR)))
 
 # We are not in the avr2 (default) subdir, hence copying will work.
@@ -95,3 +101,4 @@ Makefile: t-copy-libgcc.dep
 
 endif
 endif
+endif


[Ping^2][patch,avr, 0/3] Support 64-bit (long) double.

2020-01-06 Thread Georg-Johann Lay

Ping #2

Georg-Johann Lay schrieb:
Now that the avr backend can support 64-bit floats by means of 
configure-options --with-double= and --with-long-double=, this patch 
series adds some routines to support it.


It's an ad-hoc, avr-specific implementation in assembly and GNU-C which 
is added as a new subfolder in libgcc/config/avr/libf7.


Patch 1/3 is the GCC changes: Documentation and new avr-specific 
configure options:


--with-libf7 selects to which level double support from libf7 is added 
to libgcc.


--with-double-comparison selects what FLOAT_LIB_COMPARE_RETURNS_BOOL 
returns.


I wrote the libf7 code from scratch and put it under GPL v3 + library 
exception, so it should be no problem to have it as part of libgcc.


Patch 2/3 is the libgcc additions:

--with-libf7 selects which makefile-snips from libf7 to use.

Patch 3/3 is the actual libf7 implementation.  A great deal of which is 
assembly, together with C + inline assembly for higher routines.


Ok for trunk?

Johann





[Ping^2][patch,avr, 1/3] Support 64-bit (long) double: The gcc part.

2020-01-06 Thread Georg-Johann Lay

Ping #2

Georg-Johann Lay schrieb:

Am 16.12.19 um 17:40 schrieb Georg-Johann Lay:
Patch 1/3 is the GCC changes: Documentation and new avr-specific
configure options:

--with-libf7 selects to which level double support from libf7 is added
to libgcc.

--with-double-comparison selects what FLOAT_LIB_COMPARE_RETURNS_BOOL
returns.

Johann

gcc/
* config.gcc (tm_defines) [target=avr]: Support --with-libf7,
--with-double-comparison.
* doc/install.texi: Document them.
* config/avr/avr-c.c (avr_cpu_cpp_builtins)

: New built-in defines.
* doc/invoke.texi (AVR Built-in Macros): Document them.
* config/avr/avr-protos.h (avr_float_lib_compare_returns_bool): New.
* config/avr/avr.c (avr_float_lib_compare_returns_bool): New function.
* config/avr/avr.h (FLOAT_LIB_COMPARE_RETURNS_BOOL): New macro.



Index: gcc/config/avr/avr-c.c
===
--- gcc/config/avr/avr-c.c  (revision 278667)
+++ gcc/config/avr/avr-c.c  (working copy)
@@ -390,6 +390,20 @@ start address.  This macro shall be used
   cpp_define (pfile, "__WITH_AVRLIBC__");
 #endif /* WITH_AVRLIBC */
 
+  // From configure --with-libf7={|libgcc|math|math-symbols|yes|no}
+
+#ifdef WITH_LIBF7_LIBGCC
+  cpp_define (pfile, "__WITH_LIBF7_LIBGCC__");
+#endif /* WITH_LIBF7_LIBGCC */
+
+#ifdef WITH_LIBF7_MATH
+  cpp_define (pfile, "__WITH_LIBF7_MATH__");
+#endif /* WITH_LIBF7_MATH */
+
+#ifdef WITH_LIBF7_MATH_SYMBOLS
+  cpp_define (pfile, "__WITH_LIBF7_MATH_SYMBOLS__");
+#endif /* WITH_LIBF7_MATH_SYMBOLS */
+
   // From configure --with-double={|32|32,64|64,32|64}
 
 #ifdef HAVE_DOUBLE_MULTILIB
@@ -438,7 +452,23 @@ start address.  This macro shall be used
 #error "align this with config.gcc"
 #endif
 
-  
+  // From configure --with-double-comparison={2|3} --with-libf7.
+
+#if defined (WITH_DOUBLE_COMPARISON)
+#if WITH_DOUBLE_COMPARISON == 2 || WITH_DOUBLE_COMPARISON == 3
+  /* The number of states a DFmode comparison libcall might take and
+ reflects what avr.c:FLOAT_LIB_COMPARE_RETURNS_BOOL returns for
+ DFmode.  GCC's default is 3-state, but some libraries like libf7
+ implement true / false (2-state).  */
+  cpp_define_formatted (pfile, "__WITH_DOUBLE_COMPARISON__=%d",
+   WITH_DOUBLE_COMPARISON);
+#else
+#error "align this with config.gcc"
+#endif
+#else
+#error "align this with config.gcc"
+#endif
+
   /* Define builtin macros so that the user can easily query whether
  non-generic address spaces (and which) are supported or not.
  This is only supported for C.  For C++, a language extension is needed
Index: gcc/config/avr/avr-protos.h
===
--- gcc/config/avr/avr-protos.h (revision 278667)
+++ gcc/config/avr/avr-protos.h (working copy)
@@ -128,6 +128,8 @@ extern bool avr_xload_libgcc_p (machine_
 extern rtx avr_eval_addr_attrib (rtx x);
 extern bool avr_casei_sequence_check_operands (rtx *xop);
 
+extern bool avr_float_lib_compare_returns_bool (machine_mode, enum rtx_code);
+
 static inline unsigned
 regmask (machine_mode mode, unsigned regno)
 {
Index: gcc/config/avr/avr.c
===
--- gcc/config/avr/avr.c(revision 278667)
+++ gcc/config/avr/avr.c(working copy)
@@ -14575,6 +14575,23 @@ avr_fold_builtin (tree fndecl, int n_arg
   return NULL_TREE;
 }
 
+
+/* Worker function for `FLOAT_LIB_COMPARE_RETURNS_BOOL'.  */
+
+bool
+avr_float_lib_compare_returns_bool (machine_mode mode, enum rtx_code)
+{
+  if (mode == DFmode)
+{
+#if WITH_DOUBLE_COMPARISON == 2
+  return true;
+#endif
+}
+
+  // This is the GCC default and also what AVR-LibC implements.
+  return false;
+}
+
 
 
 /* Initialize the GCC target structure.  */
Index: gcc/config/avr/avr.h
===
--- gcc/config/avr/avr.h(revision 278667)
+++ gcc/config/avr/avr.h(working copy)
@@ -107,6 +107,9 @@ These two properties are reflected by bu
 #define BYTES_BIG_ENDIAN 0
 #define WORDS_BIG_ENDIAN 0
 
+#define FLOAT_LIB_COMPARE_RETURNS_BOOL(mode, comparison) \
+  avr_float_lib_compare_returns_bool (mode, comparison)
+
 #ifdef IN_LIBGCC2
 /* This is to get correct SI and DI modes in libgcc2.c (32 and 64 bits).  */
 #define UNITS_PER_WORD 4
Index: gcc/config.gcc
===
--- gcc/config.gcc  (revision 278552)
+++ gcc/config.gcc  (working copy)
@@ -1303,6 +1303,46 @@ avr-*-*)
tm_file="${tm_file} ${cpu_type}/avrlibc.h"
tm_defines="${tm_defines} WITH_AVRLIBC"
fi
+   # Work out avr_double_comparison which is 2 or 3 and is used in
+   # target hook FLOAT_LIB_COMPARE_RETURNS_BOOL to determine whether
+   # DFmode compa

Re: [patch,avr, 1/3] Support 64-bit (long) double: The gcc part.

2020-01-06 Thread Georg-Johann Lay

Jeff Law schrieb:

On Mon, 2019-12-16 at 17:43 +0100, Georg-Johann Lay wrote:

Am 16.12.19 um 17:40 schrieb Georg-Johann Lay:
Patch 1/3 is the GCC changes: Documentation and new avr-specific
configure options:

--with-libf7 selects to which level double support from libf7 is added
to libgcc.

--with-double-comparison selects what FLOAT_LIB_COMPARE_RETURNS_BOOL
returns.

Johann

gcc/
* config.gcc (tm_defines) [target=avr]: Support --with-libf7,
--with-double-comparison.
* doc/install.texi: Document them.
* config/avr/avr-c.c (avr_cpu_cpp_builtins)

: New built-in defines.
* doc/invoke.texi (AVR Built-in Macros): Document them.
* config/avr/avr-protos.h (avr_float_lib_compare_returns_bool): New.
* config/avr/avr.c (avr_float_lib_compare_returns_bool): New function.
* config/avr/avr.h (FLOAT_LIB_COMPARE_RETURNS_BOOL): New macro.
I'd suggest "--with-libf77" rather than "--with-libf7".  


jeff



Why that second 7?

I called it "libf7" because the internal _f_loat representation has a 
mantissa of 7 bytes.  So libf56 would also be indicative.


Actually the name does not matter very much... It just needs to have 
/some/ name.  "f77" however puts it close to Fortran, but that code has 
absolutely nothing to do with Fortran whatsoever.


Johann

p.s. You know anything about Denis Chertykov? He used to reply to avr 
patches within hours, but my latest patches (after a long period where I 
didn't propose any patches) were all approved by you; not a single mail 
from Denis.  So I am concerned if he's well.  Maybe he left you or some 
other global maintainer a note? He's still listed as maintainer though.




Re: [patch][avr] PR92606: Disable -fipa-icf-variables because it generates wrong code.

2020-01-06 Thread Georg-Johann Lay

Jeff Law schrieb:

On Wed, 2019-12-18 at 16:30 +0100, Georg-Johann Lay wrote:
Hi, this patch turns off -fipa-icf-variables because it generates wrong 
code like for PR92606.  As there is no target hook that could decide 
whether such optimizations are obsolete, disable such optimizations 
altogether until PR92932 (target hook to disable such optimizations 
depending on object attributes and address-space) is available.


Ok to apply?

Johann


Work around PR ipa/92932 by disabling -fipa-icf-variables until
PR92932 will have been solved.

PR ipa/92932
PR target/92606
* common/config/avr/avr-common.c (avr_option_optimization_table)
<-fipa-icf-variables>: Disable.

This seems backwards to me.  Instead of disabling the optimization in
the target files we should prevent the optimization from firing in
cases where it can't reasonably work.

Jeff


The chances that this will be fixed are... tiny.  As Andrew notes in a 
comment to PR92932, there are at least 2 other PRs that report 
wrong-code due to similar data optimization.  He mentions different 
passes however.


Whatever passes perform such wrong-code transforms, apart from a more 
conservative approach they will need a new target hook to properly fix 
PR92606 because target attributes / address spaces are involved.


I'd highly appreciate correct code, even if it's at the expense of (yah, 
yet another) hack in the avr backend.  In particular, because such 
optimizations will improve code only a tiny little bit -- if at all. 
Hence kicking out the culprit does not reduce code quality, also because 
IF such merging is legitimate, some cases can be caught by the linker 
with, say, -fmerge-all-constants.


If PR92932, PR92294, PR954666 will ever be fixed, I'd gladly remove the 
proposed 1-line disable-culprit-hack and implement the new target hook 
that PR92932 is supposed to bring.


Johann



[patch,avr,applied]: Set -fsplit-wide-types-early.

2020-01-09 Thread Georg-Johann Lay
This patch sets -fsplit-wide-types-early for avr as it appears that the 
old placement of that pass gives better code for that target.


Applied as r280033.

Johann

* common/config/avr/avr-common.c (avr_option_optimization_table)
[OPT_LEVELS_1_PLUS]: Set -fsplit-wide-types-early.

Index: common/config/avr/avr-common.c
===
--- common/config/avr/avr-common.c
+++ common/config/avr/avr-common.c
@@ -40,6 +40,8 @@ static const struct default_options avr_
 { OPT_LEVELS_ALL, OPT_fcaller_saves, NULL, 0 },
 { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_mgas_isr_prologues, NULL, 1 },
 { OPT_LEVELS_1_PLUS, OPT_mmain_is_OS_task, NULL, 1 },
+// Stick to the "old" placement of the subreg lowering pass.
+{ OPT_LEVELS_1_PLUS, OPT_fsplit_wide_types_early, NULL, 1 },
 /* Allow optimizer to introduce store data races. This used to be the
default -- it was changed because bigger targets did not see any
performance decrease. For the AVR though, disallowing data races


[patch,avr,applied] Adjust help messages.

2024-03-01 Thread Georg-Johann Lay

This patch unifies help screen messages.

Johann

--

AVR: Overhaul help screen

gcc/
* config/avr/avr.opt: Overhaul help screen.

diff --git a/gcc/config/avr/avr.opt b/gcc/config/avr/avr.opt
index ea35b7d5b4e..c3ca8379ee3 100644
--- a/gcc/config/avr/avr.opt
+++ b/gcc/config/avr/avr.opt
@@ -20,27 +20,27 @@
 
 mcall-prologues
 Target Mask(CALL_PROLOGUES) Optimization
-Use subroutines for function prologues and epilogues.
+Optimization. Use subroutines for function prologues and epilogues.
 
 mmcu=
 Target RejectNegative Joined Var(avr_mmcu) MissingArgError(missing device or architecture after %qs)
--mmcu=MCU	Select the target MCU.
+-mmcu=	Select the target MCU.
 
 mgas-isr-prologues
 Target Var(avr_gasisr_prologues) UInteger Init(0) Optimization
-Allow usage of __gcc_isr pseudo instructions in ISR prologues and epilogues.
+Optimization. Allow usage of __gcc_isr pseudo instructions in ISR prologues and epilogues.
 
 mn-flash=
 Target RejectNegative Joined Var(avr_n_flash) UInteger Init(-1)
-Set the number of 64 KiB flash segments.
+This option is used internally. Set the number of 64 KiB flash segments.
 
 mskip-bug
 Target Mask(SKIP_BUG)
-Indicate presence of a processor erratum.
+This option is used internally. Indicate presence of a processor erratum.  Do not skip 32-bit instructions.
 
 mrmw
 Target Mask(RMW)
-Enable Read-Modify-Write (RMW) instructions support/use.
+This option is used internally. Enable Read-Modify-Write (RMW) instructions support/use.
 
 mdeb
 Target Undocumented Mask(ALL_DEBUG)
@@ -50,7 +50,7 @@ Target RejectNegative Joined Undocumented Var(avr_log_details)
 
 mshort-calls
 Target RejectNegative Mask(SHORT_CALLS)
-Use RJMP / RCALL even though CALL / JMP are available.
+This option is used internally for multilib generation and selection.  Assume RJMP / RCALL can target all program memory.
 
 mint8
 Target Mask(INT8)
@@ -62,11 +62,11 @@ Change the stack pointer without disabling interrupts.
 
 mbranch-cost=
 Target Joined RejectNegative UInteger Var(avr_branch_cost) Init(0) Optimization
-Set the branch costs for conditional branch instructions.  Reasonable values are small, non-negative integers.  The default branch cost is 0.
+-mbranch-cost=	Optimization. Set the branch costs for conditional branch instructions.  Reasonable values are small, non-negative integers.  The default branch cost is 0.
 
 mmain-is-OS_task
 Target Mask(MAIN_IS_OS_TASK) Optimization
-Treat main as if it had attribute OS_task.
+Optimization. Treat main as if it had attribute OS_task.
 
 morder1
 Target Undocumented Mask(ORDER_1)
@@ -80,7 +80,7 @@ Change only the low 8 bits of the stack pointer.
 
 mrelax
 Target Optimization
-Relax branches.
+Optimization. Relax branches.
 
 mpmem-wrap-around
 Target
@@ -88,15 +88,15 @@ Make the linker relaxation machine assume that a program counter wrap-around occ
 
 maccumulate-args
 Target Mask(ACCUMULATE_OUTGOING_ARGS) Optimization
-Accumulate outgoing function arguments and acquire/release the needed stack space for outgoing function arguments in function prologue/epilogue.  Without this option, outgoing arguments are pushed before calling a function and popped afterwards.  This option can lead to reduced code size for functions that call many functions that get their arguments on the stack like, for example printf.
+Optimization. Accumulate outgoing function arguments and acquire/release the needed stack space for outgoing function arguments in function prologue/epilogue.  Without this option, outgoing arguments are pushed before calling a function and popped afterwards.  This option can lead to reduced code size for functions that call many functions that get their arguments on the stack like, for example printf.
 
 mstrict-X
 Target Var(avr_strict_X) Init(0) Optimization
-When accessing RAM, use X as imposed by the hardware, i.e. just use pre-decrement, post-increment and indirect addressing with the X register.  Without this option, the compiler may assume that there is an addressing mode X+const similar to Y+const and Z+const and emit instructions to emulate such an addressing mode for X.
+Optimization. When accessing RAM, use X as imposed by the hardware, i.e. just use pre-decrement, post-increment and indirect addressing with the X register.  Without this option, the compiler may assume that there is an addressing mode X+const similar to Y+const and Z+const and emit instructions to emulate such an addressing mode for X.
 
 mflmap
 Target Var(avr_flmap) Init(0)
-The device has the bitfield NVMCTRL_CTRLB.FLMAP.  This option is used internally.
+This option is used internally. The device has the bitfield NVMCTRL_CTRLB.FLMAP.
 
 mrodata-in-ram
 Target Var(avr_rodata_in_ram) Init(-1)
@@ -105,15 +105,15 @@ The device has the .rodata section located in the RAM area.
 ;; For rationale behind -msp8 see explanation in avr.h.
 msp8
 Target RejectNegative Var(avr_sp8) Init(0)
-The device has no SPH special function register. This option will be overridden by the compile

[patch,avr,applied] Take into account -mtiny-stack in frame pointer adjustments

2024-03-02 Thread Georg-Johann Lay

Applied this addendum to avr PR114100:

When the frame pointer is adjusted and -mtiny-stack is set,
then it is enough to adjust the low part of the frame pointer.

Johann

--

AVR: target/114100 - Factor in -mtiny-stack in frame pointer adjustments

gcc/
PR target/114100
* config/avr/avr.cc (avr_out_plus_1) [-mtiny-stack]: Only adjust
the low part of the frame pointer with 8-bit stack pointer.


diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 94ef7c591a9..d39d6707c97 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -8983,14 +8983,17 @@ avr_out_plus_1 (rtx *xop, int *plen, enum 
rtx_code code, int *pcc,

  && frame_pointer_needed
  && REGNO (xop[0]) == FRAME_POINTER_REGNUM)
{
- rtx xval16 = simplify_gen_subreg (HImode, xval, imode, i);
- if (xval16 == const1_rtx || xval16 == constm1_rtx)
+ if (AVR_HAVE_8BIT_SP)
+   {
+ avr_asm_len ("subi %A0,%n2", xop, plen, 1);
+ return;
+   }
+ else if (xop[2] == const1_rtx || xop[2] == constm1_rtx)
{
- avr_asm_len ((code == PLUS) == (xval16 == const1_rtx)
+ avr_asm_len (xop[2] == const1_rtx
   ? "ld __tmp_reg__,%a0+"
   : "ld __tmp_reg__,-%a0", xop, plen, 1);
- i++;
- continue;
+ return;
}
}



[patch,avr,applied] Avoid magic numbers for register numbers.

2024-03-02 Thread Georg-Johann Lay

There are some places where avr.cc uses magic numbers like 17 that
are actually register numbers.  This patch defines constants like
REG_17 and uses them instead of the magic numbers when a register
number is meant.

Johann

--

AVR: Use REG_ constants instead of magic numbers.

There are some places where avr.cc uses magic numbers like 17 that
are actually register numbers.  This patch defines constants like
REG_17 and uses them instead of the magic numbers when a register
number is meant.

gcc/
* config/avr/avr.md (REG_0, ... REG_36): New define_constants.
* config/avr/avr.cc: Use them instead of magic numbers when it
means a register number.
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index e312ddfbff4..5c71c7f8c0d 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -171,10 +171,10 @@ static bool avr_rtx_costs (rtx, machine_mode, int, int, int *, bool);
 
 
 /* Allocate registers from r25 to r8 for parameters for function calls.  */
-#define FIRST_CUM_REG 26
+#define FIRST_CUM_REG REG_26
 
 /* Last call saved register */
-#define LAST_CALLEE_SAVED_REG (AVR_TINY ? 19 : 17)
+#define LAST_CALLEE_SAVED_REG (AVR_TINY ? REG_19 : REG_17)
 
 /* Implicit target register of LPM instruction (R0) */
 extern GTY(()) rtx lpm_reg_rtx;
@@ -197,8 +197,8 @@ extern GTY(()) rtx cc_reg_rtx;
 rtx cc_reg_rtx;
 
 /* RTXs for all general purpose registers as QImode */
-extern GTY(()) rtx all_regs_rtx[32];
-rtx all_regs_rtx[32];
+extern GTY(()) rtx all_regs_rtx[REG_32];
+rtx all_regs_rtx[REG_32];
 
 /* SREG, the processor status */
 extern GTY(()) rtx sreg_rtx;
@@ -542,7 +542,7 @@ avr_casei_sequence_check_operands (rtx *xop)
 
   if (AVR_HAVE_EIJMP_EICALL
   // The last clobber op of the tablejump.
-  && xop[8] == all_regs_rtx[24])
+  && xop[8] == all_regs_rtx[REG_24])
 {
   // $6 is: (subreg:SI ($5) 0)
   sub_5 = xop[6];
@@ -1171,7 +1171,7 @@ avr_init_machine_status (void)
 void
 avr_init_expanders (void)
 {
-  for (int regno = 0; regno < 32; regno ++)
+  for (int regno = REG_0; regno < REG_32; regno ++)
 all_regs_rtx[regno] = gen_rtx_REG (QImode, regno);
 
   lpm_reg_rtx  = all_regs_rtx[LPM_REGNO];
@@ -1549,7 +1549,7 @@ avr_regs_to_save (HARD_REG_SET *set)
   || cfun->machine->is_OS_main)
 return 0;
 
-  for (int reg = 0; reg < 32; reg++)
+  for (int reg = REG_0; reg < REG_32; reg++)
 {
   /* Do not push/pop __tmp_reg__, __zero_reg__, as well as
 	 any global register variables.  */
@@ -2300,9 +2300,9 @@ avr_pass_fuse_add::execute (function *func)
 
   FOR_EACH_BB_FN (bb, func)
 {
-  Ldi_Insn prev_ldi_insns[32];
-  Add_Insn prev_add_insns[32];
-  Mem_Insn prev_mem_insns[32];
+  Ldi_Insn prev_ldi_insns[REG_32];
+  Add_Insn prev_add_insns[REG_32];
+  Mem_Insn prev_mem_insns[REG_32];
   rtx_insn *insn, *curr;
 
   avr_dump ("\n;; basic block %d\n\n", bb->index);
@@ -2484,7 +2484,7 @@ avr_incoming_return_addr_rtx (void)
 static int
 avr_hregs_split_reg (HARD_REG_SET *set)
 {
-  for (int regno = 0; regno < 32; regno++)
+  for (int regno = REG_0; regno < REG_32; regno++)
 if (TEST_HARD_REG_BIT (*set, regno))
   {
 	// Don't remove a register from *SET which might indicate that
@@ -2620,9 +2620,9 @@ avr_prologue_setup_frame (HOST_WIDE_INT size, HARD_REG_SET set)
 
   first_reg = (LAST_CALLEE_SAVED_REG + 1) - (live_seq - 2);
 
-  for (reg = 29, offset = -live_seq + 1;
+  for (reg = REG_29, offset = -live_seq + 1;
 	   reg >= first_reg;
-	   reg = (reg == 28 ? LAST_CALLEE_SAVED_REG : reg - 1), ++offset)
+	   reg = (reg == REG_28 ? LAST_CALLEE_SAVED_REG : reg - 1), ++offset)
 	{
 	  rtx m, r;
 
@@ -2636,7 +2636,7 @@ avr_prologue_setup_frame (HOST_WIDE_INT size, HARD_REG_SET set)
 }
   else /* !minimize */
 {
-  for (int reg = 0; reg < 32; ++reg)
+  for (int reg = REG_0; reg < REG_32; ++reg)
 	if (TEST_HARD_REG_BIT (set, reg))
 	  emit_push_byte (reg, true);
 
@@ -3795,7 +3795,7 @@ avr_print_operand (FILE *file, rtx x, int code)
 {
   if (x == zero_reg_rtx)
 	fprintf (file, "__zero_reg__");
-  else if (code == 'r' && REGNO (x) < 32)
+  else if (code == 'r' && REGNO (x) < REG_32)
 	fprintf (file, "%d", (int) REGNO (x));
   else
 	fprintf (file, "%s", reg_names[REGNO (x) + abcd]);
@@ -4136,7 +4136,9 @@ avr_asm_final_postscan_insn (FILE *stream, rtx_insn *insn, rtx *, int)
 int
 avr_function_arg_regno_p (int r)
 {
-  return AVR_TINY ? IN_RANGE (r, 20, 25) : IN_RANGE (r, 8, 25);
+  return AVR_TINY
+? IN_RANGE (r, REG_20, REG_25)
+: IN_RANGE (r, REG_8, REG_25);
 }
 
 
@@ -4148,7 +4150,7 @@ void
 avr_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, rtx libname,
 			  tree fndecl ATTRIBUTE_UNUSED)
 {
-  cum->nregs = AVR_TINY ? 6 : 18;
+  cum->nregs = AVR_TINY ? 1 + REG_25 - REG_20 : 1 + REG_25 - REG_8;
   cum->regno = FIRST_CUM_REG;
   cum->has_stack_args = 0;
   if (!libname && stdarg_p (fntype))
@@ -4216,7 +4218,7 @@ avr_function_arg_advance (

[patch,avr,applied] ad target/92729: Remove last cc0 remains.

2024-03-03 Thread Georg-Johann Lay

Removed the last cc0 remains.

Johann

--

AVR: ad target/92729 - Remove insn attribute "cc" and its (dead) uses.

The backend has remains of cc0 condition code.  Unfortunately,
all that information is useless with CCmode, and their use was
removed with the removal of NOTICE_UPDATE_CC in PR92729 with
r12-226 and r12-327.

gcc/
PR target/92729
* config/avr/avr.md (define_attr "cc"): Remove.
* config/avr/avr-protos.h (avr_out_plus): Remove pcc argument
from prototype.
* config/avr/avr.cc (avr_out_plus_1): Remove pcc argument and
its uses.  Add insn argument.
(avr_out_plus_symbol): Remove pcc argument and its uses.
(avr_out_plus): Remove pcc argument and its uses.
Adjust calls of avr_out_plus_symbol and avr_out_plus_1.
(avr_out_round): Adjust call of avr_out_plus.

diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index 064a3d23322..f4f3ffd8f28 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -93,7 +93,7 @@ extern bool avr_split_tiny_move (rtx_insn *insn, rtx *operands);
 extern void avr_output_addr_vec (rtx_insn*, rtx);
 extern const char *avr_out_sbxx_branch (rtx_insn *insn, rtx operands[]);
 extern const char* avr_out_bitop (rtx, rtx*, int*);
-extern const char* avr_out_plus (rtx, rtx*, int* =NULL, int* =NULL, bool =true);
+extern const char* avr_out_plus (rtx, rtx*, int* =NULL, bool =true);
 extern const char* avr_out_round (rtx_insn *, rtx*, int* =NULL);
 extern const char* avr_out_addto_sp (rtx*, int*);
 extern const char* avr_out_xload (rtx_insn *, rtx*, int*);
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index b86f4313fe2..44d6e141b62 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -8799,6 +8799,7 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *len)
 
 
 /* Output addition of register XOP[0] and compile time constant XOP[2].
+   INSN is a single_set insn or an insn pattern.
CODE == PLUS:  perform addition by using ADD instructions or
CODE == MINUS: perform addition by using SUB instructions:
 
@@ -8811,7 +8812,6 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *len)
If PLEN == NULL, print assembler instructions to perform the operation;
otherwise, set *PLEN to the length of the instruction sequence (in words)
printed with PLEN == NULL.  XOP[3] is an 8-bit scratch register or NULL_RTX.
-   Set *PCC to effect on cc0 according to respective CC_* insn attribute.
 
CODE_SAT == UNKNOWN: Perform ordinary, non-saturating operation.
CODE_SAT != UNKNOWN: Perform operation and saturate according to CODE_SAT.
@@ -8825,7 +8825,7 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *len)
fixed-point rounding, cf. `avr_out_round'.  */
 
 static void
-avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc,
+avr_out_plus_1 (rtx /*insn*/, rtx *xop, int *plen, enum rtx_code code,
 		enum rtx_code code_sat, int sign, bool out_label)
 {
   /* MODE of the operation.  */
@@ -8861,8 +8861,6 @@ avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc,
 
   if (REG_P (xop[2]))
 {
-  *pcc = MINUS == code ? (int) CC_SET_CZN : (int) CC_CLOBBER;
-
   for (int i = 0; i < n_bytes; i++)
 	{
 	  /* We operate byte-wise on the destination.  */
@@ -8888,21 +8886,13 @@ avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc,
   goto saturate;
 }
 
-  /* Except in the case of ADIW with 16-bit register (see below)
- addition does not set cc0 in a usable way.  */
-
-  *pcc = (MINUS == code) ? CC_SET_CZN : CC_CLOBBER;
-
   if (CONST_FIXED_P (xval))
 xval = avr_to_int_mode (xval);
 
   /* Adding/Subtracting zero is a no-op.  */
 
   if (xval == const0_rtx)
-{
-  *pcc = CC_NONE;
-  return;
-}
+return;
 
   if (MINUS == code)
 xval = simplify_unary_operation (NEG, imode, xval, imode);
@@ -8947,9 +8937,6 @@ avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc,
 
   /* To get usable cc0 no low-bytes must have been skipped.  */
 
-  if (i && !started)
-	*pcc = CC_CLOBBER;
-
   if (!started
 	  && i % 2 == 0
 	  && i + 2 <= n_bytes
@@ -8968,9 +8955,6 @@ avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc,
 		  started = true;
 		  avr_asm_len (code == PLUS ? "adiw %0,%1" : "sbiw %0,%1",
 			   op, plen, 1);
-
-		  if (n_bytes == 2 && PLUS == code)
-		*pcc = CC_SET_CZN;
 		}
 
 	  i++;
@@ -9018,7 +9002,6 @@ avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc,
 	{
 	  avr_asm_len ((code == PLUS) ^ (val8 == 1) ? "dec %0" : "inc %0",
 		   op, plen, 1);
-	  *pcc = CC_CLOBBER;
 	  break;
 	}
 
@@ -9077,8 +9060,6 @@ avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc,
   if (UNKNOWN == code_sat)
 return;
 
-  *pcc = (int) CC_CLOBBER;
-
   /* Vanilla addition/subtraction is done.  We are left with saturation.
 
  We have to compute  A = A <op> B  where  A  is a register and
@@ -9298,7 +9279,7 

[avr,patch,applied] ad target/114100 - Don't print unused frame pointer adjustments.

2024-03-03 Thread Georg-Johann Lay

This addendum ports a corner case optimization from -mno-fuse-add
to -mfuse-add:  When a base register needs temporary adjustment,
and the base is the frame pointer, then there are cases where the
post-adjustment is not needed.

Passes without new regressions on ATtiny40.

Johann

--

AVR: ad target/114100 - Don't print unused frame pointer adjustments.

Without -mfuse-add, when fake reg+offset addressing is used, the
output routines are saving some instructions when the base reg
is unused after.  This patch adds that optimization for the case
when the base is the frame pointer and the frame pointer adjustments
are split away from the move insn by -mfuse-add in .split2.
   Direct usage of reg_unused_after is not possible because that
function looks at the destination of the current insn, which won't
work for offsetting the frame pointer in printing PLUS code.
It can use an extended version of _reg_unused_after though.

gcc/
PR target/114100
* config/avr/avr-protos.h (_reg_unused_after): Remove proto.
* config/avr/avr.cc (_reg_unused_after): Make static.  And
add 3rd argument to skip the current insn.
(reg_unused_after): Adjust call of _reg_unused_after.
(avr_out_plus_1) [AVR_TINY && -mfuse-add >= 2]: Don't output
unneeded frame pointer adjustments.

diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index f4f3ffd8f28..3e19409d636 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -110,7 +110,6 @@ extern const char* avr_out_reload_inpsi (rtx*, rtx, int*);
 extern const char* avr_out_lpm (rtx_insn *, rtx*, int*);
 extern void avr_notice_update_cc (rtx body, rtx_insn *insn);
 extern int reg_unused_after (rtx_insn *insn, rtx reg);
-extern int _reg_unused_after (rtx_insn *insn, rtx reg);
 extern int avr_jump_mode (rtx x, rtx_insn *insn);
 extern int test_hard_reg_class (enum reg_class rclass, rtx x);
 extern int jump_over_one_insn_p (rtx_insn *insn, rtx dest);
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 44d6e141b62..7df21432dda 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -163,6 +163,7 @@ static int avr_operand_rtx_cost (rtx, machine_mode, enum rtx_code,
  int, bool);
 static void output_reload_in_const (rtx *, rtx, int *, bool);
 static struct machine_function *avr_init_machine_status (void);
+static int _reg_unused_after (rtx_insn *insn, rtx reg, bool look_at_insn);
 
 
 /* Prototypes for hook implementors if needed before their implementation.  */
@@ -8825,7 +8826,7 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *len)
fixed-point rounding, cf. `avr_out_round'.  */
 
 static void
-avr_out_plus_1 (rtx /*insn*/, rtx *xop, int *plen, enum rtx_code code,
+avr_out_plus_1 (rtx insn, rtx *xop, int *plen, enum rtx_code code,
 		enum rtx_code code_sat, int sign, bool out_label)
 {
   /* MODE of the operation.  */
@@ -8973,6 +8974,10 @@ avr_out_plus_1 (rtx /*insn*/, rtx *xop, int *plen, enum rtx_code code,
 	  && frame_pointer_needed
 	  && REGNO (xop[0]) == FRAME_POINTER_REGNUM)
 	{
+	  if (INSN_P (insn)
+	  && _reg_unused_after (as_a <rtx_insn *> (insn), xop[0], false))
+	return;
+
 	  if (AVR_HAVE_8BIT_SP)
 	{
 	  avr_asm_len ("subi %A0,%n2", xop, plen, 1);
@@ -10818,31 +10823,32 @@ int
 reg_unused_after (rtx_insn *insn, rtx reg)
 {
   return (dead_or_set_p (insn, reg)
-	  || (REG_P (reg) && _reg_unused_after (insn, reg)));
+	  || (REG_P (reg) && _reg_unused_after (insn, reg, true)));
 }
 
-/* Return nonzero if REG is not used after INSN.
+/* A helper for the previous function.
+   Return nonzero if REG is not used after INSN.
We assume REG is a reload reg, and therefore does
not live past labels.  It may live past calls or jumps though.  */
 
 int
-_reg_unused_after (rtx_insn *insn, rtx reg)
+_reg_unused_after (rtx_insn *insn, rtx reg, bool look_at_insn)
 {
-  enum rtx_code code;
-  rtx set;
-
-  /* If the reg is set by this instruction, then it is safe for our
- case.  Disregard the case where this is a store to memory, since
- we are checking a register used in the store address.  */
-  set = single_set (insn);
-  if (set && !MEM_P (SET_DEST (set))
-  && reg_overlap_mentioned_p (reg, SET_DEST (set)))
-return 1;
+  if (look_at_insn)
+{
+  /* If the reg is set by this instruction, then it is safe for our
+	 case.  Disregard the case where this is a store to memory, since
+	 we are checking a register used in the store address.  */
+  rtx set = single_set (insn);
+  if (set && !MEM_P (SET_DEST (set))
+	  && reg_overlap_mentioned_p (reg, SET_DEST (set)))
+	return 1;
+}
 
   while ((insn = NEXT_INSN (insn)))
 {
   rtx set;
-  code = GET_CODE (insn);
+  enum rtx_code code = GET_CODE (insn);
 
 #if 0
   /* If this is a label that existed before reload, then the register


[patch,avr,applied] Use more C++ ish coding style.

2024-03-03 Thread Georg-Johann Lay

This is a no-op patch that uses some more C++ / C99
features if possible.

Johann

--

AVR: Use more C++ ish coding style.

gcc/
* config/avr/avr.cc: Resolve ATTRIBUTE_UNUSED.
Use bool in place of int for boolean logic (if possible).
Move declarations to definitions (if possible).
* config/avr/avr.md: Use C++ comments.  Fix some indentation 
glitches.

* config/avr/avr-dimode.md: Same.
* config/avr/constraints.md: Same.
* config/avr/predicates.md: Same.

diff --git a/gcc/config/avr/avr-dimode.md b/gcc/config/avr/avr-dimode.md
index 6fcabdaaf6e..4b74e77e5e5 100644
--- a/gcc/config/avr/avr-dimode.md
+++ b/gcc/config/avr/avr-dimode.md
@@ -43,8 +43,8 @@
 ;; so that no DImode insn contains pseudos or needs reloading.
 
 (define_constants
-  [(ACC_A	18)
-   (ACC_B	10)])
+  [(ACC_A  18)
+   (ACC_B  10)])
 
 ;; Supported modes that are 8 bytes wide
 (define_mode_iterator ALL8 [DI DQ UDQ DA UDA TA UTA])
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 7df21432dda..c8b2b504e3f 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -163,7 +163,7 @@ static int avr_operand_rtx_cost (rtx, machine_mode, enum rtx_code,
  int, bool);
 static void output_reload_in_const (rtx *, rtx, int *, bool);
 static struct machine_function *avr_init_machine_status (void);
-static int _reg_unused_after (rtx_insn *insn, rtx reg, bool look_at_insn);
+static bool _reg_unused_after (rtx_insn *insn, rtx reg, bool look_at_insn);
 
 
 /* Prototypes for hook implementors if needed before their implementation.  */
@@ -648,8 +648,6 @@ avr_optimize_casesi (rtx_insn *insns[5], rtx *xop)
 
   start_sequence();
 
-  rtx_insn *seq1, *seq2, *last1, *last2;
-
   rtx reg = copy_to_mode_reg (mode, xop[10]);
 
   rtx (*gen_add)(rtx,rtx,rtx) = QImode == mode ? gen_addqi3 : gen_addhi3;
@@ -665,8 +663,8 @@ avr_optimize_casesi (rtx_insn *insns[5], rtx *xop)
   JUMP_LABEL (cbranch) = xop[4];
   ++LABEL_NUSES (xop[4]);
 
-  seq1 = get_insns();
-  last1 = get_last_insn();
+  rtx_insn *seq1 = get_insns();
+  rtx_insn *last1 = get_last_insn();
   end_sequence();
 
   emit_insn_after (seq1, insns[2]);
@@ -686,8 +684,8 @@ avr_optimize_casesi (rtx_insn *insns[5], rtx *xop)
 
   emit_insn (pat_4);
 
-  seq2 = get_insns();
-  last2 = get_last_insn();
+  rtx_insn *seq2 = get_insns();
+  rtx_insn *last2 = get_last_insn();
   end_sequence();
 
   emit_insn_after (seq2, insns[3]);
@@ -1309,7 +1307,7 @@ avr_mem_memx_p (rtx x)
 /* A helper for the subsequent function attribute used to dig for
attribute 'name' in a FUNCTION_DECL or FUNCTION_TYPE */
 
-static inline int
+static inline bool
 avr_lookup_function_attribute1 (const_tree func, const char *name)
 {
   if (FUNCTION_DECL == TREE_CODE (func))
@@ -1329,7 +1327,7 @@ avr_lookup_function_attribute1 (const_tree func, const char *name)
 
 /* Return nonzero if FUNC is a naked function.  */
 
-static int
+static bool
 avr_naked_function_p (tree func)
 {
   return avr_lookup_function_attribute1 (func, "naked");
@@ -1338,7 +1336,7 @@ avr_naked_function_p (tree func)
 /* Return nonzero if FUNC is an interrupt function as specified
by the "interrupt" attribute.  */
 
-static int
+static bool
 avr_interrupt_function_p (tree func)
 {
   return avr_lookup_function_attribute1 (func, "interrupt");
@@ -1347,7 +1345,7 @@ avr_interrupt_function_p (tree func)
 /* Return nonzero if FUNC is a signal function as specified
by the "signal" attribute.  */
 
-static int
+static bool
 avr_signal_function_p (tree func)
 {
   return avr_lookup_function_attribute1 (func, "signal");
@@ -1355,7 +1353,7 @@ avr_signal_function_p (tree func)
 
 /* Return nonzero if FUNC is an OS_task function.  */
 
-static int
+static bool
 avr_OS_task_function_p (tree func)
 {
   return avr_lookup_function_attribute1 (func, "OS_task");
@@ -1363,7 +1361,7 @@ avr_OS_task_function_p (tree func)
 
 /* Return nonzero if FUNC is an OS_main function.  */
 
-static int
+static bool
 avr_OS_main_function_p (tree func)
 {
   return avr_lookup_function_attribute1 (func, "OS_main");
@@ -1373,7 +1371,7 @@ avr_OS_main_function_p (tree func)
 /* Return nonzero if FUNC is a no_gccisr function as specified
by the "no_gccisr" attribute.  */
 
-static int
+static bool
 avr_no_gccisr_function_p (tree func)
 {
   return avr_lookup_function_attribute1 (func, "no_gccisr");
@@ -1536,12 +1534,11 @@ avr_starting_frame_offset (void)
 static int
 avr_regs_to_save (HARD_REG_SET *set)
 {
-  int count;
+  int count = 0;
   int int_or_sig_p = cfun->machine->is_interrupt || cfun->machine->is_signal;
 
   if (set)
 CLEAR_HARD_REG_SET (*set);
-  count = 0;
 
   /* No need to save any registers if the function never returns or
  has the "OS_task" or "OS_main" attribute.  */
@@ -1589,7 +1586,7 @@ avr_allocate_stack_slots_for_args (void)
 /* Return true if register FROM can be eliminated via register TO.  */
 
 static bool
-avr_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
+avr_can_el

[patch,avr,applied] Improve output of insn "*insv.any_shift.".

2024-03-05 Thread Georg-Johann Lay

Applied Roger's proposed improvements with some changes:

Lengthy code is more convenient in avr.cc than in an insn
output function, and it makes it easy to work out the exact
instruction length.  Moreover, the code can handle shifts
with offset zero (cases of *and3 insns).

Passed with no new regressions on ATmega128.

Applied as https://gcc.gnu.org/r14-9317

Johann

--

AVR: Improve output of insn "*insv.any_shift._split".

The instructions printed by insn "*insv.any_shift.<mode>_split" were
sub-optimal.  The code to print the improved output is lengthy and
performed by new function avr_out_insv.  As it turns out, the function
can also handle shift-offsets of zero, which is "*andhi3", "*andpsi3"
and "*andsi3".  Thus, these three insns get a new 3-operand alternative
where the 3rd operand is an exact power of 2.

gcc/
* config/avr/avr-protos.h (avr_out_insv): New proto.
* config/avr/avr.cc (avr_out_insv): New function.
(avr_adjust_insn_length) [ADJUST_LEN_INSV]: Handle case.
(avr_cbranch_cost) [ZERO_EXTRACT]: Adjust rtx costs.
* config/avr/avr.md (define_attr "adjust_len") Add insv.
(andhi3, *andhi3, andpsi3, *andpsi3, andsi3, *andsi3):
Add constraint alternative where the 3rd operand is a power
of 2, and the source register may differ from the destination.
(*insv.any_shift._split): Call avr_out_insv to output
instructions.  Set attr "length" to "insv".
* config/avr/constraints.md (Cb2, Cb3, Cb4): New constraints.

gcc/testsuite/
* gcc.target/avr/torture/insv-anyshift-hi.c: New test.
* gcc.target/avr/torture/insv-anyshift-si.c: New test.
commit 49a1a340ea0eef681f23b6861f3cdb6840aadd99
Author: Roger Sayle 
Date:   Tue Mar 5 11:06:17 2024 +0100

AVR: Improve output of insn "*insv.any_shift._split".

The instructions printed by insn "*insv.any_shift.<mode>_split" were
sub-optimal.  The code to print the improved output is lengthy and
performed by new function avr_out_insv.  As it turns out, the function
can also handle shift-offsets of zero, which is "*andhi3", "*andpsi3"
and "*andsi3".  Thus, these three insns get a new 3-operand alternative
where the 3rd operand is an exact power of 2.

gcc/
* config/avr/avr-protos.h (avr_out_insv): New proto.
* config/avr/avr.cc (avr_out_insv): New function.
(avr_adjust_insn_length) [ADJUST_LEN_INSV]: Handle case.
(avr_cbranch_cost) [ZERO_EXTRACT]: Adjust rtx costs.
* config/avr/avr.md (define_attr "adjust_len") Add insv.
(andhi3, *andhi3, andpsi3, *andpsi3, andsi3, *andsi3):
Add constraint alternative where the 3rd operand is a power
of 2, and the source register may differ from the destination.
(*insv.any_shift._split): Call avr_out_insv to output
instructions.  Set attr "length" to "insv".
* config/avr/constraints.md (Cb2, Cb3, Cb4): New constraints.

gcc/testsuite/
* gcc.target/avr/torture/insv-anyshift-hi.c: New test.
* gcc.target/avr/torture/insv-anyshift-si.c: New test.

diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index 3e19409d636..bb680312117 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -58,6 +58,7 @@ extern const char *ret_cond_branch (rtx x, int len, int reverse);
 extern const char *avr_out_movpsi (rtx_insn *, rtx*, int*);
 extern const char *avr_out_sign_extend (rtx_insn *, rtx*, int*);
 extern const char *avr_out_insert_notbit (rtx_insn *, rtx*, int*);
+extern const char *avr_out_insv (rtx_insn *, rtx*, int*);
 extern const char *avr_out_extr (rtx_insn *, rtx*, int*);
 extern const char *avr_out_extr_not (rtx_insn *, rtx*, int*);
 extern const char *avr_out_plus_set_ZN (rtx*, int*);
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index c8b2b504e3f..36995e05cbe 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -9795,6 +9795,178 @@ avr_out_insert_notbit (rtx_insn *insn, rtx op[], int *plen)
 }
 
 
+/* Output instructions for  XOP[0] = (XOP[1] <Shift> XOP[2]) & XOP[3]  where
+   -  XOP[0] and XOP[1] have the same mode which is one of: QI, HI, PSI, SI.
+   -  XOP[3] is an exact const_int power of 2.
+   -  XOP[2] and XOP[3] are const_int.
+   -  <Shift> is any of: ASHIFT, LSHIFTRT, ASHIFTRT.
+   -  The result depends on XOP[1].
+   or  XOP[0] = XOP[1] & XOP[2]  where
+   -  XOP[0] and XOP[1] have the same mode which is one of: HI, PSI, SI.
+   -  XOP[2] is an exact const_int power of 2.
+   Returns "".
+   PLEN != 0: Set *PLEN to the code length in words.  Don't output anything.
+   PLEN == 0: Output instructions.  */
+
+const char*
+avr_out_insv (rtx_insn *insn, rtx xop[], int *plen)
+{
+  machine_mode mode = GET_MODE (xop[0]);
+  int n_bytes = GET_MODE_SIZE (mode);
+  rtx xsrc = SET_SRC (single_set (insn));
+
+  gcc_assert (AND == GET_CODE (xsrc));
+
+  rtx xop2 = xop[2];
+  rtx xop3 = xop[3];
+
+  

[patch,avr,applied] Add two RTL peepholes.

2024-03-05 Thread Georg-Johann Lay

Register alloc may expand a 3-operand arithmetic X = Y o CST as
   X = CST
   X o= Y
where it may be better to instead:
   X = Y
   X o= CST

Johann

--

AVR: Add two RTL peepholes.

Register alloc may expand a 3-operand arithmetic X = Y o CST as
   X = CST
   X o= Y
where it may be better to instead:
   X = Y
   X o= CST
because 1) the first insn may use MOVW for "X = Y", and 2) the
operation may be more efficient when performed with a constant,
for example when ADIW or SBIW can be used, or some bytes of
the constant are 0x00 or 0xff.

gcc/
* config/avr/avr.md: Add two RTL peepholes for PLUS, IOR and AND
in HI, PSI, SI that swap operation order from "X = CST, X o= Y"
to "X = Y, X o= CST".

diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 6bdf4682fab..bc8a59c956c 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -932,6 +932,55 @@ (define_peephole2 ; movw_r
 operands[5] = gen_rtx_REG (HImode, REGNO (operands[3]));
   })
 
+
+;; Register alloc may expand a 3-operand arithmetic X = Y o CST as
+;;X = CST
+;;X o= Y
+;; where it may be better to instead:
+;;X = Y
+;;X o= CST
+;; because 1) the first insn may use MOVW for "X = Y", and 2) the
+;; operation may be more efficient when performed with a constant,
+;; for example when ADIW or SBIW can be used, or some bytes of
+;; the constant are 0x00 or 0xff.
+(define_peephole2
+  [(parallel [(set (match_operand:HISI 0 "d_register_operand")
+   (match_operand:HISI 1 "const_int_operand"))
+  (clobber (reg:CC REG_CC))])
+   (parallel [(set (match_dup 0)
+   (piaop:HISI (match_dup 0)
+   (match_operand:HISI 2 "register_operand")))
+  (clobber (scratch:QI))
+  (clobber (reg:CC REG_CC))])]
+  "! reg_overlap_mentioned_p (operands[0], operands[2])"
+  [(parallel [(set (match_dup 0)
+   (match_dup 2))
+  (clobber (reg:CC REG_CC))])
+   (parallel [(set (match_dup 0)
+   (piaop:HISI (match_dup 0)
+   (match_dup 1)))
+  (clobber (scratch:QI))
+  (clobber (reg:CC REG_CC))])])
+
+;; Same, but just for plus:HI without a scratch:QI.
+(define_peephole2
+  [(parallel [(set (match_operand:HI 0 "d_register_operand")
+   (match_operand:HI 1 "const_int_operand"))
+  (clobber (reg:CC REG_CC))])
+   (parallel [(set (match_dup 0)
+   (plus:HI (match_dup 0)
+(match_operand:HI 2 "register_operand")))
+  (clobber (reg:CC REG_CC))])]
+  "! reg_overlap_mentioned_p (operands[0], operands[2])"
+  [(parallel [(set (match_dup 0)
+   (match_dup 2))
+  (clobber (reg:CC REG_CC))])
+   (parallel [(set (match_dup 0)
+   (plus:HI (match_dup 0)
+(match_dup 1)))
+  (clobber (reg:CC REG_CC))])])
+
+
 ;; For LPM loads from AS1 we split
 ;;R = *Z
 ;; to
@@ -1644,9 +1693,9 @@ (define_insn_and_split "*addhi3_sp"
   [(set_attr "length" "6")
(set_attr "adjust_len" "addto_sp")])
 
-;; "*addhi3"
-;; "*addhq3" "*adduhq3"
-;; "*addha3" "*adduha3"
+;; "*addhi3_split"
+;; "*addhq3_split"  "*adduhq3_split"
+;; "*addha3_split"  "*adduha3_split"
 (define_insn_and_split "*add3_split"
   [(set (match_operand:ALL2 0 "register_operand"   "=??r,d,!w,d")
 (plus:ALL2 (match_operand:ALL2 1 "register_operand"  "%0,0,0 ,0")
@@ -1661,6 +1710,9 @@ (define_insn_and_split "*add3_split"
   ""
   [(set_attr "isa" "*,*,adiw,*")])
 
+;; "*addhi3"
+;; "*addhq3"  "*adduhq3"
+;; "*addha3"  "*adduha3"
 (define_insn "*add3"
   [(set (match_operand:ALL2 0 "register_operand"   "=??r,d,!w,d")
 (plus:ALL2 (match_operand:ALL2 1 "register_operand"  "%0,0,0 ,0")
@@ -1732,6 +1784,9 @@ (define_insn_and_split "add3_clobber"
   (clobber (match_dup 3))
   (clobber (reg:CC REG_CC))])])
 
+;; "*addhi3_clobber"
+;; "*addhq3_clobber"  "*adduhq3_clobber"
+;; "*addha3_clobber"  "*adduha3_clobber"
 (define_insn "*add3_clobber"
   [(set (match_operand:ALL2 0 "register_operand""=!w,d,r")
 (plus:ALL2 (match_operand:ALL2 1 "register_operand"  "%0,0,0")


[patch,avr,applied] Adjusted rtx costs of plus + zero_extend

2024-03-06 Thread Georg-Johann Lay

Adjusted rtx costs of (plus (zero_extend (...)) reg).

Johann

--

AVR: Adjust rtx cost of plus + zero_extend.

gcc/
* config/avr/avr.cc (avr_rtx_costs_1) [PLUS+ZERO_EXTEND]: Adjust
rtx cost.

diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 36995e05cbe..b87ae6a256d 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -12513,6 +12513,13 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int 
outer_code,

   return true;

 case PLUS:
+  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
+ && REG_P (XEXP (x, 1)))
+   {
+ *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) - 1);
+ return true;
+   }
+
   switch (mode)
{
case E_QImode:


[patch,avr,applied] Add an insn combine pattern for offset computation.

2024-03-08 Thread Georg-Johann Lay

Computing  uint16_t += 2 * uint8_t  can occur when an offset
into a 16-bit array is computed.  Without this pattern it costs
six instructions: A move (1), a zero-extend (1), a shift (2) and
an addition (2).  With this pattern it costs 4.

Johann

--

AVR: Add an insn combine pattern for offset computation.

Computing  uint16_t += 2 * uint8_t  can occur when an offset
into a 16-bit array is computed.  Without this pattern it costs
six instructions: A move (1), a zero-extend (1), a shift (2) and
an addition (2).  With this pattern it costs 4.

gcc/
* config/avr/avr.md (*addhi3_zero_extend.ashift1): New pattern.
* config/avr/avr.cc (avr_rtx_costs_1) [PLUS]: Compute its cost.

diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index b87ae6a256d..1fa4b557f5d 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -12513,6 +12513,17 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
   return true;
 
 case PLUS:
+  // uint16_t += 2 * uint8_t;
+  if (mode == HImode
+	  && GET_CODE (XEXP (x, 0)) == ASHIFT
+	  && REG_P (XEXP (x, 1))
+	  && XEXP (XEXP (x, 0), 1) == const1_rtx
+	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND)
+	{
+	  *total = COSTS_N_INSNS (4);
+	  return true;
+	}
+
   if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
 	  && REG_P (XEXP (x, 1)))
 	{
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index bc8a59c956c..52b6cff4a8b 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -1630,6 +1630,39 @@ (define_insn "*addhi3_zero_extend.const"
   "subi %A0,%n2\;sbc %B0,%B0"
   [(set_attr "length" "2")])
 
+
+;; Occurs when computing offsets into 16-bit arrays.
+;; Saves up to 2 instructions.
+(define_insn_and_split "*addhi3_zero_extend.ashift1.split"
+  [(set (match_operand:HI 0 "register_operand""=r")
+(plus:HI (ashift:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r"))
+(const_int 1))
+ (match_operand:HI 2 "register_operand""0")))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(parallel [(set (match_dup 0)
+   (plus:HI (ashift:HI (zero_extend:HI (match_dup 1))
+   (const_int 1))
+(match_dup 2)))
+  (clobber (reg:CC REG_CC))])])
+
+(define_insn "*addhi3_zero_extend.ashift1"
+  [(set (match_operand:HI 0 "register_operand""=r")
+(plus:HI (ashift:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "r"))
+(const_int 1))
+ (match_operand:HI 2 "register_operand""0")))
+   (clobber (reg:CC REG_CC))]
+  "reload_completed"
+  {
+return reg_overlap_mentioned_p (operands[1], operands[0])
+  ? "mov __tmp_reg__,%1\;add %A0,__tmp_reg__\;adc %B0,__zero_reg__\;add %A0,__tmp_reg__\;adc %B0,__zero_reg__"
+  : "add %A0,%1\;adc %B0,__zero_reg__\;add %A0,%1\;adc %B0,__zero_reg__";
+  }
+  [(set (attr "length")
+(symbol_ref ("4 + reg_overlap_mentioned_p (operands[1], operands[0])")))])
+
+
 (define_insn_and_split "*usum_widenqihi3_split"
   [(set (match_operand:HI 0 "register_operand"  "=r")
 (plus:HI (zero_extend:HI (match_operand:QI 1 "register_operand"  "0"))


[patch,avr,applied] Add some more cost computation

2024-03-09 Thread Georg-Johann Lay

This adds cost computation for some insn combiner patterns
and improves a few other nits.

Johann

--

AVR: Add cost computation for some insn combine patterns.

gcc/
* config/avr/avr.cc (avr_rtx_costs_1) [PLUS]: Determine cost for
usum_widenqihi and add_zero_extend1.
[MINUS]: Determine costs for udiff_widenqihi, sub+zero_extend,
sub+sign_extend.
* config/avr/avr.md (*addhi3.sign_extend1, *subhi3.sign_extend2):
Compute exact insn lengths.
(*usum_widenqihi3): Allow input operands to commute.

diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 1fa4b557f5d..00fce8da15f 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -12524,10 +12524,25 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
 	  return true;
 	}
 
+  // *usum_widenqihi
+  if (mode == HImode
+	  && GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
+	  && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
+	{
+	  *total = COSTS_N_INSNS (3);
+	  return true;
+	}
+
   if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
 	  && REG_P (XEXP (x, 1)))
 	{
-	  *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) - 1);
+	  *total = COSTS_N_INSNS (GET_MODE_SIZE (mode));
+	  return true;
+	}
+  if (REG_P (XEXP (x, 0))
+	  && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
+	{
+	  *total = COSTS_N_INSNS (GET_MODE_SIZE (mode));
 	  return true;
 	}
 
@@ -12610,6 +12625,29 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
   return true;
 
 case MINUS:
+  // *udiff_widenqihi
+  if (mode == HImode
+	  && GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
+	  && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
+	{
+	  *total = COSTS_N_INSNS (2);
+	  return true;
+	}
+  // *sub3_zero_extend1
+  if (REG_P (XEXP (x, 0))
+	  && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
+	{
+	  *total = COSTS_N_INSNS (GET_MODE_SIZE (mode));
+	  return true;
+	}
+  // *sub3.sign_extend2
+  if (REG_P (XEXP (x, 0))
+	  && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
+	{
+	  *total = COSTS_N_INSNS (2 + GET_MODE_SIZE (mode));
+	  return true;
+	}
+
   if (AVR_HAVE_MUL
 	  && QImode == mode
 	  && register_operand (XEXP (x, 0), QImode)
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 52b6cff4a8b..59ec724f7da 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -1588,12 +1588,10 @@ (define_insn_and_split "*addhi3.sign_extend1_split"
   ""
   "#"
   "&& reload_completed"
-  [(parallel
-  [(set (match_dup 0)
-(plus:HI
-  (sign_extend:HI (match_dup 1))
-  (match_dup 2)))
-   (clobber (reg:CC REG_CC))])])
+  [(parallel [(set (match_dup 0)
+   (plus:HI (sign_extend:HI (match_dup 1))
+(match_dup 2)))
+  (clobber (reg:CC REG_CC))])])
 
 
 (define_insn "*addhi3.sign_extend1"
@@ -1607,7 +1605,8 @@ (define_insn "*addhi3.sign_extend1"
   ? "mov __tmp_reg__,%1\;add %A0,%1\;adc %B0,__zero_reg__\;sbrc __tmp_reg__,7\;dec %B0"
   : "add %A0,%1\;adc %B0,__zero_reg__\;sbrc %1,7\;dec %B0";
   }
-  [(set_attr "length" "5")])
+  [(set (attr "length")
+(symbol_ref ("4 + reg_overlap_mentioned_p (operands[0], operands[1])")))])
 
 (define_insn_and_split "*addhi3_zero_extend.const_split"
   [(set (match_operand:HI 0 "register_operand" "=d")
@@ -1665,7 +1664,7 @@ (define_insn "*addhi3_zero_extend.ashift1"
 
 (define_insn_and_split "*usum_widenqihi3_split"
   [(set (match_operand:HI 0 "register_operand"  "=r")
-(plus:HI (zero_extend:HI (match_operand:QI 1 "register_operand"  "0"))
+(plus:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "%0"))
  (zero_extend:HI (match_operand:QI 2 "register_operand"  "r"]
   ""
   "#"
@@ -1678,7 +1677,7 @@ (define_insn_and_split "*usum_widenqihi3_split"
 
 (define_insn "*usum_widenqihi3"
   [(set (match_operand:HI 0 "register_operand"  "=r")
-(plus:HI (zero_extend:HI (match_operand:QI 1 "register_operand"  "0"))
+(plus:HI (zero_extend:HI (match_operand:QI 1 "register_operand" "%0"))
  (zero_extend:HI (match_operand:QI 2 "register_operand"  "r"
(clobber (reg:CC REG_CC))]
   "reload_completed"
@@ -2186,7 +2185,8 @@ (define_insn "*subhi3.sign_extend2"
   ? "mov __tmp_reg__,%2\;sub %A0,%2\;sbc %B0,__zero_reg__\;sbrc __tmp_reg__,7\;inc %B0"
   : "sub %A0,%2\;sbc %B0,__zero_reg__\;sbrc %2,7\;inc %B0";
   }
-  [(set_attr "length" "5")])
+  [(set (attr "length")
+(symbol_ref ("4 + reg_overlap_mentioned_p (operands[0], operands[2])")))])
 
 ;; "subsi3"
 ;; "subsq3" "subusq3"


[patch,avr,applied] Tweak xor insn constraints

2024-03-18 Thread Georg-Johann Lay

xor insn allows some more values without the requirement
of a scratch register.  This patch adds new constraint
alternative for such values.  The output function avr_out_bitop
already handles these cases, so no change is needed there.

Johann

--

avr.md - Tweak xor insn constraints.

xor insn can handle some more values without the requirement of a
scratch register.  This patch adds a new constraint alternative for
such values.  The output function avr_out_bitop already handles
these cases, so no change is needed there.

gcc/
* config/avr/constraints.md (CX2, CX3, CX4): New constraints.
* config/avr/avr-protos.h (avr_xor_noclobber_dconst): New proto.
* config/avr/avr.cc (avr_xor_noclobber_dconst): New function.
* config/avr/avr.md (xorhi3, *xorhi3): Add "d,0,CX2,X" alternative.
(xorpsi3, *xorpsi3): Add "d,0,CX3,X" alternative.
(xorsi3, *xorsi3): Add "d,0,CX4,X" alternative.

diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index bb680312117..dc23cfbf461 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -101,6 +101,7 @@ extern const char* avr_out_xload (rtx_insn *, rtx*, int*);
 extern const char* avr_out_cpymem (rtx_insn *, rtx*, int*);
 extern const char* avr_out_insert_bits (rtx*, int*);
 extern bool avr_popcount_each_byte (rtx, int, int);
+extern bool avr_xor_noclobber_dconst (rtx, int);
 extern bool avr_has_nibble_0xf (rtx);
 
 extern int extra_constraint_Q (rtx x);
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 00fce8da15f..12c59668b4c 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -281,6 +281,31 @@ avr_popcount_each_byte (rtx xval, int n_bytes, int pop_mask)
 }
 
 
+/* Constraint helper function.  XVAL is a CONST_INT.  Return true if we
+   can perform XOR without a clobber reg, provided the operation is on
+   a d-register.  This means each byte is in { 0, 0xff, 0x80 }.  */
+
+bool
+avr_xor_noclobber_dconst (rtx xval, int n_bytes)
+{
+  machine_mode mode = GET_MODE (xval);
+
+  if (VOIDmode == mode)
+mode = SImode;
+
+  for (int i = 0; i < n_bytes; ++i)
+{
+  rtx xval8 = simplify_gen_subreg (QImode, xval, mode, i);
+  unsigned int val8 = UINTVAL (xval8) & GET_MODE_MASK (QImode);
+
+  if (val8 != 0 && val8 != 0xff && val8 != 0x80)
+	return false;
+}
+
+  return true;
+}
+
+
 /* Access some RTX as INT_MODE.  If X is a CONST_FIXED we can get
the bit representation of X by "casting" it to CONST_INT.  */
 
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index bc408633eb5..97f42be7729 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -4741,10 +4741,10 @@ (define_insn "*xorqi3"
   [(set_attr "length" "1")])
 
 (define_insn_and_split "xorhi3"
-  [(set (match_operand:HI 0 "register_operand"   "=??r,r  ,r")
-(xor:HI (match_operand:HI 1 "register_operand" "%0,0  ,0")
-(match_operand:HI 2 "nonmemory_operand" "r,Cx2,n")))
-   (clobber (match_scratch:QI 3"=X,X  ,&d"))]
+  [(set (match_operand:HI 0 "register_operand"   "=??r,r  ,d  ,r")
+(xor:HI (match_operand:HI 1 "register_operand" "%0,0  ,0  ,0")
+(match_operand:HI 2 "nonmemory_operand" "r,Cx2,CX2,n")))
+   (clobber (match_scratch:QI 3"=X,X  ,X  ,&d"))]
   ""
   "#"
   "&& reload_completed"
@@ -4755,10 +4755,10 @@ (define_insn_and_split "xorhi3"
   (clobber (reg:CC REG_CC))])])
 
 (define_insn "*xorhi3"
-  [(set (match_operand:HI 0 "register_operand"   "=??r,r  ,r")
-(xor:HI (match_operand:HI 1 "register_operand" "%0,0  ,0")
-(match_operand:HI 2 "nonmemory_operand" "r,Cx2,n")))
-   (clobber (match_scratch:QI 3"=X,X  ,&d"))
+  [(set (match_operand:HI 0 "register_operand"   "=??r,r  ,d  ,r")
+(xor:HI (match_operand:HI 1 "register_operand" "%0,0  ,0  ,0")
+(match_operand:HI 2 "nonmemory_operand" "r,Cx2,CX2,n")))
+   (clobber (match_scratch:QI 3"=X,X  ,X  ,&d"))
(clobber (reg:CC REG_CC))]
   "reload_completed"
   {
@@ -4767,14 +4767,14 @@ (define_insn "*xorhi3"
 
 return avr_out_bitop (insn, operands, NULL);
   }
-  [(set_attr "length" "2,2,4")
-   (set_attr "adjust_len" "*,out_bitop,out_bitop")])
+  [(set_attr "length" "2,2,4,4")
+   (set_attr "adjust_len" "*,out_bitop,out_bitop,out_bitop")])
 
 (define_insn_and_split "xorpsi3"
-  [(set (match_operand:PSI 0 "register_operand""=??r,r  ,r")
-(xor:PSI (match_operand:PSI 1 "register_operand" "%0,0  ,0")
- (match_operand:PSI 2 "nonmemory_operand" "r,Cx3,n")))
-   (clobber (match_scratch:QI 3  "=X,X  ,&d"))]
+  [(set (match_operand:PSI 0 "register_operand""=??r,r  ,d  ,r")
+(xor:PSI (match_operand:PSI 1 "register_operand" "%0,0  ,0  ,0")
+ (match_operand:PSI 2 "nonmemory_operand" "r,Cx3,CX3,n")))
+   (clobber (match_scratch

[patch,avr,applied] Adjust message for SIGNAL and INTERRUPT usage

2024-03-22 Thread Georg-Johann Lay

Applied this patchlet for a more precise diagnostic.

Johann

--

AVR: Adjust message for SIGNAL and INTERRUPT usage

gcc/
* config/avr/avr.cc (avr_set_current_function): Adjust diagnostic
for deprecated SIGNAL and INTERRUPT usage without respective header.

diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 12c59668b4c..4a5a921107b 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -1495,14 +1495,20 @@ avr_set_current_function (tree decl)
   // Common problem is using "ISR" without first including 
avr/interrupt.h.

   const char *name = IDENTIFIER_POINTER (DECL_NAME (decl));
   name = default_strip_name_encoding (name);
-  if (strcmp ("ISR", name) == 0
-  || strcmp ("INTERRUPT", name) == 0
-  || strcmp ("SIGNAL", name) == 0)
+  if (strcmp ("ISR", name) == 0)
 {
   warning_at (loc, OPT_Wmisspelled_isr, "%qs is a reserved identifier"
  " in AVR-LibC.  Consider %<#include %>"
  " before using the %qs macro", name, name);
 }
+  if (strcmp ("INTERRUPT", name) == 0
+  || strcmp ("SIGNAL", name) == 0)
+{
+  warning_at (loc, OPT_Wmisspelled_isr, "%qs is a deprecated 
identifier"

+ " in AVR-LibC.  Consider %<#include %>"
+ " or %<#include %>"
+ " before using the %qs macro", name, name);
+}
 #endif // AVR-LibC naming conventions

   /* Don't print the above diagnostics more than once.  */


[patch,avr,applied]: Rename %_misc specs.

2024-02-08 Thread Georg-Johann Lay

This renames specs like cc1_misc to cc1_rodata_in_ram to
point out their purpose.

Johann

--

AVR: Rename device-specs %_misc to %_rodata_in_ram.

gcc/
* config/avr/gen-avr-mmcu-specs.cc: Rename spec cc1_misc to
cc1_rodata_in_ram.  Rename spec link_misc to link_rodata_in_ram.
Remove spec asm_misc.
* config/avr/specs.h: Same.
diff --git a/gcc/config/avr/gen-avr-mmcu-specs.cc b/gcc/config/avr/gen-avr-mmcu-specs.cc
index 02778aa3ce8..06d9d3c8d7d 100644
--- a/gcc/config/avr/gen-avr-mmcu-specs.cc
+++ b/gcc/config/avr/gen-avr-mmcu-specs.cc
@@ -294,7 +294,7 @@ print_mcu (const avr_mcu_t *mcu)
 	   : "\t%{mabsdata}");
 
   // -m[no-]rodata-in-ram basically affects linking, but sanity-check early.
-  fprintf (f, "*cc1_misc:\n\t%%(check_rodata_in_ram)\n\n");
+  fprintf (f, "*cc1_rodata_in_ram:\n\t%%(check_rodata_in_ram)\n\n");
 
   // avr-gcc specific specs for assembling / the assembler.
 
@@ -319,8 +319,6 @@ print_mcu (const avr_mcu_t *mcu)
 	   ? "\t%{mno-skip-bug}"
 	   : "\t%{!mskip-bug: -mno-skip-bug}");
 
-  fprintf (f, "*asm_misc:\n" /* empty */ "\n\n");
-
   // avr-specific specs for linking / the linker.
 
   int wrap_k =
@@ -361,7 +359,7 @@ print_mcu (const avr_mcu_t *mcu)
 }
 
   // -m[no-]rodata-in-ram affects linking.  Sanity check its usage.
-  fprintf (f, "*link_misc:\n\t%%(check_rodata_in_ram)\n\n");
+  fprintf (f, "*link_rodata_in_ram:\n\t%%(check_rodata_in_ram)\n\n");
 
   // Specs known to GCC.
 
diff --git a/gcc/config/avr/specs.h b/gcc/config/avr/specs.h
index 574402035bc..0ccc37b8844 100644
--- a/gcc/config/avr/specs.h
+++ b/gcc/config/avr/specs.h
@@ -36,7 +36,7 @@ along with GCC; see the file COPYING3.  If not see
   "%(cc1_errata_skip) " \
   "%(cc1_rmw) " \
   "%(cc1_absdata) " \
-  "%(cc1_misc) "
+  "%(cc1_rodata_in_ram) "
 
 #undef  CC1PLUS_SPEC
 #define CC1PLUS_SPEC\
@@ -54,8 +54,7 @@ along with GCC; see the file COPYING3.  If not see
   "%(asm_relax) "   \
   "%(asm_rmw) " \
   "%(asm_gccisr) "  \
-  "%(asm_errata_skip) " \
-  "%(asm_misc) "
+  "%(asm_errata_skip) "
 
 #define LINK_RELAX_SPEC \
   "%{mrelax:--relax} "
@@ -67,7 +66,7 @@ along with GCC; see the file COPYING3.  If not see
   "%(link_text_start) " \
   "%(link_relax) "  \
   "%(link_pmem_wrap) "  \
-  "%(link_misc) "   \
+  "%(link_rodata_in_ram) "  \
   "%{shared:%eshared is not supported} "
 
 #undef  LIB_SPEC


[patch, avr, applied] Specs always define __AVR_PM_BASE_ADDRESS__ when the core has it

2024-02-08 Thread Georg-Johann Lay

The specs now always define __AVR_PM_BASE_ADDRESS__ when the core has it,
instead of only overriding it when it differs from the core's value.

Johann

--


AVR: Always define __AVR_PM_BASE_ADDRESS__ in specs provided the core 
has it.


gcc/
* config/avr/gen-avr-mmcu-specs.cc (print_mcu) <*cpp_mcu>: Spec always
defines __AVR_PM_BASE_ADDRESS__ if the core has it.
diff --git a/gcc/config/avr/gen-avr-mmcu-specs.cc b/gcc/config/avr/gen-avr-mmcu-specs.cc
index 06d9d3c8d7d..41ebfa82eb5 100644
--- a/gcc/config/avr/gen-avr-mmcu-specs.cc
+++ b/gcc/config/avr/gen-avr-mmcu-specs.cc
@@ -199,13 +199,21 @@ print_mcu (const avr_mcu_t *mcu)
   bool flmap = (mcu->dev_attribute & AVR_ISA_FLMAP);
   bool is_arch = mcu->macro == NULL;
   bool is_device = ! is_arch;
-  int flash_pm_offset = 0;
+  int rodata_pm_offset = 0;
+  int pm_base_address = 0;
 
   if (arch->flash_pm_offset
   && mcu->flash_pm_offset
   && mcu->flash_pm_offset != arch->flash_pm_offset)
 {
-  flash_pm_offset = mcu->flash_pm_offset;
+  rodata_pm_offset = mcu->flash_pm_offset;
+}
+
+  if (arch->flash_pm_offset)
+{
+  pm_base_address = mcu->flash_pm_offset
+	? mcu->flash_pm_offset
+	: arch->flash_pm_offset;
 }
 
   if (is_arch
@@ -339,8 +347,8 @@ print_mcu (const avr_mcu_t *mcu)
 
   fprintf (f, "*link_arch:\n\t%s", link_arch_spec);
   if (is_device
-  && flash_pm_offset)
-fprintf (f, " --defsym=__RODATA_PM_OFFSET__=0x%x", flash_pm_offset);
+  && rodata_pm_offset)
+fprintf (f, " --defsym=__RODATA_PM_OFFSET__=0x%x", rodata_pm_offset);
   fprintf (f, "\n\n");
 
   if (is_device)
@@ -381,10 +389,10 @@ print_mcu (const avr_mcu_t *mcu)
 
   fprintf (f, "*cpp_mcu:\n");
   fprintf (f, "\t-D%s", mcu->macro);
-  if (flash_pm_offset)
+  if (pm_base_address)
 	{
 	  fprintf (f, " -U__AVR_PM_BASE_ADDRESS__");
-	  fprintf (f, " -D__AVR_PM_BASE_ADDRESS__=0x%x", flash_pm_offset);
+	  fprintf (f, " -D__AVR_PM_BASE_ADDRESS__=0x%x", pm_base_address);
 	}
   if (have_flmap)
 	fprintf (f, " -D__AVR_HAVE_FLMAP__");


[patch,avr,applied] PR113824: Fix multilib set for ATA5795

2024-02-08 Thread Georg-Johann Lay

This device was in the wrong multilib set.

Johann

--

AVR: target/113824 - Fix multilib set for ATA5795.

gcc/
PR target/113824
* config/avr/avr-mcus.def (ata5795): Move from avr5 to avr4.
* doc/avr-mmcu.texi: Rebuild.

diff --git a/gcc/config/avr/avr-mcus.def b/gcc/config/avr/avr-mcus.def
index 7ddfba0a13c..27812d441f7 100644
--- a/gcc/config/avr/avr-mcus.def
+++ b/gcc/config/avr/avr-mcus.def
@@ -138,9 +138,10 @@ AVR_MCU ("attiny167",ARCH_AVR35, AVR_ISA_NONE, "__AVR_ATtiny167__",
 AVR_MCU ("attiny1634",   ARCH_AVR35, AVR_ISA_NONE, "__AVR_ATtiny1634__",   0x0100, 0x0, 0x4000, 0)
 /* Enhanced, <= 8K.  */
 AVR_MCU ("avr4", ARCH_AVR4, AVR_ISA_NONE,  NULL,   0x0060, 0x0, 0x2000, 0)
+AVR_MCU ("ata5795",  ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATA5795__",  0x0100, 0x0, 0x2000, 0)
 AVR_MCU ("ata6285",  ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATA6285__",  0x0100, 0x0, 0x2000, 0)
 AVR_MCU ("ata6286",  ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATA6286__",  0x0100, 0x0, 0x2000, 0)
-AVR_MCU ("ata6289",  ARCH_AVR4, AVR_ISA_NONE, "__AVR_ATA6289__",   0x0100, 0x0, 0x2000, 0)
+AVR_MCU ("ata6289",  ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATA6289__",  0x0100, 0x0, 0x2000, 0)
 AVR_MCU ("ata6612c", ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATA6612C__", 0x0100, 0x0, 0x2000, 0)
 AVR_MCU ("atmega8",  ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega8__",  0x0060, 0x0, 0x2000, 0)
 AVR_MCU ("atmega8a", ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega8A__", 0x0060, 0x0, 0x2000, 0)
@@ -172,7 +173,6 @@ AVR_MCU ("ata5787",  ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5787__",
 AVR_MCU ("ata5790",  ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5790__",   0x0100, 0x0, 0x4000, 0)
 AVR_MCU ("ata5790n", ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5790N__",  0x0100, 0x0, 0x4000, 0)
 AVR_MCU ("ata5791",  ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5791__",   0x0100, 0x0, 0x4000, 0)
-AVR_MCU ("ata5795",  ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5795__",   0x0100, 0x0, 0x2000, 0)
 AVR_MCU ("ata5831",  ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5831__",   0x0200, 0x8000, 0xd000, 0)
 AVR_MCU ("ata5835",  ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5835__",   0x0200, 0x8000, 0xd200, 0)
 AVR_MCU ("ata6613c", ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA6613C__",  0x0100, 0x0, 0x4000, 0)
diff --git a/gcc/doc/avr-mmcu.texi b/gcc/doc/avr-mmcu.texi
index f38a0e06343..dcbf4ef7247 100644
--- a/gcc/doc/avr-mmcu.texi
+++ b/gcc/doc/avr-mmcu.texi
@@ -34,11 +34,11 @@
 
 @item @anchor{avr4}avr4
 ``Enhanced'' devices with up to 8@tie{}KiB of program memory.
-@*@var{mcu}@tie{}= @code{atmega48}, @code{atmega48a}, @code{atmega48p}, @code{atmega48pa}, @code{atmega48pb}, @code{atmega8}, @code{atmega8a}, @code{atmega8hva}, @code{atmega88}, @code{atmega88a}, @code{atmega88p}, @code{atmega88pa}, @code{atmega88pb}, @code{atmega8515}, @code{atmega8535}, @code{ata6285}, @code{ata6286}, @code{ata6289}, @code{ata6612c}, @code{at90pwm1}, @code{at90pwm2}, @code{at90pwm2b}, @code{at90pwm3}, @code{at90pwm3b}, @code{at90pwm81}.
+@*@var{mcu}@tie{}= @code{atmega48}, @code{atmega48a}, @code{atmega48p}, @code{atmega48pa}, @code{atmega48pb}, @code{atmega8}, @code{atmega8a}, @code{atmega8hva}, @code{atmega88}, @code{atmega88a}, @code{atmega88p}, @code{atmega88pa}, @code{atmega88pb}, @code{atmega8515}, @code{atmega8535}, @code{ata5795}, @code{ata6285}, @code{ata6286}, @code{ata6289}, @code{ata6612c}, @code{at90pwm1}, @code{at90pwm2}, @code{at90pwm2b}, @code{at90pwm3}, @code{at90pwm3b}, @code{at90pwm81}.
 
 @item @anchor{avr5}avr5
 ``Enhanced'' devices with 16@tie{}KiB up to 64@tie{}KiB of program memory.
-@*@var{mcu}@tie{}= @code{atmega16}, @code{atmega16a}, @code{atmega16hva}, @code{atmega16hva2}, @code{atmega16hvb}, @code{atmega16hvbrevb}, @code{atmega16m1}, @code{atmega16u4}, @code{atmega161}, @code{atmega162}, @code{atmega163}, @code{atmega164a}, @code{atmega164p}, @code{atmega164pa}, @code{atmega165}, @code{atmega165a}, @code{atmega165p}, @code{atmega165pa}, @code{atmega168}, @code{atmega168a}, @code{atmega168p}, @code{atmega168pa}, @code{atmega168pb}, @code{atmega169}, @code{atmega169a}, @code{atmega169p}, @code{atmega169pa}, @code{atmega32}, @code{atmega32a}, @code{atmega32c1}, @code{atmega32hvb}, @code{atmega32hvbrevb}, @code{atmega32m1}, @code{atmega32u4}, @code{atmega32u6}, @code{atmega323}, @code{atmega324a}, @code{atmega324p}, @code{atmega324pa}, @code{atmega324pb}, @code{atmega325}, @code{atmega325a}, @code{atmega325p}, @code{atmega325pa}, @code{atmega328}, @code{atmega328p}, @code{atmega328pb}, @code{atmega329}, @code{atmega329a}, @code{atmega329p}, @code{atmega329pa}, @code{atmega3250}, @code{atmega3250a}, @code{atmega3250p}, @code{atmega3250pa}, @code{atmega3290}, @code{atmega3290a}, @code{atmega3290p}, @code{atmega3290pa}, @code{atmega406}, @code{atmega64}, @code{atmega64a}, @co

[patch,avr,applied] Tidy up gen-avr-mmcu-specs.cc

2024-02-08 Thread Georg-Johann Lay

This patchlet tidies up gen-avr-mmcu-specs.cc.
Some information was computed more than once, in different
functions. The patch uses a new struct to pass around information.

Johann

AVR: Tidy up gen-avr-mmcu-specs.cc

gcc/
* config/avr/gen-avr-mmcu-specs.cc (struct McuInfo): New.
(main, print_mcu, diagnose_mrodata_in_ram): Pass it down.

--

diff --git a/gcc/config/avr/gen-avr-mmcu-specs.cc b/gcc/config/avr/gen-avr-mmcu-specs.cc
index 41ebfa82eb5..ea69145d404 100644
--- a/gcc/config/avr/gen-avr-mmcu-specs.cc
+++ b/gcc/config/avr/gen-avr-mmcu-specs.cc
@@ -129,62 +129,70 @@ static const bool have_avrxmega3_rodata_in_flash = false;
 #endif
 
 
-static void
-diagnose_mrodata_in_ram (FILE *f, const char *spec, const avr_mcu_t *mcu)
+struct McuInfo
 {
-  enum avr_arch_id arch_id = mcu->arch_id;
-  const avr_arch_t *arch = &avr_arch_types[arch_id];
-  const bool is_arch = mcu->macro == NULL;
-  const bool flmap = (mcu->dev_attribute & AVR_ISA_FLMAP);
-  const bool have_flmap2 = have_avrxmega2_flmap && arch_id == ARCH_AVRXMEGA2;
-  const bool have_flmap4 = have_avrxmega4_flmap && arch_id == ARCH_AVRXMEGA4;
-  const bool have_flmap = flmap && (have_flmap2 || have_flmap4);
-
-  const bool rodata_in_flash = (arch_id == ARCH_AVRTINY
-|| (arch_id == ARCH_AVRXMEGA3
-&& have_avrxmega3_rodata_in_flash));
+  enum avr_arch_id arch_id;
+  const avr_arch_t *arch;
+  bool is_arch, is_device;
+  bool flmap, have_flmap2, have_flmap4, have_flmap;
+  bool rodata_in_flash;
   // Device name as used by the vendor, extracted from "__AVR___".
   char mcu_Name[50] = { 0 };
-  if (! is_arch)
-snprintf (mcu_Name, 1 + strlen (mcu->macro) - strlen ("__AVR___"),
-	  "%s", mcu->macro + strlen ("__AVR_"));
 
+  McuInfo (const avr_mcu_t *mcu)
+: arch_id (mcu->arch_id), arch (& avr_arch_types[arch_id]),
+  is_arch (mcu->macro == NULL), is_device (! is_arch),
+  flmap (mcu->dev_attribute & AVR_ISA_FLMAP),
+  have_flmap2 (have_avrxmega2_flmap && arch_id == ARCH_AVRXMEGA2),
+  have_flmap4 (have_avrxmega4_flmap && arch_id == ARCH_AVRXMEGA4),
+  have_flmap (flmap && (have_flmap2 || have_flmap4)),
+  rodata_in_flash (arch_id == ARCH_AVRTINY
+		   || (arch_id == ARCH_AVRXMEGA3
+			   && have_avrxmega3_rodata_in_flash))
+  {
+if (is_device)
+  snprintf (mcu_Name, 1 + strlen (mcu->macro) - strlen ("__AVR_" "__"),
+		"%s", mcu->macro + strlen ("__AVR_"));
+  }
+};
+
+
+static void
+diagnose_mrodata_in_ram (FILE *f, const char *spec, const avr_mcu_t *mcu,
+			 const McuInfo &mi)
+{
   fprintf (f, "%s:\n", spec);
-  if (rodata_in_flash && is_arch)
+  if (mi.rodata_in_flash && mi.is_arch)
 fprintf (f, "\t%%{mrodata-in-ram: %%e-mrodata-in-ram is not supported"
 	 " for %s}", mcu->name);
-  else if (rodata_in_flash)
+  else if (mi.rodata_in_flash)
 fprintf (f, "\t%%{mrodata-in-ram: %%e-mrodata-in-ram is not supported"
-	 " for %s (arch=%s)}", mcu_Name, arch->name);
-  else if (is_arch)
+	 " for %s (arch=%s)}", mi.mcu_Name, mi.arch->name);
+  else if (mi.is_arch)
 {
-  if (! have_flmap2 && ! have_flmap4)
+  if (! mi.have_flmap2 && ! mi.have_flmap4)
 	fprintf (f, "\t%%{mno-rodata-in-ram: %%e-mno-rodata-in-ram is not"
 		 " supported for %s}", mcu->name);
 }
-  else if (! have_flmap)
+  else if (! mi.have_flmap)
 fprintf (f, "\t%%{mno-rodata-in-ram: %%e-mno-rodata-in-ram is not supported"
-	 " for %s (arch=%s)}", mcu_Name, arch->name);
+	 " for %s (arch=%s)}", mi.mcu_Name, mi.arch->name);
   fprintf (f, "\n\n");
 }
 
 
 static void
-print_mcu (const avr_mcu_t *mcu)
+print_mcu (const avr_mcu_t *mcu, const McuInfo &mi)
 {
   const char *sp8_spec;
   const char *rcall_spec;
   const avr_mcu_t *arch_mcu;
-  const avr_arch_t *arch;
-  enum avr_arch_id arch_id = mcu->arch_id;
 
   for (arch_mcu = mcu; arch_mcu->macro; )
 arch_mcu--;
-  if (arch_mcu->arch_id != arch_id)
+  if (arch_mcu->arch_id != mi.arch_id)
 exit (EXIT_FAILURE);
 
-  arch = &avr_arch_types[arch_id];
-
   char name[100];
   if (snprintf (name, sizeof name, "specs-%s", mcu->name) >= (int) sizeof name)
exit (EXIT_FAILURE);
@@ -196,29 +204,26 @@ print_mcu (const avr_mcu_t *mcu)
   bool rmw = (mcu->dev_attribute & AVR_ISA_RMW) != 0;
   bool sp8 = (mcu->dev_attribute & AVR_SHORT_SP) != 0;
   bool rcall = (mcu->dev_attribute & AVR_ISA_RCALL);
-  bool flmap = (mcu->dev_attribute & AVR_ISA_FLMAP);
-  bool is_arch = mcu->macro == NULL;
-  bool is_device = ! is_arch;
   int rodata_pm_offset = 0;
   int pm_base_address = 0;
 
-  if (arch->flash_pm_offset
+  if (mi.arch->flash_pm_offset
   && mcu->flash_pm_offset
-  && mcu->flash_pm_offset != arch->flash_pm_offset)
+  && mcu->flash_pm_offset != mi.arch->flash_pm_offset)
 {
   rodata_pm_offset = mcu->flash_pm_offset;
 }
 
-  if (arch->flash_pm_offset)
+  if (mi.arch->flash_pm_offset)
 {
   pm_base_address = mcu->flash_pm_offset
 	? mcu->flash_pm_offset
-	: arch->flash_pm_offset;
+	: mi.arch->flash_pm

[patch,avr,applied] Addendum to target/112944: Initialize FLMAP as needed

2024-02-12 Thread Georg-Johann Lay

This code will link against parts of the startup code
from AVR-LibC when it is needed to init bit-field FLMAP.

Johann

--

AVR: target/112944 - Addendum: Link code to initialize NVMCTRL_CTRLB.FLMAP

For devices that see a part of the flash memory in the RAM address space,
bit-field NVMCTRL_CTRLB.FLMAP must match the value of symbol __flmap.
This is achieved by dragging in startup code from lib.a.
The mechanism is the same as for libgcc's __do_copy_data and
__do_clear_bss.

The code is implemented in AVR-LibC #931 and can be dragged by referencing
__do_flmap_init.

In addition to setting FLMAP, that code also sets bit FLMAPLOCK provided
symbol __flmap_lock has a non-zero value.  This protects FLMAP from future
changes.

When the __do_flmap_init code is not wanted, the symbol can be satisfied by
linking with  -Wl,--defsym,__do_flmap_init=0

gcc/
PR target/112944
* config/avr/gen-avr-mmcu-specs.cc (print_mcu) [have_flmap]:
<*link_rodata_in_ram>: Spec undefs symbol __do_flmap_init
when not linked with -mrodata-in-ram.

diff --git a/gcc/config/avr/gen-avr-mmcu-specs.cc 
b/gcc/config/avr/gen-avr-mmcu-specs.cc

index ea69145d404..bb94bea12b0 100644
--- a/gcc/config/avr/gen-avr-mmcu-specs.cc
+++ b/gcc/config/avr/gen-avr-mmcu-specs.cc
@@ -369,7 +369,10 @@ print_mcu (const avr_mcu_t *mcu, const McuInfo &mi)
 }

   // -m[no-]rodata-in-ram affects linking.  Sanity check its usage.
-  fprintf (f, "*link_rodata_in_ram:\n\t%%(check_rodata_in_ram)\n\n");
+  fprintf (f, "*link_rodata_in_ram:\n\t%%(check_rodata_in_ram)");
+  if (mi.is_device && mi.have_flmap)
+fprintf (f, " %%{!mrodata-in-ram:-u __do_flmap_init}");
+  fprintf (f, "\n\n");

   // Specs known to GCC.



Re: [pushed] wwwdocs: gcc-14: Fix typo in AVR section

2024-02-14 Thread Georg-Johann Lay

Am 14.02.24 um 01:40 schrieb Gerald Pfeifer:

Note that  is not part of current HTML standards; can we simply
remove it?


Hi Gerald,

thanks for looking into this.

The  is not strictly needed, I just had the case that
"-Wl,--defsym,__RODATA_FLASH_START__=32k" had a line-break in it.

In addition, I believe it might be good to rephrase that sentence. Do you 
mean "the linker will not pull in that code from ... any more"?


Yes.  When the symbol is satisfied by --defsym, then the code is not
dragged or pulled from that static lib.  I am not a native speaker,
and it is great when you fix any awkward formulations or grammar.

Thanks a lot

Johann


Gerald
---
  htdocs/gcc-14/changes.html | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/htdocs/gcc-14/changes.html b/htdocs/gcc-14/changes.html
index 6ac7c8b1..92bd0a7b 100644
--- a/htdocs/gcc-14/changes.html
+++ b/htdocs/gcc-14/changes.html
@@ -370,7 +370,7 @@ a work-in-progress.
precedence over __flmap.
For example, linking with
-Wl,--defsym,__RODATA_FLASH_START__=32k
-   choses the second 32 KiB block.
+   chooses the second 32 KiB block.
The default uses the last 32 KiB block, which is also the
hardware default for bit-field NVMCTRL_CTRLB.FLMAP.
When a non-default block is used,


[patch,avr,applied] Fix PR target/113927: Simple code triggers a stack frame

2024-02-15 Thread Georg-Johann Lay

Applied this patch

Johann

--

AVR: target 113927 - Simple code triggers stack frame for Reduced Tiny.

The -mmcu=avrtiny cores have no ADIW and SBIW instructions.  This was
implemented by clearing all regs out of regclass ADDW_REGS so that
constraint "w" never matched.  This corrupted the subset relations of
the register classes as they appear in enum reg_class.

This patch keeps ADDW_REGS like for all other cores, i.e. it contains
R24...R31.  Instead of tests like  test_hard_reg_class (ADDW_REGS, *)
the code now uses  avr_adiw_reg_p (*).  And all insns with constraint "w"
get "isa" insn attribute value of "adiw".

Plus, a new built-in macro __AVR_HAVE_ADIW__ is provided, which is more
specific than __AVR_TINY__.

gcc/
PR target/113927
* config/avr/avr.h (AVR_HAVE_ADIW): New macro.
* config/avr/avr-protos.h (avr_adiw_reg_p): New proto.
* config/avr/avr.cc (avr_adiw_reg_p): New function.
(avr_conditional_register_usage) [AVR_TINY]: Don't clear ADDW_REGS.
Replace test_hard_reg_class (ADDW_REGS, ...) with calls to avr_adiw_reg_p.
* config/avr/avr.md: Same.
(attr "isa") : Remove.
: Add.
(define_insn, define_insn_and_split): When an alternative has
constraint "w", then set attribute "isa" to "adiw".
* config/avr/avr-c.cc (avr_cpu_cpp_builtins) [AVR_HAVE_ADIW]:
Built-in define __AVR_HAVE_ADIW__.
* doc/invoke.texi (AVR Options): Document it.
diff --git a/gcc/config/avr/avr-c.cc b/gcc/config/avr/avr-c.cc
index 60905a76556..5e7f759ed73 100644
--- a/gcc/config/avr/avr-c.cc
+++ b/gcc/config/avr/avr-c.cc
@@ -307,6 +307,7 @@ avr_cpu_cpp_builtins (struct cpp_reader *pfile)
   if (AVR_HAVE_ELPMX)cpp_define (pfile, "__AVR_HAVE_ELPMX__");
   if (AVR_HAVE_MOVW) cpp_define (pfile, "__AVR_HAVE_MOVW__");
   if (AVR_HAVE_LPMX) cpp_define (pfile, "__AVR_HAVE_LPMX__");
+  if (AVR_HAVE_ADIW) cpp_define (pfile, "__AVR_HAVE_ADIW__");
 
   if (avr_arch->asm_only)
 cpp_define (pfile, "__AVR_ASM_ONLY__");
diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index 46b75f96b9c..7d1f815c664 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -123,6 +123,7 @@ extern enum reg_class avr_mode_code_base_reg_class (machine_mode, addr_space_t,
 extern bool avr_regno_mode_code_ok_for_base_p (int, machine_mode, addr_space_t, RTX_CODE, RTX_CODE);
 extern rtx avr_incoming_return_addr_rtx (void);
 extern rtx avr_legitimize_reload_address (rtx*, machine_mode, int, int, int, int, rtx (*)(rtx,int));
+extern bool avr_adiw_reg_p (rtx);
 extern bool avr_mem_flash_p (rtx);
 extern bool avr_mem_memx_p (rtx);
 extern bool avr_load_libgcc_p (rtx);
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index d21b286ed8b..4a55f14bff7 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -292,6 +292,17 @@ avr_to_int_mode (rtx x)
 : simplify_gen_subreg (int_mode_for_mode (mode).require (), x, mode, 0);
 }
 
+
+/* Return true if hard register REG supports the ADIW and SBIW instructions.  */
+
+bool
+avr_adiw_reg_p (rtx reg)
+{
+  return (AVR_HAVE_ADIW
+	  && test_hard_reg_class (ADDW_REGS, reg));
+}
+
+
 namespace {
 
 static const pass_data avr_pass_data_recompute_notes =
@@ -6272,7 +6283,7 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
   /* Word registers >= R24 can use SBIW/ADIW with 0..63.  */
 
   if (i == 0
-	  && test_hard_reg_class (ADDW_REGS, reg8))
+	  && avr_adiw_reg_p (reg8))
 	{
 	  int val16 = trunc_int_for_mode (INTVAL (xval), HImode);
 
@@ -8186,7 +8197,7 @@ avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc,
   if (!started
 	  && i % 2 == 0
 	  && i + 2 <= n_bytes
-	  && test_hard_reg_class (ADDW_REGS, reg8))
+	  && avr_adiw_reg_p (reg8))
 	{
 	  rtx xval16 = simplify_gen_subreg (HImode, xval, imode, i);
 	  unsigned int val16 = UINTVAL (xval16) & GET_MODE_MASK (HImode);
@@ -8678,7 +8689,7 @@ avr_out_plus_set_ZN (rtx *xop, int *plen)
 }
 
   if (n_bytes == 2
-  && test_hard_reg_class (ADDW_REGS, xreg)
+  && avr_adiw_reg_p (xreg)
   && IN_RANGE (INTVAL (xval), 1, 63))
 {
   // Add 16-bit value in [1..63] to a w register.
@@ -8705,7 +8716,7 @@ avr_out_plus_set_ZN (rtx *xop, int *plen)
 
   if (i == 0
 	  && n_bytes >= 2
-	  && test_hard_reg_class (ADDW_REGS, op[0]))
+	  && avr_adiw_reg_p (op[0]))
 	{
 	  op[1] = simplify_gen_subreg (HImode, xval, mode, 0);
 	  if (IN_RANGE (INTVAL (op[1]), 0, 63))
@@ -13312,7 +13323,6 @@ avr_conditional_register_usage (void)
 	  reg_alloc_order[i] = tiny_reg_alloc_order[i];
 	}
 
-  CLEAR_HARD_REG_SET (reg_class_contents[(int) ADDW_REGS]);
   CLEAR_HARD_REG_SET (reg_class_contents[(int) NO_LD_REGS]);
 }
 }
@@ -14043,7 +14053,7 @@ avr_out_cpymem (rtx_insn *insn ATTRIBUTE_UNUSED, rtx *op, int *plen)
 {
   addr_space_t as = (addr_space_t) INTVAL (op[0]);
   machine_mode loop_mode = GET_MODE (op[1]);
-  bool sbiw_p = test_hard_reg_class (ADDW_REGS, op[1]);
+  bool

[patch,avr,applied] Minor improvements to option and attribute documentation.

2024-02-18 Thread Georg-Johann Lay

Applied this patch.

Johann


--

AVR: Improve documentation for -mmcu=.

gcc/
* doc/invoke.texi (AVR Options) <-mmcu>: Remove "Atmel".
Note on complete device support.


AVR: Add examples for ISR macro to interrupt attribute doc.

gcc/
* doc/extend.texi (AVR Function Attributes): Fuse description
of "signal" and "interrupt" attribute.  Link pseudo instruction.
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 2b8ba1949bf..e048404dffe 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -5060,20 +5060,47 @@ without modifying an existing @option{-march=} or @option{-mcpu} option.
 These function attributes are supported by the AVR back end:
 
 @table @code
+@cindex @code{signal} function attribute, AVR
 @cindex @code{interrupt} function attribute, AVR
-@item interrupt
-Use this attribute to indicate
-that the specified function is an interrupt handler.  The compiler generates
+@item signal
+@itemx interrupt
+The function is an interrupt service routine (ISR).  The compiler generates
 function entry and exit sequences suitable for use in an interrupt handler
-when this attribute is present.
+when one of the attributes is present.
+
+The AVR hardware globally disables interrupts when an interrupt is executed.
+
+@itemize @bullet
+@item ISRs with the @code{signal} attribute do not re-enable interrupts.
+It is save to enable interrupts in a @code{signal} handler.
+This ``save'' only applies to the code
+generated by the compiler and not to the IRQ layout of the
+application which is responsibility of the application.
+
+@item ISRs with the @code{interrupt} attribute re-enable interrupts.
+The first instruction of the routine is a @code{SEI} instruction to
+globally enable interrupts.
+@end itemize
+
+The recommended way to use these attributes is by means of the
+@code{ISR} macro provided by @code{avr/interrupt.h} from
+@w{@uref{https://www.nongnu.org/avr-libc/user-manual/group__avr__interrupts.html,,AVR-LibC}}:
+@example
+#include 
 
-On the AVR, the hardware globally disables interrupts when an
-interrupt is executed.  The first instruction of an interrupt handler
-declared with this attribute is a @code{SEI} instruction to
-re-enable interrupts.  See also the @code{signal} function attribute
-that does not insert a @code{SEI} instruction.  If both @code{signal} and
-@code{interrupt} are specified for the same function, @code{signal}
-is silently ignored.
+ISR (INT0_vect) // Uses the "signal" attribute.
+@{
+// Code
+@}
+
+ISR (ADC_vect, ISR_NOBLOCK) // Uses the "interrupt" attribute.
+@{
+// Code
+@}
+@end example
+
+When both @code{signal} and @code{interrupt} are specified for the same
+function, then @code{signal} is silently ignored.
 
 @cindex @code{naked} function attribute, AVR
 @item naked
@@ -5088,7 +5115,9 @@ depended upon to work reliably and are not supported.
 
 @cindex @code{no_gccisr} function attribute, AVR
 @item no_gccisr
-Do not use @code{__gcc_isr} pseudo instructions in a function with
+Do not use the @code{__gcc_isr}
+@uref{https://sourceware.org/binutils/docs/as/AVR-Pseudo-Instructions.html,pseudo instruction}
+in a function with
 the @code{interrupt} or @code{signal} attribute aka. interrupt
 service routine (ISR).
 Use this attribute if the preamble of the ISR prologue should always read
@@ -5141,24 +5170,6 @@ or a frame pointer whereas @code{OS_main} and @code{OS_task} do this
 as needed.
 @end itemize
 
-@cindex @code{signal} function attribute, AVR
-@item signal
-Use this attribute on the AVR to indicate that the specified
-function is an interrupt handler.  The compiler generates function
-entry and exit sequences suitable for use in an interrupt handler when this
-attribute is present.
-
-See also the @code{interrupt} function attribute. 
-
-The AVR hardware globally disables interrupts when an interrupt is executed.
-Interrupt handler functions defined with the @code{signal} attribute
-do not re-enable interrupts.  It is save to enable interrupts in a
-@code{signal} handler.  This ``save'' only applies to the code
-generated by the compiler and not to the IRQ layout of the
-application which is responsibility of the application.
-
-If both @code{signal} and @code{interrupt} are specified for the same
-function, @code{signal} is silently ignored.
 @end table
 
 @node Blackfin Function Attributes
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index d0e67729f56..e18886e0ac7 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -23732,12 +23732,22 @@ These options are defined for AVR implementations:
 @table @gcctabopt
 @opindex mmcu
 @item -mmcu=@var{mcu}
-Specify Atmel AVR instruction set architectures (ISA) or MCU type.
-
-The default for this option is@tie{}@samp{avr2}.
-
-GCC supports the following AVR devices and ISAs:
-
+Specify the AVR instruction set architecture (ISA) or device type.
+The default for this option is@tie{}@code{avr2}.
+
+The following AVR devices and ISAs are supported.
+@emph{Note

[patch,avr,applied] Use @defbuiltin to document built-ins.

2024-02-20 Thread Georg-Johann Lay

This patch uses @defbuiltin to document built-in
functions so that the functions are listed in the index.
Previously, @table @code was used.

Johann

--

AVR: extend.texi - Use @defbuiltin to document built-ins.

gcc/
* doc/extend.texi (AVR Built-in Functions): Use @defbuiltin
instead of @table.

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index e048404dffe..b2383b55666 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -16782,37 +16782,41 @@ or if not a specific built-in is implemented or not. For example, if
 @code{__builtin_avr_nop} is available the macro
 @code{__BUILTIN_AVR_NOP} is defined to @code{1} and undefined otherwise.
 
-@table @code
+@defbuiltin{void __builtin_avr_nop (void)}
+@defbuiltinx{void __builtin_avr_nop (void)}
+@defbuiltinx{void __builtin_avr_sei (void)}
+@defbuiltinx{void __builtin_avr_cli (void)}
+@defbuiltinx{void __builtin_avr_sleep (void)}
+@defbuiltinx{void __builtin_avr_wdr (void)}
+@defbuiltinx{{unsigned char} __builtin_avr_swap (unsigned char)}
+@defbuiltinx{{unsigned int} __builtin_avr_fmul (unsigned char, unsigned char)}
+@defbuiltinx{int __builtin_avr_fmuls (char, char)}
+@defbuiltinx{int __builtin_avr_fmulsu (char, unsigned char)}
 
-@item void __builtin_avr_nop (void)
-@itemx void __builtin_avr_sei (void)
-@itemx void __builtin_avr_cli (void)
-@itemx void __builtin_avr_sleep (void)
-@itemx void __builtin_avr_wdr (void)
-@itemx unsigned char __builtin_avr_swap (unsigned char)
-@itemx unsigned int __builtin_avr_fmul (unsigned char, unsigned char)
-@itemx int __builtin_avr_fmuls (char, char)
-@itemx int __builtin_avr_fmulsu (char, unsigned char)
 These built-in functions map to the respective machine
 instruction, i.e.@: @code{nop}, @code{sei}, @code{cli}, @code{sleep},
 @code{wdr}, @code{swap}, @code{fmul}, @code{fmuls}
 resp. @code{fmulsu}. The three @code{fmul*} built-ins are implemented
 as library call if no hardware multiplier is available.
 
-@item void __builtin_avr_delay_cycles (unsigned long ticks)
+@enddefbuiltin
+
+@defbuiltin{void __builtin_avr_delay_cycles (unsigned long @var{ticks})}
 Delay execution for @var{ticks} cycles. Note that this
 built-in does not take into account the effect of interrupts that
 might increase delay time. @var{ticks} must be a compile-time
 integer constant; delays with a variable number of cycles are not supported.
+@enddefbuiltin
 
-@item char __builtin_avr_flash_segment (const __memx void*)
+@defbuiltin{char __builtin_avr_flash_segment (const __memx void*)}
 This built-in takes a byte address to the 24-bit
 @ref{AVR Named Address Spaces,address space} @code{__memx} and returns
 the number of the flash segment (the 64 KiB chunk) where the address
 points to.  Counting starts at @code{0}.
 If the address does not point to flash memory, return @code{-1}.
+@enddefbuiltin
 
-@item uint8_t __builtin_avr_insert_bits (uint32_t map, uint8_t bits, uint8_t val)
+@defbuiltin{uint8_t __builtin_avr_insert_bits (uint32_t @var{map}, uint8_t @var{bits}, uint8_t @var{val})}
 Insert bits from @var{bits} into @var{val} and return the resulting
 value. The nibbles of @var{map} determine how the insertion is
 performed: Let @var{X} be the @var{n}-th nibble of @var{map}
@@ -16856,12 +16860,12 @@ __builtin_avr_insert_bits (0x3210, bits, val);
 // reverse the bit order of bits
 __builtin_avr_insert_bits (0x01234567, bits, 0);
 @end smallexample
+@enddefbuiltin
 
-@item void __builtin_avr_nops (unsigned count)
+@defbuiltin{void __builtin_avr_nops (unsigned @var{count})}
 Insert @var{count} @code{NOP} instructions.
 The number of instructions must be a compile-time integer constant.
-
-@end table
+@enddefbuiltin
 
 @noindent
 There are many more AVR-specific built-in functions that are used to


[patch,avr,applied] Use int types of exact width and signedness in built-ins prototypes

2024-02-20 Thread Georg-Johann Lay

AVR: Use types of exact size and signedness in built-ins.

The AVR built-ins used types like "int" or "char" whose size and
signedness are not fixed but depend on options like -mint8 and
-f[no-][un-]signed-char.  As the built-ins model machine instructions
with operands of given sizes and signedness, use types of exactly that
size and signedness in their prototypes.

gcc/
* config/avr/builtins.def: Use function prototypes of given size
and signedness.
* config/avr/avr.cc (avr_init_builtins): Adjust types required by
builtins.def.

* doc/extend.texi (AVR Built-in Functions): Adjust accordingly.

diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 4a55f14bff7..d3756a2f036 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -14605,35 +14605,35 @@ avr_init_builtins (void)
 {
   tree void_ftype_void
 = build_function_type_list (void_type_node, NULL_TREE);
-  tree uchar_ftype_uchar
-= build_function_type_list (unsigned_char_type_node,
-unsigned_char_type_node,
+  tree uintQI_ftype_uintQI
+= build_function_type_list (unsigned_intQI_type_node,
+unsigned_intQI_type_node,
 NULL_TREE);
-  tree uint_ftype_uchar_uchar
-= build_function_type_list (unsigned_type_node,
-unsigned_char_type_node,
-unsigned_char_type_node,
+  tree uintHI_ftype_uintQI_uintQI
+= build_function_type_list (unsigned_intHI_type_node,
+unsigned_intQI_type_node,
+unsigned_intQI_type_node,
 NULL_TREE);
-  tree int_ftype_char_char
-= build_function_type_list (integer_type_node,
-char_type_node,
-char_type_node,
+  tree intHI_ftype_intQI_intQI
+= build_function_type_list (intHI_type_node,
+intQI_type_node,
+intQI_type_node,
 NULL_TREE);
-  tree int_ftype_char_uchar
-= build_function_type_list (integer_type_node,
-char_type_node,
-unsigned_char_type_node,
+  tree intHI_ftype_intQI_uintQI
+= build_function_type_list (intHI_type_node,
+intQI_type_node,
+unsigned_intQI_type_node,
 NULL_TREE);
-  tree void_ftype_ulong
+  tree void_ftype_uintSI
 = build_function_type_list (void_type_node,
-long_unsigned_type_node,
+unsigned_intSI_type_node,
 NULL_TREE);
 
-  tree uchar_ftype_ulong_uchar_uchar
-= build_function_type_list (unsigned_char_type_node,
-long_unsigned_type_node,
-unsigned_char_type_node,
-unsigned_char_type_node,
+  tree uintQI_ftype_uintSI_uintQI_uintQI
+= build_function_type_list (unsigned_intQI_type_node,
+unsigned_intSI_type_node,
+unsigned_intQI_type_node,
+unsigned_intQI_type_node,
 NULL_TREE);
 
   tree const_memx_void_node
@@ -14644,8 +14644,8 @@ avr_init_builtins (void)
   tree const_memx_ptr_type_node
 = build_pointer_type_for_mode (const_memx_void_node, PSImode, false);
 
-  tree char_ftype_const_memx_ptr
-= build_function_type_list (char_type_node,
+  tree intQI_ftype_const_memx_ptr
+= build_function_type_list (intQI_type_node,
 const_memx_ptr_type_node,
 NULL);
 
diff --git a/gcc/config/avr/builtins.def b/gcc/config/avr/builtins.def
index b4bf7beb590..316bdebe498 100644
--- a/gcc/config/avr/builtins.def
+++ b/gcc/config/avr/builtins.def
@@ -43,17 +43,17 @@ DEF_BUILTIN (SLEEP, 0, void_ftype_void, sleep, NULL)
 /* Mapped to respective instruction but might also be folded away
or emit as libgcc call if ISA does not provide the instruction.  */
 
-DEF_BUILTIN (SWAP,   1, uchar_ftype_uchar,  rotlqi3_4, NULL)
-DEF_BUILTIN (FMUL,   2, uint_ftype_uchar_uchar, fmul, NULL)
-DEF_BUILTIN (FMULS,  2, int_ftype_char_char,fmuls, NULL)
-DEF_BUILTIN (FMULSU, 2, int_ftype_char_uchar,   fmulsu, NULL)
+DEF_BUILTIN (SWAP,   1, uintQI_ftype_uintQI,rotlqi3_4, NULL)
+DEF_BUILTIN (FMUL,   2, uintHI_ftype_uintQI_uintQI, fmul, NULL)
+DEF_BUILTIN (FMULS,  2, intHI_ftype_intQI_intQI,fmuls, NULL)
+DEF_BUILTIN (FMULSU, 2, intHI_ftype_intQI_uintQI,   fmulsu, NULL)
 
 /* More complex stuff that cannot be mapped 1:1 to an instruction.  */
 
-DEF_BUILTIN (DELAY_CYCLES, -1, void_ftype_ulong, nothing, NULL)
-DEF_BUILTIN (NOPS, -1, void_ftype_ulong, nothing, NULL)
-DEF_BUILTIN (INSERT_BITS, 3, uchar_ftype_ulong_uchar_uchar, insert_bits, NULL)
-DEF_BUILTIN (FLASH_SEGMENT, 1, char_ftype_const_memx_ptr, flash_segment, NULL)
+DEF_BUILTIN (DELAY_CYCLES, -1, void_ftype_uintSI, nothing, NULL)
+DEF_BUILTIN (NOPS, -1, void_ftype_uintSI, nothing, NULL)
+DEF_BUILTIN (INSERT_BITS, 3, uintQI_ftype_uintSI_uintQI_uintQI, insert_bits, NULL)
+DEF_BUILTIN (FLASH_SEGMENT, 1, intQI_ftype_const_memx_ptr, flash_segment, NULL)
 
 /* ISO/IEC TR 18037 "Embedded C"
The following builtins are undocumented and used by stdfix.h.  */
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index b2383b55666..2135dfde9c8 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -16783,32 +16783,30 @@ or if not a specific built-in is implemented or not. For example, if
 @code{__BUILTIN_AVR_NOP} is defined to @code{1} and undefined otherwise.
 
 @defbuilti

[patch,avr] PR114100 : Better indirect addressing on reduced cores

2024-02-26 Thread Georg-Johann Lay



A description of what the patch does follows in the commit message below.

On ATmega128, there are no changes in test results.

On ATtiny40 (reduced core) there are no new execution fails,
but apart from that there is quite some noise in the delta:

unsupported (memory full) -> pass
unsupported (memory full) -> fail due to unresolved symbol (printf, float, ...)

unsupported (memory full) -> fail (excess errors); this is because the
testsuite is far from being diagnostic-clean.

All these transitions are because the code size shrinks a lot,
sometimes 20% or more.

If there are no objections or suggested improvements, I will go ahead
and install it so it can go into v14.

Johann

--

The Reduced Tiny core does not support indirect addressing with offset,
which basically means that every indirect memory access with a size
of more than one byte is effectively POST_INC or PRE_DEC.  The lack of
that addressing mode is currently handled by pretending to support it,
and then letting the insn printers add and subtract the offsets again
as needed.
For example, the following C code

   int vars[10];

   void inc_var2 (void) {
  ++vars[2];
   }

is compiled to:

   ldi r30,lo8(vars) ;  14   [c=4 l=2]  *movhi/4
   ldi r31,hi8(vars)
   subi r30,lo8(-(4));  15   [c=8 l=6]  *movhi/2
   sbci r31,hi8(-(4))
   ld r20,Z+
   ld r21,Z
   subi r30,lo8((4+1))
   sbci r31,hi8((4+1))
   subi r20,-1 ;  16   [c=4 l=2]  *addhi3_clobber/1
   sbci r21,-1
   subi r30,lo8(-(4+1));  17   [c=4 l=4]  *movhi/3
   sbci r31,hi8(-(4+1))
   st Z,r21
   st -Z,r20

where the code could be -- and with this patch actually is -- like

   ldi r30,lo8(vars+4);  28   [c=4 l=2]  *movhi/4
   ldi r31,hi8(vars+4)
   ld r20,Z+  ;  17   [c=8 l=2]  *movhi/2
   ld r21,Z+
   subi r20,-1;  19   [c=4 l=2]  *addhi3_clobber/1
   sbci r21,-1
   st -Z,r21  ;  30   [c=4 l=2]  *movhi/3
   st -Z,r20

This is achieved in two steps:

- A post-reload split into "real" instructions during .split2.
- A new avr-specific mini pass .avr-fuse-add that runs before
  RTL peephole and that tries to combine the generated pointer
  additions into memory accesses to form POST_INC or PRE_DEC.

gcc/
PR target/114100
* doc/invoke.texi (AVR Options) <-mfuse-add>: Document.
* config/avr/avr.opt (-mfuse-add=): New target option.
* common/config/avr/avr-common.cc (avr_option_optimization_table)
[OPT_LEVELS_1_PLUS]: Set -mfuse-add=1.
[OPT_LEVELS_2_PLUS]: Set -mfuse-add=2.
* config/avr/avr-passes.def (avr_pass_fuse_add): Insert new pass.
* config/avr/avr-protos.h (avr_split_tiny_move)
(make_avr_pass_fuse_add): New protos.
* config/avr/avr.md [AVR_TINY]: New post-reload splitter uses
avr_split_tiny_move to split indirect memory accesses.
(gen_move_clobbercc): New define_expand helper.
* config/avr/avr.cc (avr_pass_data_fuse_add): New pass data.
(avr_pass_fuse_add): New class from rtl_opt_pass.
(make_avr_pass_fuse_add, avr_split_tiny_move): New functions.
(reg_seen_between_p, emit_move_ccc, emit_move_ccc_after): New functions.
(avr_legitimate_address_p) [AVR_TINY]: Don't restrict offsets
of PLUS addressing for AVR_TINY.
(avr_regno_mode_code_ok_for_base_p) [AVR_TINY]: Ignore -mstrict-X.
(avr_out_plus_1) [AVR_TINY]: Tweak ++Y and --Y.
(avr_mode_code_base_reg_class) [AVR_TINY]: Always return POINTER_REGS.

 gcc/common/config/avr/avr-common.cc |   2 +
 gcc/config/avr/avr-passes.def   |   9 ++
 gcc/config/avr/avr-protos.h |   2 +
 gcc/config/avr/avr.cc   | 787 +++++++++++++++++++++++++++++++++++-
 gcc/config/avr/avr.md   |  29 +
 gcc/config/avr/avr.opt  |   8 ++
 gcc/doc/invoke.texi |  10 +-
 7 files changed, 845 insertions(+), 2 deletions(-)

diff --git a/gcc/common/config/avr/avr-common.cc b/gcc/common/config/avr/avr-common.cc
index 7867483909d..fdf130f1e1a 100644
--- a/gcc/common/config/avr/avr-common.cc
+++ b/gcc/common/config/avr/avr-common.cc
@@ -34,6 +34,8 @@ static const struct default_options avr_option_optimization_table[] =
 { OPT_LEVELS_ALL, OPT_fcaller_saves, NULL, 0 },
 { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_mgas_isr_prologues, NULL, 1 },
 { OPT_LEVELS_1_PLUS, OPT_mmain_is_OS_task, NULL, 1 },
+{ OPT_LEVELS_1_PLUS, OPT_mfuse_add_, NULL, 1 },
+{ OPT_LEVELS_2_PLUS, OPT_mfuse_add_, NULL, 2 },
 // Stick to the "old" placement of the subreg lowering pass.
 { OPT_LEVELS_1_PLUS, OPT_fsplit_wide_types_early, NULL, 1 },
 /* Allow optimizer to introduce store data races. This used to be the
diff --git a/gcc/config/avr/avr-passes.def b/gcc/config/avr/avr-passes.def
index 34e5b95f920..748260edaef 100644
--- a/gcc/config/avr/avr-passes.def
+++ b/gcc/config/avr/avr-passes.def
@@ -17,6 +17,15 @@
along with GCC;

[avr,patch,applied] Remove some dead code

2024-02-26 Thread Georg-Johann Lay

This code was dead in the block where it lived,
because avr_adiw_reg_p() is only true when ADIW and SBIW
are available -- which is not the case for AVR_TINY.

Johann

--

AVR: Dead code removal.

gcc/
* config/avr/avr.cc (avr_out_compare) [AVR_TINY]: Remove code in
an "if avr_adiw_reg_p()" block that's dead for AVR_TINY.

diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index d3756a2f036..655a8e89fdc 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -6291,10 +6291,7 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
  && (val8 == 0
  || reg_unused_after (insn, xreg)))
{
- if (AVR_TINY)
-   avr_asm_len (TINY_SBIW (%A0, %B0, %1), xop, plen, 2);
- else
-   avr_asm_len ("sbiw %0,%1", xop, plen, 1);
+ avr_asm_len ("sbiw %0,%1", xop, plen, 1);

  i++;
  continue;
@@ -6305,9 +6302,7 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
  && compare_eq_p (insn)
  && reg_unused_after (insn, xreg))
{
- return AVR_TINY
-   ? avr_asm_len (TINY_ADIW (%A0, %B0, %n1), xop, plen, 2)
-   : avr_asm_len ("adiw %0,%n1", xop, plen, 1);
+ return avr_asm_len ("adiw %0,%n1", xop, plen, 1);
}
}



  1   2   3   4   5   6   7   8   9   10   >