Re: [PATCH 2/2] [ARM] Add support for -mpure-code in thumb-1 (v6m)

Kyrill Tkachov Tue, 12 Nov 2019 02:35:20 -0800

Hi Christophe,

On 10/18/19 2:18 PM, Christophe Lyon wrote:

Hi,


This patch extends support for -mpure-code to all thumb-1 processors,
by removing the need for MOVT.

Symbol addresses are built using upper8_15, upper0_7, lower8_15 and
lower0_7 relocations, and constants are built using sequences of
movs/adds and lsls instructions.

The extension of the *thumb1_movhf pattern always uses the same size
(6) although it can emit a shorter sequence when possible. This is
similar to what *arm32_movhf already does.

CASE_VECTOR_PC_RELATIVE is now false with -mpure-code, to avoid
generating invalid assembly code with differences between symbols from
two different sections (the difference cannot be computed by the
assembler).

Tests pr45701-[12].c needed a small adjustment to avoid matching
upper8_15 when looking for the r8 register.

Test no-literal-pool.c is augmented with __fp16, so it now uses
-mfp16-format=ieee.

Test thumb1-Os-mult.c generates an inline code sequence with
-mpure-code and computes the multiplication by using a sequence of
add/shift rather than using the multiply instruction, so we skip it in
presence of -mpure-code.

With -mcpu=cortex-m0, the pure-code/no-literal-pool.c fails because
code like:
static char *p = "Hello World";
char *
testchar ()
{
  return p + 4;
}
generates 2 indirections (I removed non-essential directives/code)
        .section        .rodata
.LC0:
.ascii  "Hello World\000"
.data
p:
.word   .LC0
.section        .rodata
.LC2:
.word   p
.section .text,"0x20000006",%progbits
testchar:
push    {r7, lr}
add     r7, sp, #0
movs    r3, #:upper8_15:#.LC2
lsls    r3, #8
adds    r3, #:upper0_7:#.LC2
lsls    r3, #8
adds    r3, #:lower8_15:#.LC2
lsls    r3, #8
adds    r3, #:lower0_7:#.LC2
ldr     r3, [r3]
ldr     r3, [r3]
adds    r3, r3, #4
movs    r0, r3
mov     sp, r7
@ sp needed
pop     {r7, pc}

By contrast, when using -mcpu=cortex-m4, the code looks like:
        .section        .rodata
.LC0:
.ascii  "Hello World\000"
.data
p:
.word   .LC0
testchar:
push    {r7}
add     r7, sp, #0
movw    r3, #:lower16:p
movt    r3, #:upper16:p
ldr     r3, [r3]
adds    r3, r3, #4
mov     r0, r3
mov     sp, r7
pop     {r7}
bx      lr

I haven't yet found how to make the code for cortex-m0 apply upper/lower
relocations to "p" instead of .LC2. The current code looks functional,
but could be improved.

OK as-is?

Thanks,

Christophe



diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index f995974..beb8411 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -66,6 +66,7 @@ extern bool arm_small_register_classes_for_mode_p 
(machine_mode);
 extern int const_ok_for_arm (HOST_WIDE_INT);
 extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
 extern int const_ok_for_dimode_op (HOST_WIDE_INT, enum rtx_code);
+extern void thumb1_gen_const_int (rtx, HOST_WIDE_INT);
 extern int arm_split_constant (RTX_CODE, machine_mode, rtx,
                               HOST_WIDE_INT, rtx, rtx, int);
 extern int legitimate_pic_operand_p (rtx);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 9f0975d..836f147 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -2882,13 +2882,19 @@ arm_option_check_internal (struct gcc_options *opts)
     {
       const char *flag = (target_pure_code ? "-mpure-code" :
                                             "-mslow-flash-data");
+      bool not_supported = arm_arch_notm || flag_pic || TARGET_NEON;

- /* We only support -mpure-code and -mslow-flash-data on M-profile targets

-        with MOVT.  */
-      if (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON)
+      /* We only support -mslow-flash-data on M-profile targets with
+        MOVT.  */
+      if (target_slow_flash_data && (!TARGET_HAVE_MOVT || not_supported))
        error ("%s only supports non-pic code on M-profile targets with the "
               "MOVT instruction", flag);

+ /* We only support -mpure-code-flash-data on M-profile

+        targets.  */


Typo in the option name: the comment should say -mpure-code, not -mpure-code-flash-data.

+      if (target_pure_code && not_supported)
+       error ("%s only supports non-pic code on M-profile targets", flag);
+
       /* Cannot load addresses: -mslow-flash-data forbids literal pool and
         -mword-relocations forbids relocation of MOVT/MOVW.  */
       if (target_word_relocations)
@@ -4400,6 +4406,38 @@ const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code 
code)
     }
 }

+/* Emit a sequence of movs/adds/shift to produce a 32-bit constant.

+   Avoid generating useless code when one of the bytes is zero.  */
+void
+thumb1_gen_const_int (rtx op0, HOST_WIDE_INT op1)
+{
+  bool mov_done_p = false;
+  int i;
+
+  /* Emit upper 3 bytes if needed.  */
+  for (i = 0; i < 3; i++)
+    {
+      int byte = (op1 >> (8 * (3 - i))) & 0xff;
+
+      if (byte)
+       {
+         emit_set_insn (op0, mov_done_p
+                        ? gen_rtx_PLUS (SImode,op0, GEN_INT (byte))
+                        : GEN_INT (byte));
+         mov_done_p = true;
+       }
+
+      if (mov_done_p)
+       emit_set_insn (op0, gen_rtx_ASHIFT (SImode, op0, GEN_INT (8)));
+    }
+
+  /* Emit lower byte if needed.  */
+  if (!mov_done_p)
+    emit_set_insn (op0, GEN_INT (op1 & 0xff));
+  else if (op1 & 0xff)
+    emit_set_insn (op0, gen_rtx_PLUS (SImode, op0, GEN_INT (op1 & 0xff)));
+}

AFAIK we already have functions like arm_gen_constant that are supposed to 
generate optimal immediate sequences.
Do they not fit the use case here?

Looks reasonable to me otherwise.
Thanks,
Kyrill


 +
 /* Emit a sequence of insns to handle a large constant.
    CODE is the code of the operation required, it can be any of SET, PLUS,
    IOR, AND, XOR, MINUS;
@@ -8530,7 +8568,8 @@ thumb1_legitimate_address_p (machine_mode mode, rtx x, 
int strict_p)
   /* This is PC relative data before arm_reorg runs.  */
   else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
           && GET_CODE (x) == SYMBOL_REF
-           && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
+          && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
+          && !arm_disable_literal_pool)
     return 1;

/* This is PC relative data after arm_reorg runs. */

@@ -8598,6 +8637,7 @@ thumb1_legitimate_address_p (machine_mode mode, rtx x, 
int strict_p)
           && GET_MODE_SIZE (mode) == 4
           && GET_CODE (x) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
+          && !arm_disable_literal_pool
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
@@ -9278,7 +9318,9 @@ thumb1_rtx_costs (rtx x, enum rtx_code code, enum 
rtx_code outer)
            return 0;
          if (thumb_shiftable_const (INTVAL (x)))
            return COSTS_N_INSNS (2);
-         return COSTS_N_INSNS (3);
+         return arm_disable_literal_pool
+           ? COSTS_N_INSNS (8)
+           : COSTS_N_INSNS (3);
        }
       else if ((outer == PLUS || outer == COMPARE)
               && INTVAL (x) < 256 && INTVAL (x) > -256)
@@ -9435,7 +9477,9 @@ thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum 
rtx_code outer)
          /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
           if (thumb_shiftable_const (INTVAL (x)))
             return COSTS_N_INSNS (2);
-          return COSTS_N_INSNS (3);
+         return arm_disable_literal_pool
+           ? COSTS_N_INSNS (8)
+           : COSTS_N_INSNS (3);
         }
       else if ((outer == PLUS || outer == COMPARE)
                && INTVAL (x) < 256 && INTVAL (x) > -256)
@@ -27073,14 +27117,41 @@ arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT 
delta,
          /* push r3 so we can use it as a temporary.  */
          /* TODO: Omit this save if r3 is not used.  */
          fputs ("\tpush {r3}\n", file);
-         fputs ("\tldr\tr3, ", file);
+
+         /* With -mpure-code, we cannot load the address from the
+            constant pool: we build it explicitly.  */
+         if (target_pure_code)
+           {
+             fputs ("\tmovs\tr3, #:upper8_15:#", file);
+             assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
+             fputc ('\n', file);
+             fputs ("\tlsls r3, #8\n", file);
+             fputs ("\tadds\tr3, #:upper0_7:#", file);
+             assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
+             fputc ('\n', file);
+             fputs ("\tlsls r3, #8\n", file);
+             fputs ("\tadds\tr3, #:lower8_15:#", file);
+             assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
+             fputc ('\n', file);
+             fputs ("\tlsls r3, #8\n", file);
+             fputs ("\tadds\tr3, #:lower0_7:#", file);
+             assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
+             fputc ('\n', file);
+           }
+         else
+           fputs ("\tldr\tr3, ", file);
        }
       else
        {
          fputs ("\tldr\tr12, ", file);
        }
-      assemble_name (file, label);
-      fputc ('\n', file);
+
+      if (!target_pure_code)
+       {
+         assemble_name (file, label);
+         fputc ('\n', file);
+       }
+
       if (flag_pic)
        {
          /* If we are generating PIC, the ldr instruction below loads
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 8b67c9c..d842448 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -1850,9 +1850,11 @@ enum arm_auto_incmodes
    for the index in the tablejump instruction.  */
 #define CASE_VECTOR_MODE Pmode

-#define CASE_VECTOR_PC_RELATIVE (TARGET_THUMB2 \

-                                || (TARGET_THUMB1                      \
-                                    && (optimize_size || flag_pic)))
+#define CASE_VECTOR_PC_RELATIVE ((TARGET_THUMB2                                
\
+                                 || (TARGET_THUMB1                     \
+                                     && (optimize_size || flag_pic)))  \
+                                && (!target_pure_code))
+

#define CASE_VECTOR_SHORTEN_MODE(min, max, body) \

   (TARGET_THUMB1                                                       \
diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
index 5c70200..dd758eb 100644
--- a/gcc/config/arm/thumb1.md
+++ b/gcc/config/arm/thumb1.md
@@ -43,6 +43,41 @@

+(define_insn "thumb1_movsi_symbol_ref"

+  [(set (match_operand:SI 0 "register_operand" "=l")
+       (match_operand:SI 1 "general_operand" ""))
+   ]
+  "TARGET_THUMB1
+   && arm_disable_literal_pool
+   && GET_CODE (operands[1]) == SYMBOL_REF"
+  "*
+  output_asm_insn (\"movs\\t%0, #:upper8_15:%1\", operands);
+  output_asm_insn (\"lsls\\t%0, #8\", operands);
+  output_asm_insn (\"adds\\t%0, #:upper0_7:%1\", operands);
+  output_asm_insn (\"lsls\\t%0, #8\", operands);
+  output_asm_insn (\"adds\\t%0, #:lower8_15:%1\", operands);
+  output_asm_insn (\"lsls\\t%0, #8\", operands);
+  output_asm_insn (\"adds\\t%0, #:lower0_7:%1\", operands);
+  return \"\";
+  "
+  [(set_attr "length" "14")
+   (set_attr "conds" "clob")]
+)
+
+(define_split
+  [(set (match_operand:SI 0 "register_operand" "")
+       (match_operand:SI 1 "immediate_operand" ""))]
+  "TARGET_THUMB1
+   && arm_disable_literal_pool
+   && GET_CODE (operands[1]) == CONST_INT
+   && !satisfies_constraint_I (operands[1])"
+  [(clobber (const_int 0))]
+  "
+    thumb1_gen_const_int (operands[0], INTVAL (operands[1]));
+    DONE;
+  "
+)
+
 (define_insn "*thumb1_adddi3"
   [(set (match_operand:DI          0 "register_operand" "=l")
        (plus:DI (match_operand:DI 1 "register_operand" "%0")
@@ -829,8 +864,8 @@
    (set_attr "conds" "clob,nocond,nocond,nocond,nocond,clob")])

(define_insn "*thumb1_movhf"

-  [(set (match_operand:HF     0 "nonimmediate_operand" "=l,l,m,*r,*h")
-       (match_operand:HF     1 "general_operand"      "l,mF,l,*h,*r"))]
+  [(set (match_operand:HF     0 "nonimmediate_operand" "=l,l,l,m,*r,*h")
+       (match_operand:HF     1 "general_operand"      "l, m,F,l,*h,*r"))]
   "TARGET_THUMB1
    && (          s_register_operand (operands[0], HFmode)
        || s_register_operand (operands[1], HFmode))"
@@ -855,14 +890,34 @@
          }
        return \"ldrh\\t%0, %1\";
       }
-    case 2: return \"strh\\t%1, %0\";
+    case 2:
+    {
+      int bits;
+      int high;
+      rtx ops[3];
+
+      bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (operands[1]),
+                            HFmode);
+      ops[0] = operands[0];
+      high = (bits >> 8) & 0xff;
+      ops[1] = GEN_INT (high);
+      ops[2] = GEN_INT (bits & 0xff);
+      if (high != 0)
+       output_asm_insn (\"movs\\t%0, %1\;lsls\\t%0, #8\;adds\\t%0, %2\", ops);
+      else
+       output_asm_insn (\"movs\\t%0, %2\", ops);
+
+      return \"\";
+    }
+    case 3: return \"strh\\t%1, %0\";
     default: return \"mov\\t%0, %1\";
     }
   "
-  [(set_attr "length" "2")
-   (set_attr "type" "mov_reg,load_4,store_4,mov_reg,mov_reg")
-   (set_attr "pool_range" "*,1018,*,*,*")
-   (set_attr "conds" "clob,nocond,nocond,nocond,nocond")])
+  [(set_attr "length" "2,2,6,2,2,2")
+   (set_attr "type" "mov_reg,load_4,mov_reg,store_4,mov_reg,mov_reg")
+   (set_attr "pool_range" "*,1018,*,*,*,*")
+   (set_attr "conds" "clob,nocond,nocond,nocond,nocond,nocond")])
+
 ;;; ??? This should have alternatives for constants.
 (define_insn "*thumb1_movsf_insn"
   [(set (match_operand:SF     0 "nonimmediate_operand" "=l,l,>,l, m,*r,*h")
diff --git a/gcc/testsuite/gcc.target/arm/pr45701-1.c 
b/gcc/testsuite/gcc.target/arm/pr45701-1.c
index b26011b..15913d8 100644
--- a/gcc/testsuite/gcc.target/arm/pr45701-1.c
+++ b/gcc/testsuite/gcc.target/arm/pr45701-1.c
@@ -2,7 +2,7 @@
 /* { dg-skip-if "" { ! { arm_thumb1_ok || arm_thumb2_ok } } } */
 /* { dg-options "-mthumb -Os" }  */
 /* { dg-final { scan-assembler "push\t\{r3" { target { ! 
arm*-*-uclinuxfdpiceabi } } } } */
-/* { dg-final { scan-assembler-not "\[^\-\]r8" { target { ! 
arm*-*-uclinuxfdpiceabi } } } } */
+/* { dg-final { scan-assembler-not "\[^\-e\]r8" { target { ! 
arm*-*-uclinuxfdpiceabi } } } } */

extern int hist_verify;

 extern int a1;
diff --git a/gcc/testsuite/gcc.target/arm/pr45701-2.c 
b/gcc/testsuite/gcc.target/arm/pr45701-2.c
index 32eed4d..bb2d36e 100644
--- a/gcc/testsuite/gcc.target/arm/pr45701-2.c
+++ b/gcc/testsuite/gcc.target/arm/pr45701-2.c
@@ -2,7 +2,7 @@
 /* { dg-skip-if "" { ! { arm_thumb1_ok || arm_thumb2_ok } } } */
 /* { dg-options "-mthumb -Os" }  */
 /* { dg-final { scan-assembler "push\t\{r3" { target { ! 
arm*-*-uclinuxfdpiceabi } } } } */
-/* { dg-final { scan-assembler-not "\[^\-\]r8" { target { ! 
arm*-*-uclinuxfdpiceabi } } } } */
+/* { dg-final { scan-assembler-not "\[^\-e\]r8" { target { ! 
arm*-*-uclinuxfdpiceabi } } } } */

extern int hist_verify;

 extern int a1;
diff --git a/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool.c 
b/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool.c
index 4b893fd..3de1620 100644
--- a/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool.c
+++ b/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool.c
@@ -1,12 +1,24 @@
 /* { dg-do compile } */
-/* { dg-options "-mpure-code" } */
+/* { dg-options "-mpure-code -mfp16-format=ieee" } */
 /* { dg-skip-if "" { *-*-* } { "-g" "-fpic" "-fPIC" } { "" } } */

+__fp16 hf;

 float sf;
 double df;
 long long l;
 static char *p = "Hello World";

+__fp16

+testsfp16 (__fp16 *p)
+{
+  hf = 1.3;
+  *p += hf;
+  if (*p > 1.1234f)
+    return 2.1234f;
+  else
+    return 3.1234f;
+}
+
 float
 testsf (float *p)
 {
diff --git a/gcc/testsuite/gcc.target/arm/pure-code/pure-code.exp 
b/gcc/testsuite/gcc.target/arm/pure-code/pure-code.exp
index bf7e4ad..b05cfd6 100644
--- a/gcc/testsuite/gcc.target/arm/pure-code/pure-code.exp
+++ b/gcc/testsuite/gcc.target/arm/pure-code/pure-code.exp
@@ -25,11 +25,8 @@ if ![info exists DEFAULT_CFLAGS] then {
     set DEFAULT_CFLAGS " -ansi -pedantic-errors"
 }

-# The -mpure-code option is only available for M-profile targets that support

-# the MOVT instruction.
-if {([check_effective_target_arm_thumb2_ok]
-     || [check_effective_target_arm_thumb1_movt_ok])
-    && [check_effective_target_arm_cortex_m]} then {
+# The -mpure-code option is only available for M-profile targets.
+if {[check_effective_target_arm_cortex_m]} then {
 # Initialize `dg'.
 dg-init

@@ -56,4 +53,4 @@ set LTO_TORTURE_OPTIONS ${saved-lto_torture_options}# All done.

 dg-finish
-}
+#}
diff --git a/gcc/testsuite/gcc.target/arm/thumb1-Os-mult.c 
b/gcc/testsuite/gcc.target/arm/thumb1-Os-mult.c
index b989c42..92772d4 100644
--- a/gcc/testsuite/gcc.target/arm/thumb1-Os-mult.c
+++ b/gcc/testsuite/gcc.target/arm/thumb1-Os-mult.c
@@ -1,6 +1,7 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target arm_thumb1_ok } */
 /* { dg-options "-Os" } */
+/* { dg-skip-if "-mpure-code generates an inline multiplication code sequence" { *-*-* } 
{ "-mpure-code" } } */
 /* { dg-skip-if "" { ! { arm_thumb1 } } } */

int

Re: [PATCH 2/2] [ARM] Add support for -mpure-code in thumb-1 (v6m)

Reply via email to