Hi Christophe,
On 10/18/19 2:18 PM, Christophe Lyon wrote:
Hi,
This patch extends support for -mpure-code to all thumb-1 processors,
by removing the need for MOVT.
Symbol addresses are built using upper8_15, upper0_7, lower8_15 and
lower0_7 relocations, and constants are built using sequences of
movs/adds and lsls instructions.
The extension of the *thumb1_movhf pattern uses always the same size
(6) although it can emit a shorter sequence when possible. This is
similar to what *arm32_movhf already does.
CASE_VECTOR_PC_RELATIVE is now false with -mpure-code, to avoid
generating invalid assembly code with differences from symbols from
two different sections (the difference cannot be computed by the
assembler).
Tests pr45701-[12].c needed a small adjustment to avoid matching
upper8_15 when looking for the r8 register.
Test no-literal-pool.c is augmented with __fp16, so it now uses
-mfp16-format=ieee.
Test thumb1-Os-mult.c generates an inline code sequence with
-mpure-code and computes the multiplication by using a sequence of
add/shift rather than using the multiply instruction, so we skip it in
presence of -mpure-code.
With -mcpu=cortex-m0, the pure-code/no-literal-pool.c fails because
code like:
static char *p = "Hello World";
char *
testchar ()
{
return p + 4;
}
generates 2 indirections (I removed non-essential directives/code)
.section .rodata
.LC0:
.ascii "Hello World\000"
.data
p:
.word .LC0
.section .rodata
.LC2:
.word p
.section .text,"0x20000006",%progbits
testchar:
push {r7, lr}
add r7, sp, #0
movs r3, #:upper8_15:#.LC2
lsls r3, #8
adds r3, #:upper0_7:#.LC2
lsls r3, #8
adds r3, #:lower8_15:#.LC2
lsls r3, #8
adds r3, #:lower0_7:#.LC2
ldr r3, [r3]
ldr r3, [r3]
adds r3, r3, #4
movs r0, r3
mov sp, r7
@ sp needed
pop {r7, pc}
By contrast, when using -mcpu=cortex-m4, the code looks like:
.section .rodata
.LC0:
.ascii "Hello World\000"
.data
p:
.word .LC0
testchar:
push {r7}
add r7, sp, #0
movw r3, #:lower16:p
movt r3, #:upper16:p
ldr r3, [r3]
adds r3, r3, #4
mov r0, r3
mov sp, r7
pop {r7}
bx lr
I haven't found yet how to make code for cortex-m0 apply upper/lower
relocations to "p" instead of .LC2. The current code looks functional,
but could be improved.
OK as-is?
Thanks,
Christophe
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index f995974..beb8411 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -66,6 +66,7 @@ extern bool arm_small_register_classes_for_mode_p
(machine_mode);
extern int const_ok_for_arm (HOST_WIDE_INT);
extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
extern int const_ok_for_dimode_op (HOST_WIDE_INT, enum rtx_code);
+extern void thumb1_gen_const_int (rtx, HOST_WIDE_INT);
extern int arm_split_constant (RTX_CODE, machine_mode, rtx,
HOST_WIDE_INT, rtx, rtx, int);
extern int legitimate_pic_operand_p (rtx);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 9f0975d..836f147 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -2882,13 +2882,19 @@ arm_option_check_internal (struct gcc_options *opts)
{
const char *flag = (target_pure_code ? "-mpure-code" :
"-mslow-flash-data");
+ bool not_supported = arm_arch_notm || flag_pic || TARGET_NEON;
- /* We only support -mpure-code and -mslow-flash-data on M-profile targets
- with MOVT. */
- if (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON)
+ /* We only support -mslow-flash-data on M-profile targets with
+ MOVT. */
+ if (target_slow_flash_data && (!TARGET_HAVE_MOVT || not_supported))
error ("%s only supports non-pic code on M-profile targets with the "
"MOVT instruction", flag);
+ /* We only support -mpure-code-flash-data on M-profile
+ targets. */
Typo in the option name.
+ if (target_pure_code && not_supported)
+ error ("%s only supports non-pic code on M-profile targets", flag);
+
/* Cannot load addresses: -mslow-flash-data forbids literal pool and
-mword-relocations forbids relocation of MOVT/MOVW. */
if (target_word_relocations)
@@ -4400,6 +4406,38 @@ const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code
code)
}
}
+/* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
+ Avoid generating useless code when one of the bytes is zero. */
+void
+thumb1_gen_const_int (rtx op0, HOST_WIDE_INT op1)
+{
+ bool mov_done_p = false;
+ int i;
+
+ /* Emit upper 3 bytes if needed. */
+ for (i = 0; i < 3; i++)
+ {
+ int byte = (op1 >> (8 * (3 - i))) & 0xff;
+
+ if (byte)
+ {
+ emit_set_insn (op0, mov_done_p
+ ? gen_rtx_PLUS (SImode,op0, GEN_INT (byte))
+ : GEN_INT (byte));
+ mov_done_p = true;
+ }
+
+ if (mov_done_p)
+ emit_set_insn (op0, gen_rtx_ASHIFT (SImode, op0, GEN_INT (8)));
+ }
+
+ /* Emit lower byte if needed. */
+ if (!mov_done_p)
+ emit_set_insn (op0, GEN_INT (op1 & 0xff));
+ else if (op1 & 0xff)
+ emit_set_insn (op0, gen_rtx_PLUS (SImode, op0, GEN_INT (op1 & 0xff)));
+}
AFAIK we already have functions like arm_gen_constant that are supposed to
generate optimal immediate sequences.
Do they not fit the usecase here?
Looks reasonable to me otherwise.
Thanks,
Kyrill
+
/* Emit a sequence of insns to handle a large constant.
CODE is the code of the operation required, it can be any of SET, PLUS,
IOR, AND, XOR, MINUS;
@@ -8530,7 +8568,8 @@ thumb1_legitimate_address_p (machine_mode mode, rtx x,
int strict_p)
/* This is PC relative data before arm_reorg runs. */
else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
&& GET_CODE (x) == SYMBOL_REF
- && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
+ && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
+ && !arm_disable_literal_pool)
return 1;
/* This is PC relative data after arm_reorg runs. */
@@ -8598,6 +8637,7 @@ thumb1_legitimate_address_p (machine_mode mode, rtx x,
int strict_p)
&& GET_MODE_SIZE (mode) == 4
&& GET_CODE (x) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x)
+ && !arm_disable_literal_pool
&& ! (flag_pic
&& symbol_mentioned_p (get_pool_constant (x))
&& ! pcrel_constant_p (get_pool_constant (x))))
@@ -9278,7 +9318,9 @@ thumb1_rtx_costs (rtx x, enum rtx_code code, enum
rtx_code outer)
return 0;
if (thumb_shiftable_const (INTVAL (x)))
return COSTS_N_INSNS (2);
- return COSTS_N_INSNS (3);
+ return arm_disable_literal_pool
+ ? COSTS_N_INSNS (8)
+ : COSTS_N_INSNS (3);
}
else if ((outer == PLUS || outer == COMPARE)
&& INTVAL (x) < 256 && INTVAL (x) > -256)
@@ -9435,7 +9477,9 @@ thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum
rtx_code outer)
/* See split "TARGET_THUMB1 && satisfies_constraint_K". */
if (thumb_shiftable_const (INTVAL (x)))
return COSTS_N_INSNS (2);
- return COSTS_N_INSNS (3);
+ return arm_disable_literal_pool
+ ? COSTS_N_INSNS (8)
+ : COSTS_N_INSNS (3);
}
else if ((outer == PLUS || outer == COMPARE)
&& INTVAL (x) < 256 && INTVAL (x) > -256)
@@ -27073,14 +27117,41 @@ arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT
delta,
/* push r3 so we can use it as a temporary. */
/* TODO: Omit this save if r3 is not used. */
fputs ("\tpush {r3}\n", file);
- fputs ("\tldr\tr3, ", file);
+
+ /* With -mpure-code, we cannot load the address from the
+ constant pool: we build it explicitly. */
+ if (target_pure_code)
+ {
+ fputs ("\tmovs\tr3, #:upper8_15:#", file);
+ assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
+ fputc ('\n', file);
+ fputs ("\tlsls r3, #8\n", file);
+ fputs ("\tadds\tr3, #:upper0_7:#", file);
+ assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
+ fputc ('\n', file);
+ fputs ("\tlsls r3, #8\n", file);
+ fputs ("\tadds\tr3, #:lower8_15:#", file);
+ assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
+ fputc ('\n', file);
+ fputs ("\tlsls r3, #8\n", file);
+ fputs ("\tadds\tr3, #:lower0_7:#", file);
+ assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
+ fputc ('\n', file);
+ }
+ else
+ fputs ("\tldr\tr3, ", file);
}
else
{
fputs ("\tldr\tr12, ", file);
}
- assemble_name (file, label);
- fputc ('\n', file);
+
+ if (!target_pure_code)
+ {
+ assemble_name (file, label);
+ fputc ('\n', file);
+ }
+
if (flag_pic)
{
/* If we are generating PIC, the ldr instruction below loads
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 8b67c9c..d842448 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -1850,9 +1850,11 @@ enum arm_auto_incmodes
for the index in the tablejump instruction. */
#define CASE_VECTOR_MODE Pmode
-#define CASE_VECTOR_PC_RELATIVE (TARGET_THUMB2 \
- || (TARGET_THUMB1 \
- && (optimize_size || flag_pic)))
+#define CASE_VECTOR_PC_RELATIVE ((TARGET_THUMB2
\
+ || (TARGET_THUMB1 \
+ && (optimize_size || flag_pic))) \
+ && (!target_pure_code))
+
#define CASE_VECTOR_SHORTEN_MODE(min, max, body) \
(TARGET_THUMB1 \
diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
index 5c70200..dd758eb 100644
--- a/gcc/config/arm/thumb1.md
+++ b/gcc/config/arm/thumb1.md
@@ -43,6 +43,41 @@
+(define_insn "thumb1_movsi_symbol_ref"
+ [(set (match_operand:SI 0 "register_operand" "=l")
+ (match_operand:SI 1 "general_operand" ""))
+ ]
+ "TARGET_THUMB1
+ && arm_disable_literal_pool
+ && GET_CODE (operands[1]) == SYMBOL_REF"
+ "*
+ output_asm_insn (\"movs\\t%0, #:upper8_15:%1\", operands);
+ output_asm_insn (\"lsls\\t%0, #8\", operands);
+ output_asm_insn (\"adds\\t%0, #:upper0_7:%1\", operands);
+ output_asm_insn (\"lsls\\t%0, #8\", operands);
+ output_asm_insn (\"adds\\t%0, #:lower8_15:%1\", operands);
+ output_asm_insn (\"lsls\\t%0, #8\", operands);
+ output_asm_insn (\"adds\\t%0, #:lower0_7:%1\", operands);
+ return \"\";
+ "
+ [(set_attr "length" "14")
+ (set_attr "conds" "clob")]
+)
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand" "")
+ (match_operand:SI 1 "immediate_operand" ""))]
+ "TARGET_THUMB1
+ && arm_disable_literal_pool
+ && GET_CODE (operands[1]) == CONST_INT
+ && !satisfies_constraint_I (operands[1])"
+ [(clobber (const_int 0))]
+ "
+ thumb1_gen_const_int (operands[0], INTVAL (operands[1]));
+ DONE;
+ "
+)
+
(define_insn "*thumb1_adddi3"
[(set (match_operand:DI 0 "register_operand" "=l")
(plus:DI (match_operand:DI 1 "register_operand" "%0")
@@ -829,8 +864,8 @@
(set_attr "conds" "clob,nocond,nocond,nocond,nocond,clob")])
(define_insn "*thumb1_movhf"
- [(set (match_operand:HF 0 "nonimmediate_operand" "=l,l,m,*r,*h")
- (match_operand:HF 1 "general_operand" "l,mF,l,*h,*r"))]
+ [(set (match_operand:HF 0 "nonimmediate_operand" "=l,l,l,m,*r,*h")
+ (match_operand:HF 1 "general_operand" "l, m,F,l,*h,*r"))]
"TARGET_THUMB1
&& ( s_register_operand (operands[0], HFmode)
|| s_register_operand (operands[1], HFmode))"
@@ -855,14 +890,34 @@
}
return \"ldrh\\t%0, %1\";
}
- case 2: return \"strh\\t%1, %0\";
+ case 2:
+ {
+ int bits;
+ int high;
+ rtx ops[3];
+
+ bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (operands[1]),
+ HFmode);
+ ops[0] = operands[0];
+ high = (bits >> 8) & 0xff;
+ ops[1] = GEN_INT (high);
+ ops[2] = GEN_INT (bits & 0xff);
+ if (high != 0)
+ output_asm_insn (\"movs\\t%0, %1\;lsls\\t%0, #8\;adds\\t%0, %2\", ops);
+ else
+ output_asm_insn (\"movs\\t%0, %2\", ops);
+
+ return \"\";
+ }
+ case 3: return \"strh\\t%1, %0\";
default: return \"mov\\t%0, %1\";
}
"
- [(set_attr "length" "2")
- (set_attr "type" "mov_reg,load_4,store_4,mov_reg,mov_reg")
- (set_attr "pool_range" "*,1018,*,*,*")
- (set_attr "conds" "clob,nocond,nocond,nocond,nocond")])
+ [(set_attr "length" "2,2,6,2,2,2")
+ (set_attr "type" "mov_reg,load_4,mov_reg,store_4,mov_reg,mov_reg")
+ (set_attr "pool_range" "*,1018,*,*,*,*")
+ (set_attr "conds" "clob,nocond,nocond,nocond,nocond,nocond")])
+
;;; ??? This should have alternatives for constants.
(define_insn "*thumb1_movsf_insn"
[(set (match_operand:SF 0 "nonimmediate_operand" "=l,l,>,l, m,*r,*h")
diff --git a/gcc/testsuite/gcc.target/arm/pr45701-1.c
b/gcc/testsuite/gcc.target/arm/pr45701-1.c
index b26011b..15913d8 100644
--- a/gcc/testsuite/gcc.target/arm/pr45701-1.c
+++ b/gcc/testsuite/gcc.target/arm/pr45701-1.c
@@ -2,7 +2,7 @@
/* { dg-skip-if "" { ! { arm_thumb1_ok || arm_thumb2_ok } } } */
/* { dg-options "-mthumb -Os" } */
/* { dg-final { scan-assembler "push\t\{r3" { target { !
arm*-*-uclinuxfdpiceabi } } } } */
-/* { dg-final { scan-assembler-not "\[^\-\]r8" { target { !
arm*-*-uclinuxfdpiceabi } } } } */
+/* { dg-final { scan-assembler-not "\[^\-e\]r8" { target { !
arm*-*-uclinuxfdpiceabi } } } } */
extern int hist_verify;
extern int a1;
diff --git a/gcc/testsuite/gcc.target/arm/pr45701-2.c
b/gcc/testsuite/gcc.target/arm/pr45701-2.c
index 32eed4d..bb2d36e 100644
--- a/gcc/testsuite/gcc.target/arm/pr45701-2.c
+++ b/gcc/testsuite/gcc.target/arm/pr45701-2.c
@@ -2,7 +2,7 @@
/* { dg-skip-if "" { ! { arm_thumb1_ok || arm_thumb2_ok } } } */
/* { dg-options "-mthumb -Os" } */
/* { dg-final { scan-assembler "push\t\{r3" { target { !
arm*-*-uclinuxfdpiceabi } } } } */
-/* { dg-final { scan-assembler-not "\[^\-\]r8" { target { !
arm*-*-uclinuxfdpiceabi } } } } */
+/* { dg-final { scan-assembler-not "\[^\-e\]r8" { target { !
arm*-*-uclinuxfdpiceabi } } } } */
extern int hist_verify;
extern int a1;
diff --git a/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool.c
b/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool.c
index 4b893fd..3de1620 100644
--- a/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool.c
+++ b/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool.c
@@ -1,12 +1,24 @@
/* { dg-do compile } */
-/* { dg-options "-mpure-code" } */
+/* { dg-options "-mpure-code -mfp16-format=ieee" } */
/* { dg-skip-if "" { *-*-* } { "-g" "-fpic" "-fPIC" } { "" } } */
+__fp16 hf;
float sf;
double df;
long long l;
static char *p = "Hello World";
+__fp16
+testsfp16 (__fp16 *p)
+{
+ hf = 1.3;
+ *p += hf;
+ if (*p > 1.1234f)
+ return 2.1234f;
+ else
+ return 3.1234f;
+}
+
float
testsf (float *p)
{
diff --git a/gcc/testsuite/gcc.target/arm/pure-code/pure-code.exp
b/gcc/testsuite/gcc.target/arm/pure-code/pure-code.exp
index bf7e4ad..b05cfd6 100644
--- a/gcc/testsuite/gcc.target/arm/pure-code/pure-code.exp
+++ b/gcc/testsuite/gcc.target/arm/pure-code/pure-code.exp
@@ -25,11 +25,8 @@ if ![info exists DEFAULT_CFLAGS] then {
set DEFAULT_CFLAGS " -ansi -pedantic-errors"
}
-# The -mpure-code option is only available for M-profile targets that support
-# the MOVT instruction.
-if {([check_effective_target_arm_thumb2_ok]
- || [check_effective_target_arm_thumb1_movt_ok])
- && [check_effective_target_arm_cortex_m]} then {
+# The -mpure-code option is only available for M-profile targets.
+if {[check_effective_target_arm_cortex_m]} then {
# Initialize `dg'.
dg-init
@@ -56,4 +53,4 @@ set LTO_TORTURE_OPTIONS ${saved-lto_torture_options}
# All done.
dg-finish
-}
+#}
diff --git a/gcc/testsuite/gcc.target/arm/thumb1-Os-mult.c
b/gcc/testsuite/gcc.target/arm/thumb1-Os-mult.c
index b989c42..92772d4 100644
--- a/gcc/testsuite/gcc.target/arm/thumb1-Os-mult.c
+++ b/gcc/testsuite/gcc.target/arm/thumb1-Os-mult.c
@@ -1,6 +1,7 @@
/* { dg-do compile } */
/* { dg-require-effective-target arm_thumb1_ok } */
/* { dg-options "-Os" } */
+/* { dg-skip-if "-mpure-code generates an inline multiplication code sequence" { *-*-* }
{ "-mpure-code" } } */
/* { dg-skip-if "" { ! { arm_thumb1 } } } */
int