On 7/22/21 6:29 AM, Kito Cheng via Gcc-patches wrote:
Could you add a testcase? Otherwise LGTM.
Option: -O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64
void foo(char *dst){
__builtin_memset(dst, 0, 15);
}
On Thu, Jul 22, 2021 at 8:53 PM Christoph Muellner via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
This patch enables the overlap-by-pieces feature of the by-pieces
infrastructure for inlining builtins when the target has set
riscv_slow_unaligned_access_p to false.
To demonstrate the effect for targets with fast unaligned access,
the following code sequences are generated for a 15-byte memset-zero.
Without overlap_op_by_pieces we get:
8e: 00053023 sd zero,0(a0)
92: 00052423 sw zero,8(a0)
96: 00051623 sh zero,12(a0)
9a: 00050723 sb zero,14(a0)
What do I need to do to generate even the non-optimized code above with
gcc 11 [1][2]? Despite passing -mno-strict-align and trying -mtune={rocket,
sifive-7-series}, I only get the fully unrolled version:
foo:
# memcpy-15.c:2: __builtin_memset(dst, 0, 15);
sb zero,0(a0) #, MEM <char[1:15]> [(void *)dst_2(D)]
sb zero,1(a0) #, MEM <char[1:15]> [(void *)dst_2(D)]
sb zero,2(a0) #, MEM <char[1:15]> [(void *)dst_2(D)]
sb zero,3(a0) #, MEM <char[1:15]> [(void *)dst_2(D)]
sb zero,4(a0) #, MEM <char[1:15]> [(void *)dst_2(D)]
sb zero,5(a0) #, MEM <char[1:15]> [(void *)dst_2(D)]
sb zero,6(a0) #, MEM <char[1:15]> [(void *)dst_2(D)]
sb zero,7(a0) #, MEM <char[1:15]> [(void *)dst_2(D)]
sb zero,8(a0) #, MEM <char[1:15]> [(void *)dst_2(D)]
sb zero,9(a0) #, MEM <char[1:15]> [(void *)dst_2(D)]
sb zero,10(a0) #, MEM <char[1:15]> [(void *)dst_2(D)]
sb zero,11(a0) #, MEM <char[1:15]> [(void *)dst_2(D)]
sb zero,12(a0) #, MEM <char[1:15]> [(void *)dst_2(D)]
sb zero,13(a0) #, MEM <char[1:15]> [(void *)dst_2(D)]
sb zero,14(a0) #, MEM <char[1:15]> [(void *)dst_2(D)]
ret
.size foo, .-foo
.ident "GCC: (GNU) 11.1.0"
[1] https://gcc.gnu.org/pipermail/gcc-patches/2021-October/581858.html
[2] https://github.com/kito-cheng/riscv-gcc/tree/riscv-gcc-11.1.0-zbabcs
Thx,
-Vineet
With overlap_op_by_pieces we get:
7e: 00053023 sd zero,0(a0)
82: 000533a3 sd zero,7(a0)
gcc/ChangeLog:
* config/riscv/riscv.c (riscv_overlap_op_by_pieces): New function.
(TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
riscv_overlap_op_by_pieces.
Signed-off-by: Christoph Muellner <cmuell...@gcc.gnu.org>
---
gcc/config/riscv/riscv.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 576960bb37c..98c76ba657a 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -5201,6 +5201,14 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
return riscv_slow_unaligned_access_p;
}
+/* Implement TARGET_OVERLAP_OP_BY_PIECES_P. */
+
+static bool
+riscv_overlap_op_by_pieces (void)
+{
+ return !riscv_slow_unaligned_access_p;
+}
+
/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
static bool
@@ -5525,6 +5533,9 @@ riscv_asan_shadow_offset (void)
#undef TARGET_SLOW_UNALIGNED_ACCESS
#define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
+#undef TARGET_OVERLAP_OP_BY_PIECES_P
+#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
+
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
--
2.31.1