On Mon, Apr 21, 2025 at 6:34 PM Jan Hubicka <hubi...@ucw.cz> wrote:
...
> We originally put CLEAR_RATIO < MOVE_RATIO based on observation that
>   mov $0, mem
> is longer in encoding than
>   mov mem, mem
> and there was a plan to implement optimization to avoid long immediates
> in moves, but it did not materialize (yet).  With SSE this problem
> disappears since SSE stores do not have immediates anyway.

Here is a patch to implement it with UNSPEC_STORE_BY_PIECES.
How does it look?

-- 
H.J.
From c021053a4fea121a3c4a593b2907701c42a626bc Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.to...@gmail.com>
Date: Mon, 21 Apr 2025 21:12:35 +0800
Subject: [PATCH] RFC: Add TARGET_STORE_BY_PIECES_ICODE

Add a target hook that selects the instruction used by the store
by_pieces infrastructure to store each piece, so that a target can
choose a specific instruction with a shorter encoding.

Signed-off-by: H.J. Lu <hjl.to...@gmail.com>
---
 gcc/config/i386/i386.cc      | 27 +++++++++++++++++++++++++++
 gcc/config/i386/i386.md      | 24 ++++++++++++++++++++++++
 gcc/config/i386/x86-tune.def |  6 ++++++
 gcc/doc/tm.texi              |  5 +++++
 gcc/doc/tm.texi.in           |  2 ++
 gcc/expr.cc                  |  2 +-
 gcc/target.def               |  7 +++++++
 gcc/targhooks.cc             |  9 +++++++++
 gcc/targhooks.h              |  1 +
 9 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 28603c2943e..8d289ad1a53 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -26549,6 +26549,31 @@ ix86_redzone_clobber ()
   return NULL_RTX;
 }
 
+/* Implement TARGET_STORE_BY_PIECES_ICODE.
+
+   When STORE_MAX_PIECES equals UNITS_PER_WORD and the
+   X86_TUNE_USE_REGISTER_STORE_BY_PIECES tuning is enabled, return
+   CODE_FOR_store_by_pieces_movsi for SImode if !TARGET_64BIT and
+   CODE_FOR_store_by_pieces_movdi for DImode if TARGET_64BIT.  Every
+   other case defers to default_store_by_pieces_icode.  */
+
+static insn_code
+ix86_store_by_pieces_icode (machine_mode mode)
+{
+  if (STORE_MAX_PIECES == UNITS_PER_WORD
+      && ix86_tune_features [X86_TUNE_USE_REGISTER_STORE_BY_PIECES])
+    {
+      /* SImode in 64-bit mode keeps the default move: a 32-bit
+	 immediate is allowed since it will be used at most twice.  */
+      if (mode == SImode && !TARGET_64BIT)
+	return CODE_FOR_store_by_pieces_movsi;
+      if (mode == DImode && TARGET_64BIT)
+	return CODE_FOR_store_by_pieces_movdi;
+    }
+
+  return default_store_by_pieces_icode (mode);
+}
+
 /* Target-specific selftests.  */
 
 #if CHECKING_P
@@ -26994,6 +27019,8 @@ static const scoped_attribute_specs *const ix86_attribute_table[] =
 
 #undef TARGET_OVERLAP_OP_BY_PIECES_P
 #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
+#undef TARGET_STORE_BY_PIECES_ICODE
+#define TARGET_STORE_BY_PIECES_ICODE ix86_store_by_pieces_icode
 
 #undef TARGET_FLAGS_REGNUM
 #define TARGET_FLAGS_REGNUM FLAGS_REG
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d6b2f2959b2..77761ab5fc3 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -118,6 +118,7 @@ (define_c_enum "unspec" [
   UNSPEC_POPFL
   UNSPEC_OPTCOMX
   UNSPEC_SETCC_SI_SLP
+  UNSPEC_STORE_BY_PIECES
 
   ;; For SSE/MMX support:
   UNSPEC_FIX_NOTRUNC
@@ -2417,6 +2418,29 @@ (define_expand "mov<mode>"
   ""
   "ix86_expand_move (<MODE>mode, operands); DONE;")
 
+;; SI/DI mode stores used by store by_pieces.  The source is forced into
+;; a register so that the store has no immediate, for shorter encoding.
+(define_expand "store_by_pieces_mov<mode>"
+  [(set (match_operand:SWI48x 0 "memory_operand")
+        (match_operand:SWI48x 1 "general_operand"))]
+  ""
+{
+  operands[1] = force_reg (<MODE>mode, operands[1]);
+  emit_insn (gen_store_by_pieces_mov<mode>_1 (operands[0],
+                                              operands[1]));
+  DONE;
+})
+
+(define_insn "store_by_pieces_mov<mode>_1"
+  [(set (match_operand:SWI48x 0 "memory_operand" "=m")
+        (unspec:SWI48x
+         [(match_operand:SWI48x 1 "register_operand" "r")]
+         UNSPEC_STORE_BY_PIECES))]
+  ""
+  "mov\t{%1, %0|%0, %1}"
+  [(set_attr "type" "imov")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*mov<mode>_xor"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
 	(match_operand:SWI48 1 "const0_operand"))
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index c3635c71d06..00a1638ad61 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -636,6 +636,12 @@ DEF_TUNE (X86_TUNE_AVX512_STORE_BY_PIECES, "avx512_store_by_pieces",
 DEF_TUNE (X86_TUNE_AVX512_TWO_EPILOGUES, "avx512_two_epilogues",
 	  m_ZNVER4 | m_ZNVER5)
 
+/* X86_TUNE_USE_REGISTER_STORE_BY_PIECES: Use register stores instead of
+   immediate stores in store_by_pieces.  Not enabled for any processor.  */
+DEF_TUNE (X86_TUNE_USE_REGISTER_STORE_BY_PIECES,
+	  "use_register_store_by_pieces",
+	  0)
+
 /*****************************************************************************/
 /*****************************************************************************/
 /* Historical relics: tuning flags that helps a specific old CPU designs     */
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index a96700c0d38..9753ebcf9c2 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -7186,6 +7186,11 @@ particular mode from being used for block comparisons by returning a
 negative number from this hook.
 @end deftypefn
 
+@deftypefn {Target Hook} insn_code TARGET_STORE_BY_PIECES_ICODE (machine_mode @var{mode})
+This target hook returns insn_code to move the @var{mode} memory used
+by the store @code{by_pieces} infrastructure.
+@end deftypefn
+
 @defmac MOVE_MAX_PIECES
 A C expression used by @code{move_by_pieces} to determine the largest unit
 a load or store used to copy memory is.  Defaults to @code{MOVE_MAX}.
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index eccc4d88493..e8cd831ad32 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4639,6 +4639,8 @@ If you don't define this, a reasonable default is used.
 
 @hook TARGET_COMPARE_BY_PIECES_BRANCH_RATIO
 
+@hook TARGET_STORE_BY_PIECES_ICODE
+
 @defmac MOVE_MAX_PIECES
 A C expression used by @code{move_by_pieces} to determine the largest unit
 a load or store used to copy memory is.  Defaults to @code{MOVE_MAX}.
diff --git a/gcc/expr.cc b/gcc/expr.cc
index 3815c565e2d..caed7c4dcf7 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -1714,7 +1714,7 @@ class store_by_pieces_d : public op_by_pieces_d
 bool
 store_by_pieces_d::prepare_mode (machine_mode mode, unsigned int align)
 {
-  insn_code icode = optab_handler (mov_optab, mode);
+  insn_code icode = targetm.store_by_pieces_icode (mode);
   m_gen_fun = GEN_FCN (icode);
   return icode != CODE_FOR_nothing && align >= GET_MODE_ALIGNMENT (mode);
 }
diff --git a/gcc/target.def b/gcc/target.def
index 6c7cdc8126b..63157dbcde2 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -3909,6 +3909,13 @@ negative number from this hook.",
  int, (machine_mode mode),
  default_compare_by_pieces_branch_ratio)
 
+DEFHOOK
+(store_by_pieces_icode,
+ "This target hook returns insn_code to move the @var{mode} memory used\n\
+by the store @code{by_pieces} infrastructure.",
+ insn_code, (machine_mode mode),
+ default_store_by_pieces_icode)
+
 DEFHOOK
 (slow_unaligned_access,
  "This hook returns true if memory accesses described by the\n\
diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc
index c79458e374e..0233f331f53 100644
--- a/gcc/targhooks.cc
+++ b/gcc/targhooks.cc
@@ -2196,6 +2196,15 @@ default_compare_by_pieces_branch_ratio (machine_mode)
   return 1;
 }
 
+/* Default implementation of TARGET_STORE_BY_PIECES_ICODE: return the
+   insn_code of the mov_optab handler for MODE.  */
+
+insn_code
+default_store_by_pieces_icode (machine_mode mode)
+{
+  return optab_handler (mov_optab, mode);
+}
+
 /* Write PATCH_AREA_SIZE NOPs into the asm outfile FILE around a function
    entry.  If RECORD_P is true and the target supports named sections,
    the location of the NOPs will be recorded in a special object section
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index f16b58798c2..fb38fc9aec4 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -249,6 +249,7 @@ extern bool default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
 						    enum by_pieces_operation,
 						    bool);
 extern int default_compare_by_pieces_branch_ratio (machine_mode);
+extern insn_code default_store_by_pieces_icode (machine_mode);
 
 extern void default_print_patchable_function_entry (FILE *,
 						    unsigned HOST_WIDE_INT,
-- 
2.49.0

Reply via email to