Re: [PATCH v2] LoongArch: Add support for TLS descriptors

2024-02-28 Thread Jinyang He

On 2024-02-29 09:42, mengqinggang wrote:


Generate la.tls.desc macro instruction for TLS descriptors model.

la.tls.desc expands to
   pcalau12i $a0, %desc_pc_hi20(a)
   ld.d  $a1, $a0, %desc_ld_pc_lo12(a)
   addi.d $a0, $a0, %desc_add_pc_lo12(a)
   jirl  $ra, $a1, %desc_call(a)


Sorry if I missed something in the earlier discussion; I am a little confused.
In binutils, `la.tls.desc` is already defined as

#define INSN_LA_TLS_DESC64  \
  "pcalau12i $r4,%%desc_pc_hi20(%2);"   \
  "addi.d $r4,$r4,%%desc_pc_lo12(%2);"  \
  "ld.d $r1,$r4,%%desc_ld(%2);" \
  "jirl $r1,$r1,%%desc_call(%2);",  \

Should this be consistent with binutils?




The default is TLS descriptors, but this can be configured with
-mtls-dialect={desc,trad}.

gcc/ChangeLog:

* config.gcc: Add --with-tls to change the TLS flavor.
* config/loongarch/genopts/loongarch.opt.in: Add -mtls-dialect to
configure TLS flavor.
* config/loongarch/loongarch-opts.h (enum loongarch_tls_type): New.
* config/loongarch/loongarch-protos.h (NUM_SYMBOL_TYPES): New.
* config/loongarch/loongarch.cc (loongarch_symbol_insns): Add
instruction sequence length data for TLS DESC.
(loongarch_legitimize_tls_address): New TLS DESC instruction sequence.
* config/loongarch/loongarch.h (TARGET_TLS_DESC): New.
* config/loongarch/loongarch.md (@got_load_tls_desc): New.
* config/loongarch/loongarch.opt: Regenerated.
---
Changes v1 -> v2:
- Clobber fcc0-fcc7 registers in got_load_tls_desc template.
- Support --with-tls in configure.

  gcc/config.gcc| 15 ++-
  gcc/config/loongarch/genopts/loongarch.opt.in | 14 ++
  gcc/config/loongarch/loongarch-opts.h |  6 +++
  gcc/config/loongarch/loongarch-protos.h   |  3 +-
  gcc/config/loongarch/loongarch.cc | 45 +++
  gcc/config/loongarch/loongarch.h  |  8 
  gcc/config/loongarch/loongarch.md | 36 +++
  gcc/config/loongarch/loongarch.opt| 14 ++
  8 files changed, 130 insertions(+), 11 deletions(-)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index a0f9c672308..72a5e992821 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -2546,6 +2546,7 @@ loongarch*-*-linux*)
# Force .init_array support.  The configure script cannot always
# automatically detect that GAS supports it, yet we require it.
gcc_cv_initfini_array=yes
+   with_tls=${with_tls:-desc}
;;
  
  loongarch*-*-elf*)

@@ -4987,7 +4988,7 @@ case "${target}" in
;;
  
  	loongarch*-*)

-   supported_defaults="abi arch tune fpu simd multilib-default 
strict-align-lib"
+   supported_defaults="abi arch tune fpu simd multilib-default 
strict-align-lib tls"
  
  		# Local variables

unset \
@@ -5245,6 +5246,18 @@ case "${target}" in
with_multilib_list="${abi_base}/${abi_ext}"
fi
  
+		# Handle --with-tls.

+   case "$with_tls" in
+   "" \
+   | trad | desc)
+   # OK
+   ;;
+   *)
+   echo "Unknown TLS method used in --with-tls=$with_tls" 1>&2
+   exit 1
+   ;;
+   esac
+
# Check if the configured default ABI combination is included in
# ${with_multilib_list}.
loongarch_multilib_list_sane=no
diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in 
b/gcc/config/loongarch/genopts/loongarch.opt.in
index 02f918053f5..2cc943ef683 100644
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
@@ -262,3 +262,17 @@ default value is 4.
  ; CPUCFG independently, so we use bit flags to specify them.
  TargetVariable
  HOST_WIDE_INT la_isa_evolution = 0
+
+Enum
+Name(tls_type) Type(enum loongarch_tls_type)
+The possible TLS dialects:
+
+EnumValue
+Enum(tls_type) String(trad) Value(TLS_TRADITIONAL)
+
+EnumValue
+Enum(tls_type) String(desc) Value(TLS_DESCRIPTORS)
+
+mtls-dialect=
+Target RejectNegative Joined Enum(tls_type) Var(loongarch_tls_dialect) 
Init(TLS_DESCRIPTORS) Save
+Specify TLS dialect.
diff --git a/gcc/config/loongarch/loongarch-opts.h 
b/gcc/config/loongarch/loongarch-opts.h
index 586e67e65ee..a08ab6fac10 100644
--- a/gcc/config/loongarch/loongarch-opts.h
+++ b/gcc/config/loongarch/loongarch-opts.h
@@ -134,4 +134,10 @@ struct loongarch_flags {
  #define HAVE_AS_TLS_LE_RELAXATION 0
  #endif
  
+/* TLS types.  */

+enum loongarch_tls_type {
+  TLS_TRADITIONAL,
+  TLS_DESCRIPTORS
+};
+
  #endif /* LOONGARCH_OPTS_H */
diff --git a/gcc/config/loongarch/loongarch-protos.h 
b/gcc/config/loongarch/loongarch-protos.h
index 1fdfda9af01..6b417a3c371 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -53,8 +53,9 @@ enum loongarch_symbol_type {
SYMBOL

Re: [PATCH] LoongArch: Allow -mcmodel=extreme and model attribute with -mexplicit-relocs=auto

2023-12-07 Thread Jinyang He

On 2023-12-08 10:04, chenglulu wrote:



在 2023/12/7 下午8:20, Xi Ruoyao 写道:

There seems no real reason to require -mexplicit-relocs=always for
-mcmodel=extreme or model attribute.  As the linker does not know how to
relax a 3-operand la.local or la.global pseudo instruction, just emit
explicit relocs for SYMBOL_PCREL64, and under TARGET_CMODEL_EXTREME also
SYMBOL_GOT_DISP.


I think there is no problem. HeJinyang is testing it, and if the tests show
no problems, you can merge it.


Thanks!



Thanks! Now I can compile the Linux kernel with `-mexplicit-relocs=auto`,
and the generated instructions are as expected. (The kernel relaxes some
pcalau12i+addi.d pairs to pcaddi, and modules still access per-CPU variables
via the 64-bit la.pcrel.)



[PATCH] LoongArch: Fix atomic_exchange make comparison and may jump out

2022-11-15 Thread Jinyang He
gcc/ChangeLog:

* config/loongarch/sync.md:
Add atomic_cas_value_exchange_and_7 and fix atomic_exchange.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/sync-1.c: New test.
---
 gcc/config/loongarch/sync.md|  27 -
 gcc/testsuite/gcc.target/loongarch/sync-1.c | 104 
 2 files changed, 129 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/sync-1.c

diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
index 0c4f1983e..8a8e6247b 100644
--- a/gcc/config/loongarch/sync.md
+++ b/gcc/config/loongarch/sync.md
@@ -448,6 +448,29 @@
 }
   [(set (attr "length") (const_int 32))])
 
+(define_insn "atomic_cas_value_exchange_and_7_"
+  [(set (match_operand:GPR 0 "register_operand" "=&r")
+   (match_operand:GPR 1 "memory_operand" "+ZC"))
+   (set (match_dup 1)
+   (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ")
+ (match_operand:GPR 3 "reg_or_0_operand" "rJ")
+ (match_operand:GPR 4 "reg_or_0_operand" "rJ")
+ (match_operand:GPR 5 "reg_or_0_operand"  "rJ")
+ (match_operand:SI 6 "const_int_operand")] ;; model
+UNSPEC_SYNC_EXCHANGE))
+   (clobber (match_scratch:GPR 7 "=&r"))]
+  ""
+{
+  return "%G6\\n\\t"
+"1:\\n\\t"
+"ll.\\t%0,%1\\n\\t"
+"and\\t%7,%0,%z3\\n\\t"
+"or%i5\\t%7,%7,%5\\n\\t"
+"sc.\\t%7,%1\\n\\t"
+"beqz\\t%7,1b\\n\\t";
+}
+  [(set (attr "length") (const_int 20))])
+
 (define_expand "atomic_exchange"
   [(set (match_operand:SHORT 0 "register_operand")
(unspec_volatile:SHORT
@@ -459,9 +482,9 @@
   ""
 {
   union loongarch_gen_fn_ptrs generator;
-  generator.fn_7 = gen_atomic_cas_value_cmp_and_7_si;
+  generator.fn_7 = gen_atomic_cas_value_exchange_and_7_si;
   loongarch_expand_atomic_qihi (generator, operands[0], operands[1],
-   operands[1], operands[2], operands[3]);
+   const0_rtx, operands[2], operands[3]);
   DONE;
 })
 
diff --git a/gcc/testsuite/gcc.target/loongarch/sync-1.c 
b/gcc/testsuite/gcc.target/loongarch/sync-1.c
new file mode 100644
index 0..cebed6a9b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/sync-1.c
@@ -0,0 +1,104 @@
+/* Test __sync_test_and_set in atomic_exchange */
+/* { dg-do run } */
+/* { dg-options "-lpthread -std=c11" } */
+
+#include 
+#include 
+#include 
+#include 
+
+#define NR_THREAD 16
+#define NR_DATA 1000
+#define ITER_COUNT 1
+
+static int _data[NR_DATA];
+static char _lock;
+static int _overcnt;
+
+static inline void proc_yield(int cnt)
+{
+  __asm__ __volatile__("":::"memory");
+}
+
+static void unlock()
+{
+  return atomic_store_explicit(&_lock, 0, memory_order_seq_cst);
+}
+
+static int trylock()
+{
+  return atomic_exchange_explicit(&_lock, 1, memory_order_acquire) == 0;
+}
+
+static void lockslow()
+{
+  for (int i = 0;; i++) {
+if (i < 10)
+  proc_yield(i);
+else
+  sched_yield();
+if (atomic_load_explicit(&_lock, memory_order_relaxed) == 0
+  && atomic_exchange_explicit(&_lock, 1, memory_order_acquire) == 0)
+  return;
+  }
+}
+
+static void lock()
+{
+  if (trylock())
+return;
+  lockslow();
+}
+
+static void checkeq(int a, int b)
+{
+  if (a != b)
+__builtin_abort();
+}
+
+static void adddata()
+{
+  int i, v;
+  lock();
+  v = _data[0];
+  for (i = 0; i < NR_DATA; i++) {
+checkeq(_data[i], v);
+_data[i]++;
+  }
+  unlock();
+}
+
+static void backoff()
+{
+  int i, data[NR_DATA] = {0};
+  for (i = 0; i < NR_DATA; i++) {
+data[i]++;
+checkeq(data[i], 1);
+  }
+}
+
+static void *write_mutex_thread(void *unused)
+{
+  int i;
+  for (i = 0; i < ITER_COUNT; i++) {
+adddata();
+backoff();
+  }
+  atomic_fetch_add(&_overcnt, 1);
+}
+
+int main()
+{
+  int cnt;
+
+  pthread_t threads[NR_THREAD];
+  for (int i = 0; i < NR_THREAD; i++)
+pthread_create(&threads[i], 0, write_mutex_thread, NULL);
+  for (int i = 0; i < NR_THREAD; i++)
+pthread_detach(threads[i]);
+  while(cnt != NR_THREAD) {
+sched_yield();
+cnt = atomic_load(&_overcnt);
+  }
+  return 0;
+}
-- 
2.34.3



Re: [PATCH] LoongArch: Fix atomic_exchange make comparison and may jump out

2022-11-15 Thread Jinyang He

On 2022/11/15 下午10:21, Xi Ruoyao wrote:


On Tue, 2022-11-15 at 21:03 +0800, Jinyang He wrote:

gcc/ChangeLog:

* config/loongarch/sync.md:
Add atomic_cas_value_exchange_and_7 and fix atomic_exchange.

nit:

* config/loongarch/sync.md (atomic_cas_value_exchange_and_7):
New define_insn.
(atomic_exchange): Use atomic_cas_value_exchange_and_7 instead
of atomic_cas_value_cmp_and.


gcc/testsuite/ChangeLog:

* gcc.target/loongarch/sync-1.c: New test.

Likewise, ChangeLog content should be indented with a tab. (Not 8
spaces: if my mail client changes my tab to 8 spaces I'm sorry).

/* snip */


OK. Thanks for the clear commit message and the explanation of format.



+  return "%G6\\n\\t"
+    "1:\\n\\t"
+    "ll.\\t%0,%1\\n\\t"
+    "and\\t%7,%0,%z3\\n\\t"
+    "or%i5\\t%7,%7,%5\\n\\t"
+    "sc.\\t%7,%1\\n\\t"
+    "beqz\\t%7,1b\\n\\t";

Do we need a "dbar 0x700" after beqz?

/* snip */


That's worth discussing. Actually, I don't see any dbar hint definition
like 0x700 in the manual right now.
Besides, I think what should be provided here is a relaxed version, and
whether the barrier exists or not depends on the specific memory_order.

https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#_dbar





diff --git a/gcc/testsuite/gcc.target/loongarch/sync-1.c 
b/gcc/testsuite/gcc.target/loongarch/sync-1.c
new file mode 100644
index 0..cebed6a9b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/sync-1.c
@@ -0,0 +1,104 @@
+/* Test __sync_test_and_set in atomic_exchange */
+/* { dg-do run } */
+/* { dg-options "-lpthread -std=c11" } */

This test seems not deterministic.  And the use of sched_yield is very
tricky, as the man page says:

sched_yield() is intended for use with  real-time  scheduling  policies
(i.e., SCHED_FIFO or SCHED_RR).  Use of sched_yield() with nondetermin‐
istic scheduling policies such as SCHED_OTHER is unspecified  and  very
likely means your application design is broken.


Yes, there might be something wrong. The test is just a variant of one from
llvm::tsan. It was presented to prove that the old implementation did
have problems.




I'd suggest to create a bug report at https://gcc.gnu.org/bugzilla


Thanks, I need to do that. I must have missed something at
https://gcc.gnu.org/contribute.html.



and
post this test in the PR.  Then add the PR number into the changelog,
and just add a { dg-do compile } and { dg-final { scan-assembler ... } }
test into the testsuite to ensure the correct ll/sc loop is generated.

A bug report also emphasises that this is a bug fix, which is suitable
for GCC 13 (in stage 3 now) and GCC 12 (the fix will be backported).


I will create a bug report where we all can discuss it.
Thanks for your review and help. :-)



Re: [PATCH] LoongArch: Fix atomic_exchange make comparison and may jump out

2022-11-16 Thread Jinyang He

On 2022/11/16 下午7:46, Xi Ruoyao wrote:


On Wed, 2022-11-16 at 10:11 +0800, Jinyang He wrote:


+  return "%G6\\n\\t"
+    "1:\\n\\t"
+    "ll.\\t%0,%1\\n\\t"
+    "and\\t%7,%0,%z3\\n\\t"
+    "or%i5\\t%7,%7,%5\\n\\t"
+    "sc.\\t%7,%1\\n\\t"
+    "beqz\\t%7,1b\\n\\t";

Do we need a "dbar 0x700" after beqz?

/* snip */

That's worth discussing. Actually I don't see any dbar hint definition
like 0x700 in the manual right now.
Besides, I think what should be provided here is a relaxed version. And
whether the barrier exsit or not is depend on the specific memory_order.

It's not related to memory order, but for a hardware issue workaround.
Jiaxun told me (via LKML):

I had checked with Loongson guys and they confirmed that the
workaround still needs to be applied to latest 3A4000 processors,
including 3A4000 for MIPS and 3A5000 for LoongArch.

Though, the reason behind the workaround varies with the evaluation

of their uArch, for GS464V based core, barrier is required as the
uArch design allows regular load to be reordered after an atomic
linked load, and that would break assumption of compiler atomic
constraints.


A barrier certainly seems to be needed, but should it go before or after?
That is beyond my knowledge, so I am cc'ing huang...@loongson.cn for help.




Without these dbar instructions I'd got random test failures in GCC
libgomp test suite.

We use a non-zero hint here because it is treated exactly same as zero
in 3A5000, and the future LoongArch processors can fix the issue and
ignore the dbar 0x700 instruction.

Thanks, it's a nice workaround.



Re: [PATCH] LoongArch: Fix atomic_exchange make comparison and may jump out

2022-11-16 Thread Jinyang He

On 2022/11/17 上午9:39, Jinyang He wrote:


On 2022/11/16 下午7:46, Xi Ruoyao wrote:


On Wed, 2022-11-16 at 10:11 +0800, Jinyang He wrote:


+  return "%G6\\n\\t"
+    "1:\\n\\t"
+    "ll.\\t%0,%1\\n\\t"
+    "and\\t%7,%0,%z3\\n\\t"
+    "or%i5\\t%7,%7,%5\\n\\t"
+    "sc.\\t%7,%1\\n\\t"
+    "beqz\\t%7,1b\\n\\t";

Do we need a "dbar 0x700" after beqz?

/* snip */

That's worth discussing. Actually I don't see any dbar hint definition
like 0x700 in the manual right now.
Besides, I think what should be provided here is a relaxed version. And
whether the barrier exsit or not is depend on the specific 
memory_order.

It's not related to memory order, but for a hardware issue workaround.
Jiaxun told me (via LKML):

    I had checked with Loongson guys and they confirmed that the
    workaround still needs to be applied to latest 3A4000 processors,
    including 3A4000 for MIPS and 3A5000 for LoongArch.
        Though, the reason behind the workaround varies with the 
evaluation

    of their uArch, for GS464V based core, barrier is required as the
    uArch design allows regular load to be reordered after an atomic
    linked load, and that would break assumption of compiler atomic
    constraints.


That certainly seems to be needed, but before or after. It's beyond my
recognition and cc huang...@loongson.cn for help.



Pei told me that ll-sc currently works as follows:

uArch like:
  ll -> (ll.dbar ll.ld_atomic)
  sc -> (sc.dbar sc.st_atomic)

exchange:
ll.dbar
<---+
ll.ld_atomic $rd    |
...(no jmp) |
sc.dbar |
sc.st_atomic $rd    |
ld $rj -can-not-emit-at-+

The load of $rj cannot be issued between ll.dbar and ll.ld_atomic because
sc.dbar acts as a barrier against it.



compare and exchange:
ll.dbar
<---+
ll.ld_atomic $rd    |
...(jmp) ---+--+
sc.dbar |  |
sc.st_atomic $rd    |  |
    |   <--+
ld $rj -may-emit-at-+

Jumping out of the ll-sc sequence may allow the load of $rj to be issued
between ll.dbar and ll.ld_atomic.


Thus, a plain exchange does not need a dbar.







Without these dbar instructions I'd got random test failures in GCC
libgomp test suite.


Which test suite?




We use a non-zero hint here because it is treated exactly same as zero
in 3A5000, and the future LoongArch processors can fix the issue and
ignore the dbar 0x700 instruction.

Thanks, it's a nice workaround.




Re: [PATCH] LoongArch: Fix atomic_exchange make comparison and may jump out

2022-11-16 Thread Jinyang He

On 2022/11/17 上午11:38, Xi Ruoyao wrote:


On Thu, 2022-11-17 at 10:55 +0800, Jinyang He wrote:

On 2022/11/17 上午9:39, Jinyang He wrote:


On 2022/11/16 下午7:46, Xi Ruoyao wrote:


On Wed, 2022-11-16 at 10:11 +0800, Jinyang He wrote:


+  return "%G6\\n\\t"
+    "1:\\n\\t"
+    "ll.\\t%0,%1\\n\\t"
+    "and\\t%7,%0,%z3\\n\\t"
+    "or%i5\\t%7,%7,%5\\n\\t"
+    "sc.\\t%7,%1\\n\\t"
+    "beqz\\t%7,1b\\n\\t";

Do we need a "dbar 0x700" after beqz?

/* snip */

That's worth discussing. Actually I don't see any dbar hint definition
like 0x700 in the manual right now.
Besides, I think what should be provided here is a relaxed version. And
whether the barrier exsit or not is depend on the specific
memory_order.

It's not related to memory order, but for a hardware issue workaround.
Jiaxun told me (via LKML):

     I had checked with Loongson guys and they confirmed that the
     workaround still needs to be applied to latest 3A4000 processors,
     including 3A4000 for MIPS and 3A5000 for LoongArch.
         Though, the reason behind the workaround varies with the
evaluation
     of their uArch, for GS464V based core, barrier is required as the
     uArch design allows regular load to be reordered after an atomic
     linked load, and that would break assumption of compiler atomic
     constraints.

That certainly seems to be needed, but before or after. It's beyond my
recognition and cc huang...@loongson.cn for help.


Pei told me the ll-sc works at present like follows,

uArch like:
    ll -> (ll.dbar ll.ld_atomic)
    sc -> (sc.dbar sc.st_atomic)

exchange:
ll.dbar
<---+
ll.ld_atomic $rd    |
...(no jmp) |
sc.dbar |
sc.st_stomic $rd    |
ld $rj -can-not-emit-at-+

The load $rj can not emit between ll.dbar and ll.ld_atomic because the
sc.dbar barrier it.


compare and exchange:
ll.dbar
<---+
ll.ld_atomic $rd    |
...(jmp) ---+--+
sc.dbar |  |
sc.st_stomic $rd    |  |
  |   <--+
ld $rj -may-emit-at-+

Jumping out ll-sc may lead loading $rj emit between ll.dbar and ll.atomic.


Thus, exchange not need dbar.





Without these dbar instructions I'd got random test failures in GCC
libgomp test suite.

Which test suite?

I mean when we didn't use dbar 0x700 for compare-and-exchange (during
the early development stage of GCC for LoongArch) I observed these
failures.

So we do need an additional dbar for compare-and-exchange, but do not
need it for a bare atomic exchange?

Yes.



[PATCH v2] LoongArch: Fix atomic_exchange expanding [PR107713]

2022-11-16 Thread Jinyang He
We used to expand atomic_exchange_n(ptr, new, mem_order) for subword types
into something like:

{
  __typeof__(*ptr) t = atomic_load_n(ptr, mem_order);
  atomic_compare_exchange_n(ptr, &t, new, true, mem_order, mem_order);
  return t;
}

It's incorrect because another thread may store a different value into *ptr
after atomic_load_n.  Then atomic_compare_exchange_n will not store into
*ptr, but atomic_exchange_n should always perform the store.

gcc/ChangeLog:

PR target/107713
* config/loongarch/sync.md
(atomic_cas_value_exchange_7_): New define_insn.
(atomic_exchange): Use atomic_cas_value_exchange_7_si instead of
atomic_cas_value_cmp_and_7_si.

gcc/testsuite/ChangeLog:

PR target/107713
* gcc.target/loongarch/pr107713-1.c: New test.
* gcc.target/loongarch/pr107713-2.c: New test.
---
 Thanks to Ruoyao, who helped me with the community process, improved the
 commit message and provided a more straightforward test.
 Thanks to Pei, who gave me a deeper understanding of ll-sc offline.

 gcc/config/loongarch/sync.md  | 27 +-
 .../gcc.target/loongarch/pr107713-1.c | 50 +++
 .../gcc.target/loongarch/pr107713-2.c |  9 
 3 files changed, 84 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/pr107713-1.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/pr107713-2.c

diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
index 0c4f1983e88..45be1442439 100644
--- a/gcc/config/loongarch/sync.md
+++ b/gcc/config/loongarch/sync.md
@@ -448,6 +448,29 @@
 }
   [(set (attr "length") (const_int 32))])
 
+(define_insn "atomic_cas_value_exchange_7_"
+  [(set (match_operand:GPR 0 "register_operand" "=&r")
+   (match_operand:GPR 1 "memory_operand" "+ZC"))
+   (set (match_dup 1)
+   (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ")
+ (match_operand:GPR 3 "reg_or_0_operand" "rJ")
+ (match_operand:GPR 4 "reg_or_0_operand" "rJ")
+ (match_operand:GPR 5 "reg_or_0_operand"  "rJ")
+ (match_operand:SI 6 "const_int_operand")] ;; model
+UNSPEC_SYNC_EXCHANGE))
+   (clobber (match_scratch:GPR 7 "=&r"))]
+  ""
+{
+  return "%G6\\n\\t"
+"1:\\n\\t"
+"ll.\\t%0,%1\\n\\t"
+"and\\t%7,%0,%z3\\n\\t"
+"or%i5\\t%7,%7,%5\\n\\t"
+"sc.\\t%7,%1\\n\\t"
+"beqz\\t%7,1b\\n\\t";
+}
+  [(set (attr "length") (const_int 20))])
+
 (define_expand "atomic_exchange"
   [(set (match_operand:SHORT 0 "register_operand")
(unspec_volatile:SHORT
@@ -459,9 +482,9 @@
   ""
 {
   union loongarch_gen_fn_ptrs generator;
-  generator.fn_7 = gen_atomic_cas_value_cmp_and_7_si;
+  generator.fn_7 = gen_atomic_cas_value_exchange_7_si;
   loongarch_expand_atomic_qihi (generator, operands[0], operands[1],
-   operands[1], operands[2], operands[3]);
+   const0_rtx, operands[2], operands[3]);
   DONE;
 })
 
diff --git a/gcc/testsuite/gcc.target/loongarch/pr107713-1.c 
b/gcc/testsuite/gcc.target/loongarch/pr107713-1.c
new file mode 100644
index 000..d1536c95b27
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/pr107713-1.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target pthread } */
+/* { dg-options "-pthread" } */
+
+#include 
+
+char x, x1, x2;
+
+void *
+work1 (void *)
+{
+  for (int i = 0; i < 100; i++)
+x1 = __atomic_exchange_n (&x, x1, __ATOMIC_SEQ_CST);
+  return NULL;
+}
+
+void *
+work2 (void *)
+{
+  for (int i = 0; i < 100; i++)
+x2 = __atomic_exchange_n (&x, x2, __ATOMIC_SEQ_CST);
+  return NULL;
+}
+
+void
+test (void)
+{
+  x = 0;
+  x1 = 1;
+  x2 = 2;
+  pthread_t w1, w2;
+  if (pthread_create (&w1, NULL, work1, NULL) != 0)
+__builtin_abort ();
+  if (pthread_create (&w2, NULL, work2, NULL) != 0)
+__builtin_abort ();
+  if (pthread_join (w1, NULL) != 0)
+__builtin_abort ();
+  if (pthread_join (w2, NULL) != 0)
+__builtin_abort ();
+  if ((x ^ x1 ^ x2) != 3)
+__builtin_abort ();
+}
+
+int
+main ()
+{
+  int i;
+  for (i = 0; i < 1; i++)
+test ();
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/pr107713-2.c 
b/gcc/testsuite/gcc.target/loongarch/pr107713-2.c
new file mode 100644
index 000..82d44db3d51
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/pr107713-2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "beq|bne" 1 } } */
+
+char
+t (char *p, char x)
+{
+  return __atomic_exchange_n (p, x, __ATOMIC_RELAXED);
+}
-- 
2.37.3



[PATCH v3] LoongArch: Mask shift offset when emit {xv, v}{srl, sll, sra} with sameimm vector

2024-11-27 Thread Jinyang He
For {xv,v}{srl,sll,sra}, the constraint `vector_same_uimm6` causes overflow
when emitting the {w,h,b} variants. Since the number of bits actually shifted
is the register value modulo the element bit width, it is unnecessary to
constrain the range. Simply mask the shift amount with the unit bit width,
without any constraint on the shift range.

gcc/ChangeLog:

* config/loongarch/constraints.md (Uuv6, Uuvx): Remove Uuv6,
add Uuvx as replicated vector const with unsigned range [0,umax].
* config/loongarch/lasx.md (xvsrl, xvsra, xvsll): Mask shift
offset by its unit bits.
* config/loongarch/lsx.md (vsrl, vsra, vsll): Likewise.
* config/loongarch/loongarch-protos.h
(loongarch_const_vector_same_int_p): Set default for low and high.
* config/loongarch/predicates.md: Replace reg_or_vector_same_uimm6
_operand to reg_or_vector_same_uimm_operand.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/vector/lasx/lasx-shift-sameimm-vec.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-shift-sameimm-vec.c: New test.
---
v2: Fix indent in lsx.md and lasx.md.
Use "dg-do assemble" in the tests, as suggested by Ruoyao.
v3: Re-enable scan-assembler.

 gcc/config/loongarch/constraints.md   | 14 ++--
 gcc/config/loongarch/lasx.md  | 60 
 gcc/config/loongarch/loongarch-protos.h   |  5 +-
 gcc/config/loongarch/lsx.md   | 60 
 gcc/config/loongarch/predicates.md|  8 +--
 .../vector/lasx/lasx-shift-sameimm-vec.c  | 72 +++
 .../vector/lsx/lsx-shift-sameimm-vec.c| 72 +++
 7 files changed, 254 insertions(+), 37 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-shift-sameimm-vec.c
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-shift-sameimm-vec.c

diff --git a/gcc/config/loongarch/constraints.md 
b/gcc/config/loongarch/constraints.md
index 18da8b31f49..66ef1073fad 100644
--- a/gcc/config/loongarch/constraints.md
+++ b/gcc/config/loongarch/constraints.md
@@ -334,19 +334,19 @@
   (and (match_code "const_vector")
(match_test "loongarch_const_vector_same_int_p (op, mode, -16, 15)")))
 
-(define_constraint "Uuv6"
-  "@internal
-   A replicated vector const in which the replicated value is in the range
-   [0,63]."
-  (and (match_code "const_vector")
-   (match_test "loongarch_const_vector_same_int_p (op, mode, 0, 63)")))
-
 (define_constraint "Urv8"
   "@internal
A replicated vector const with replicated byte values as well as elements"
   (and (match_code "const_vector")
(match_test "loongarch_const_vector_same_bytes_p (op, mode)")))
 
+(define_constraint "Uuvx"
+  "@internal
+   A replicated vector const in which the replicated value is in the unsigned
+   range [0,umax]."
+  (and (match_code "const_vector")
+   (match_test "loongarch_const_vector_same_int_p (op, mode)")))
+
 (define_memory_constraint "ZC"
   "A memory operand whose address is formed by a base register and offset
that is suitable for use in instructions with the same addressing mode
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index 457ed163f31..90778dd8ff9 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -1013,11 +1013,23 @@
   [(set (match_operand:ILASX 0 "register_operand" "=f,f")
(lshiftrt:ILASX
  (match_operand:ILASX 1 "register_operand" "f,f")
- (match_operand:ILASX 2 "reg_or_vector_same_uimm6_operand" "f,Uuv6")))]
+ (match_operand:ILASX 2 "reg_or_vector_same_uimm_operand" "f,Uuvx")))]
   "ISA_HAS_LASX"
-  "@
-   xvsrl.\t%u0,%u1,%u2
-   xvsrli.\t%u0,%u1,%E2"
+{
+  switch (which_alternative)
+{
+case 0:
+  return "xvsrl.\t%u0,%u1,%u2";
+case 1:
+  {
+   unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (operands[2], 
0));
+   operands[2] = GEN_INT (val & (GET_MODE_UNIT_BITSIZE (mode) - 1));
+   return "xvsrli.\t%u0,%u1,%d2";
+  }
+default:
+  gcc_unreachable ();
+}
+}
   [(set_attr "type" "simd_shift")
(set_attr "mode" "")])
 
@@ -1026,11 +1038,23 @@
   [(set (match_operand:ILASX 0 "register_operand" "=f,f")
(ashiftrt:ILASX
  (match_operand:ILASX 1 "register_operand" "f,f")
- (match_operand:ILASX 2 "reg_or_vector_same_uimm6_operand" "f,Uuv6")))]
+ (match_operand:ILASX 2 "reg_or_vector_same_uimm_operand" "f,Uuvx")))]
   "ISA_HAS_LASX"
-  "@
-   xvsra.\t%u0,%u1,%u2
-   xvsrai.\t%u0,%u1,%E2"
+{
+  switch (which_alternative)
+{
+case 0:
+  return "xvsra.\t%u0,%u1,%u2";
+case 1:
+  {
+   unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (operands[2], 
0));
+   operands[2] = GEN_INT (val & (GET_MODE_UNIT_BITSIZE (mode) - 1));
+   return "xvsrai.\t%u0,%u1,%d2";
+  }
+default:
+  gcc_unreachable ();
+}
+}
   [(set_attr "type" "simd_shift")
(set_attr "mode" "")])
 

Re: [PATCH] LoongArch: Mask shift offset when emit {xv,v}{srl,sll,sra} with sameimm vector.

2024-11-26 Thread Jinyang He

在 2024/11/27 上午10:14, Xi Ruoyao 写道:


On Tue, 2024-11-26 at 18:37 +0800, Jinyang He wrote:

For {xv,v}{srl,sll,sra}, the constraint `vector_same_uimm6` cause overflow
in when emit {w,h,b}. Since the number of bits shifted is the remainder of
the register value, it is actually unnecessary to constrain the range.
Simply mask the shift number with the unit-bit-width, without any
constraint on the shift range.

Some comments below.

/* snip */


diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index 457ed163f31..da3bc758a29 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -1013,11 +1013,23 @@
    [(set (match_operand:ILASX 0 "register_operand" "=f,f")
    (lshiftrt:ILASX
      (match_operand:ILASX 1 "register_operand" "f,f")
-     (match_operand:ILASX 2 "reg_or_vector_same_uimm6_operand" "f,Uuv6")))]
+     (match_operand:ILASX 2 "reg_or_vector_same_uimm_operand" "f,Uuvx")))]
    "ISA_HAS_LASX"
-  "@
-   xvsrl.\t%u0,%u1,%u2
-   xvsrli.\t%u0,%u1,%E2"
+{
+  switch (which_alternative)
+    {
+    case 0:
+  return "xvsrl.\t%u0,%u1,%u2";
+    case 1:
+  {
+    unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (operands[2], 0));
+    operands[2] = GEN_INT (val & (GET_MODE_UNIT_BITSIZE (mode) - 1));
+    return "xvsrli.\t%u0,%u1,%d2";

Incorrect indent.  Check editor and mail client setting?


Sorry, my mistake: I had tabstop=4 and expandtab set. Thanks for the reminder.




+  }
+    default:
+  gcc_unreachable ();
+    }
+}
    [(set_attr "type" "simd_shift")
     (set_attr "mode" "")])
  
@@ -1026,11 +1038,23 @@

    [(set (match_operand:ILASX 0 "register_operand" "=f,f")
    (ashiftrt:ILASX
      (match_operand:ILASX 1 "register_operand" "f,f")
-     (match_operand:ILASX 2 "reg_or_vector_same_uimm6_operand" "f,Uuv6")))]
+     (match_operand:ILASX 2 "reg_or_vector_same_uimm_operand" "f,Uuvx")))]
    "ISA_HAS_LASX"
-  "@
-   xvsra.\t%u0,%u1,%u2
-   xvsrai.\t%u0,%u1,%E2"
+{
+  switch (which_alternative)
+    {
+    case 0:
+  return "xvsra.\t%u0,%u1,%u2";
+    case 1:
+  {
+    unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (operands[2], 0));
+    operands[2] = GEN_INT (val & (GET_MODE_UNIT_BITSIZE (mode) - 1));
+    return "xvsrai.\t%u0,%u1,%d2";

Likewise.


+  }
+    default:
+  gcc_unreachable ();
+    }
+}
    [(set_attr "type" "simd_shift")
     (set_attr "mode" "")])
  
@@ -1039,11 +1063,23 @@

    [(set (match_operand:ILASX 0 "register_operand" "=f,f")
    (ashift:ILASX
      (match_operand:ILASX 1 "register_operand" "f,f")
-     (match_operand:ILASX 2 "reg_or_vector_same_uimm6_operand" "f,Uuv6")))]
+     (match_operand:ILASX 2 "reg_or_vector_same_uimm_operand" "f,Uuvx")))]
    "ISA_HAS_LASX"
-  "@
-   xvsll.\t%u0,%u1,%u2
-   xvslli.\t%u0,%u1,%E2"
+{
+  switch (which_alternative)
+    {
+    case 0:
+  return "xvsll.\t%u0,%u1,%u2";
+    case 1:
+  {
+    unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (operands[2], 0));
+    operands[2] = GEN_INT (val & (GET_MODE_UNIT_BITSIZE (mode) - 1));
+    return "xvslli.\t%u0,%u1,%d2";

Likewise.


+  }
+    default:
+  gcc_unreachable ();
+    }
+}
    [(set_attr "type" "simd_shift")
     (set_attr "mode" "")])
  
diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h

index bc1b94b41d3..be37f284f39 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -113,8 +113,9 @@ extern rtx loongarch_return_addr (int, rtx);
  
  extern bool loongarch_const_vector_same_val_p (rtx, machine_mode);

  extern bool loongarch_const_vector_same_bytes_p (rtx, machine_mode);
-extern bool loongarch_const_vector_same_int_p (rtx, machine_mode, 
HOST_WIDE_INT,
-     HOST_WIDE_INT);
+extern bool loongarch_const_vector_same_int_p (rtx, machine_mode,
+      HOST_WIDE_INT low = HOST_WIDE_INT_MIN,
+      HOST_WIDE_INT high = HOST_WIDE_INT_MAX);
  extern bool loongarch_const_vector_shuffle_set_p (rtx, machine_mode);
  extern bool loongarch_const_vector_bitimm_set_p (rtx, machine_mode);
  extern bool loongarch_const_vector_bitimm_clr_p (rtx, machine_mode);
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
index a9004290371..314544e012d 100644
--- a/gcc/config/loongarch/lsx.md
+++ b/gcc/config/loongarch/lsx.md
@@ -879,11 +879,23 @@
    [(set (match_operand:ILSX 0

[PATCH v2] LoongArch: Mask shift offset when emit {xv, v}{srl, sll, sra} with sameimm vector

2024-11-27 Thread Jinyang He
For {xv,v}{srl,sll,sra}, the constraint `vector_same_uimm6` causes the
immediate to overflow when emitting the {w,h,b} forms. Since the number of
bits shifted is the remainder of the register value modulo the element bit
width, it is actually unnecessary to constrain the range. Simply mask the
shift amount to the unit bit width, without any constraint on the shift range.

gcc/ChangeLog:

* config/loongarch/constraints.md (Uuv6, Uuvx): Remove Uuv6,
add Uuvx as replicated vector const with unsigned range [0,umax].
* config/loongarch/lasx.md (xvsrl, xvsra, xvsll): Mask shift
offset by its unit bits.
* config/loongarch/lsx.md (vsrl, vsra, vsll): Likewise.
* config/loongarch/loongarch-protos.h
(loongarch_const_vector_same_int_p): Set default for low and high.
* config/loongarch/predicates.md: Replace
reg_or_vector_same_uimm6_operand with reg_or_vector_same_uimm_operand.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/vector/lasx/lasx-shift-sameimm-vec.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-shift-sameimm-vec.c: New test.
---
v2: Fix indent in lsx.md and lasx.md.
Use "dg-do assemble" in the tests, as suggested by Ruoyao.

 gcc/config/loongarch/constraints.md   | 14 ++---
 gcc/config/loongarch/lasx.md  | 60 +++
 gcc/config/loongarch/loongarch-protos.h   |  5 +-
 gcc/config/loongarch/lsx.md   | 60 +++
 gcc/config/loongarch/predicates.md|  8 +--
 .../vector/lasx/lasx-shift-sameimm-vec.c  | 48 +++
 .../vector/lsx/lsx-shift-sameimm-vec.c| 48 +++
 7 files changed, 206 insertions(+), 37 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-shift-sameimm-vec.c
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-shift-sameimm-vec.c

diff --git a/gcc/config/loongarch/constraints.md 
b/gcc/config/loongarch/constraints.md
index 18da8b31f49..66ef1073fad 100644
--- a/gcc/config/loongarch/constraints.md
+++ b/gcc/config/loongarch/constraints.md
@@ -334,19 +334,19 @@
   (and (match_code "const_vector")
(match_test "loongarch_const_vector_same_int_p (op, mode, -16, 15)")))
 
-(define_constraint "Uuv6"
-  "@internal
-   A replicated vector const in which the replicated value is in the range
-   [0,63]."
-  (and (match_code "const_vector")
-   (match_test "loongarch_const_vector_same_int_p (op, mode, 0, 63)")))
-
 (define_constraint "Urv8"
   "@internal
A replicated vector const with replicated byte values as well as elements"
   (and (match_code "const_vector")
(match_test "loongarch_const_vector_same_bytes_p (op, mode)")))
 
+(define_constraint "Uuvx"
+  "@internal
+   A replicated vector const in which the replicated value is in the unsigned
+   range [0,umax]."
+  (and (match_code "const_vector")
+   (match_test "loongarch_const_vector_same_int_p (op, mode)")))
+
 (define_memory_constraint "ZC"
   "A memory operand whose address is formed by a base register and offset
that is suitable for use in instructions with the same addressing mode
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index 457ed163f31..90778dd8ff9 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -1013,11 +1013,23 @@
   [(set (match_operand:ILASX 0 "register_operand" "=f,f")
(lshiftrt:ILASX
  (match_operand:ILASX 1 "register_operand" "f,f")
- (match_operand:ILASX 2 "reg_or_vector_same_uimm6_operand" "f,Uuv6")))]
+ (match_operand:ILASX 2 "reg_or_vector_same_uimm_operand" "f,Uuvx")))]
   "ISA_HAS_LASX"
-  "@
-   xvsrl.\t%u0,%u1,%u2
-   xvsrli.\t%u0,%u1,%E2"
+{
+  switch (which_alternative)
+{
+case 0:
+  return "xvsrl.\t%u0,%u1,%u2";
+case 1:
+  {
+   unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (operands[2], 
0));
+   operands[2] = GEN_INT (val & (GET_MODE_UNIT_BITSIZE (mode) - 1));
+   return "xvsrli.\t%u0,%u1,%d2";
+  }
+default:
+  gcc_unreachable ();
+}
+}
   [(set_attr "type" "simd_shift")
(set_attr "mode" "")])
 
@@ -1026,11 +1038,23 @@
   [(set (match_operand:ILASX 0 "register_operand" "=f,f")
(ashiftrt:ILASX
  (match_operand:ILASX 1 "register_operand" "f,f")
- (match_operand:ILASX 2 "reg_or_vector_same_uimm6_operand" "f,Uuv6")))]
+ (match_operand:ILASX 2 "reg_or_vector_same_uimm_operand" "f,Uuvx")))]
   "ISA_HAS_LASX"
-  "@
-   xvsra.\t%u0,%u1,%u2
-   xvsrai.\t%u0,%u1,%E2"
+{
+  switch (which_alternative)
+{
+case 0:
+  return "xvsra.\t%u0,%u1,%u2";
+case 1:
+  {
+   unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (operands[2], 
0));
+   operands[2] = GEN_INT (val & (GET_MODE_UNIT_BITSIZE (mode) - 1));
+   return "xvsrai.\t%u0,%u1,%d2";
+  }
+default:
+  gcc_unreachable ();
+}
+}
   [(set_attr "type" "simd_shift")
(set_attr "mode" "")])
 
@@ -1039,11 +1063,23 @@
   [(se

[PATCH] LoongArch: Mask shift offset when emit {xv, v}{srl, sll, sra} with sameimm vector.

2024-11-26 Thread Jinyang He
For {xv,v}{srl,sll,sra}, the constraint `vector_same_uimm6` causes the
immediate to overflow when emitting the {w,h,b} forms. Since the number of
bits shifted is the remainder of the register value modulo the element bit
width, it is actually unnecessary to constrain the range. Simply mask the
shift amount to the unit bit width, without any constraint on the shift range.

gcc/ChangeLog:

* config/loongarch/constraints.md (Uuv6, Uuvx): Remove Uuv6,
add Uuvx as replicated vector const with unsigned range [0,umax].
* config/loongarch/lasx.md (xvsrl, xvsra, xvsll): Mask shift
offset by its unit bits.
* config/loongarch/lsx.md (vsrl, vsra, vsll): Likewise.
* config/loongarch/loongarch-protos.h
(loongarch_const_vector_same_int_p): Set default for low and high.
* config/loongarch/predicates.md: Replace
reg_or_vector_same_uimm6_operand with reg_or_vector_same_uimm_operand.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/vector/lasx/lasx-shift-sameimm-vec.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-shift-sameimm-vec.c: New test.
---
 gcc/config/loongarch/constraints.md   | 14 ++--
 gcc/config/loongarch/lasx.md  | 60 
 gcc/config/loongarch/loongarch-protos.h   |  5 +-
 gcc/config/loongarch/lsx.md   | 60 
 gcc/config/loongarch/predicates.md|  8 +--
 .../vector/lasx/lasx-shift-sameimm-vec.c  | 72 +++
 .../vector/lsx/lsx-shift-sameimm-vec.c| 72 +++
 7 files changed, 254 insertions(+), 37 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-shift-sameimm-vec.c
 create mode 100644 
gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-shift-sameimm-vec.c

diff --git a/gcc/config/loongarch/constraints.md 
b/gcc/config/loongarch/constraints.md
index 18da8b31f49..66ef1073fad 100644
--- a/gcc/config/loongarch/constraints.md
+++ b/gcc/config/loongarch/constraints.md
@@ -334,19 +334,19 @@
   (and (match_code "const_vector")
(match_test "loongarch_const_vector_same_int_p (op, mode, -16, 15)")))
 
-(define_constraint "Uuv6"
-  "@internal
-   A replicated vector const in which the replicated value is in the range
-   [0,63]."
-  (and (match_code "const_vector")
-   (match_test "loongarch_const_vector_same_int_p (op, mode, 0, 63)")))
-
 (define_constraint "Urv8"
   "@internal
A replicated vector const with replicated byte values as well as elements"
   (and (match_code "const_vector")
(match_test "loongarch_const_vector_same_bytes_p (op, mode)")))
 
+(define_constraint "Uuvx"
+  "@internal
+   A replicated vector const in which the replicated value is in the unsigned
+   range [0,umax]."
+  (and (match_code "const_vector")
+   (match_test "loongarch_const_vector_same_int_p (op, mode)")))
+
 (define_memory_constraint "ZC"
   "A memory operand whose address is formed by a base register and offset
that is suitable for use in instructions with the same addressing mode
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index 457ed163f31..da3bc758a29 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -1013,11 +1013,23 @@
   [(set (match_operand:ILASX 0 "register_operand" "=f,f")
(lshiftrt:ILASX
  (match_operand:ILASX 1 "register_operand" "f,f")
- (match_operand:ILASX 2 "reg_or_vector_same_uimm6_operand" "f,Uuv6")))]
+ (match_operand:ILASX 2 "reg_or_vector_same_uimm_operand" "f,Uuvx")))]
   "ISA_HAS_LASX"
-  "@
-   xvsrl.\t%u0,%u1,%u2
-   xvsrli.\t%u0,%u1,%E2"
+{
+  switch (which_alternative)
+{
+case 0:
+  return "xvsrl.\t%u0,%u1,%u2";
+case 1:
+  {
+unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (operands[2], 0));
+operands[2] = GEN_INT (val & (GET_MODE_UNIT_BITSIZE (mode) - 1));
+return "xvsrli.\t%u0,%u1,%d2";
+  }
+default:
+  gcc_unreachable ();
+}
+}
   [(set_attr "type" "simd_shift")
(set_attr "mode" "")])
 
@@ -1026,11 +1038,23 @@
   [(set (match_operand:ILASX 0 "register_operand" "=f,f")
(ashiftrt:ILASX
  (match_operand:ILASX 1 "register_operand" "f,f")
- (match_operand:ILASX 2 "reg_or_vector_same_uimm6_operand" "f,Uuv6")))]
+ (match_operand:ILASX 2 "reg_or_vector_same_uimm_operand" "f,Uuvx")))]
   "ISA_HAS_LASX"
-  "@
-   xvsra.\t%u0,%u1,%u2
-   xvsrai.\t%u0,%u1,%E2"
+{
+  switch (which_alternative)
+{
+case 0:
+  return "xvsra.\t%u0,%u1,%u2";
+case 1:
+  {
+unsigned HOST_WIDE_INT val = UINTVAL (CONST_VECTOR_ELT (operands[2], 0));
+operands[2] = GEN_INT (val & (GET_MODE_UNIT_BITSIZE (mode) - 1));
+return "xvsrai.\t%u0,%u1,%d2";
+  }
+default:
+  gcc_unreachable ();
+}
+}
   [(set_attr "type" "simd_shift")
(set_attr "mode" "")])
 
@@ -1039,11 +1063,23 @@
   [(set (match_operand:ILASX 0 "register_operand" "=f,f")
(ashift:ILASX
  (match_operand:ILASX 1 "register_o

Re: Pushed r15-9167: [PATCH] LoongArch: Make gen-evolution.awk compatible with FreeBSD awk

2025-04-03 Thread Jinyang He

On 2025-04-03 11:37, Lulu Cheng wrote:



在 2025/4/3 上午11:12, Xi Ruoyao 写道:

On Thu, 2025-04-03 at 10:13 +0800, Lulu Cheng wrote:

在 2025/4/2 上午11:19, Xi Ruoyao 写道:

Avoid using gensub, which FreeBSD awk lacks; use gsub and split instead,
which gawk, mawk, and FreeBSD awk all provide.

Reported-by: mp...@vip.163.com
Link: https://man.freebsd.org/cgi/man.cgi?query=awk

gcc/ChangeLog:

* config/loongarch/genopts/gen-evolution.awk: Avoid using
gensub
that FreeBSD awk lacks.
---

Manually tested the script with gawk and FreeBSD awk.  Ok for trunk?

OK.

Thanks!

Pushed now.


Could you backport it to GCC 14?

Thanks.


gcc/config/loongarch/genopts/gen-evolution.awk | 8 +---
   1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/gcc/config/loongarch/genopts/gen-evolution.awk
b/gcc/config/loongarch/genopts/gen-evolution.awk
index bf16b26760e..142b658fe7a 100644
--- a/gcc/config/loongarch/genopts/gen-evolution.awk
+++ b/gcc/config/loongarch/genopts/gen-evolution.awk
@@ -33,10 +33,12 @@ BEGIN {
   {
   cpucfg_word[NR] = $1
   cpucfg_bit_in_word[NR] = $2
-    name[NR] = gensub(/-/, "_", "g", $3)
+    name[NR] = $3
+    gsub("-", "_", name[NR])
   name_capitalized[NR] = toupper(name[NR])
-    isa_version_major[NR] = gensub(/^([1-9][0-9]*)\.([0-9]+)$/,
"\\1", 1, $4)
-    isa_version_minor[NR] = gensub(/^([1-9][0-9]*)\.([0-9]+)$/,
"\\2", 1, $4)
+    split($4, isa_ver, "\\.")
+    isa_version_major[NR] = isa_ver[1]
+    isa_version_minor[NR] = isa_ver[2]
      $1 = $2 = $3 = $4 = ""
   sub (/^\s*/, "")

Hi, Ruoyao and Lulu,

When I cross-built GCC on Ubuntu, mawk failed at the asort call with
`illegal reference to array idx_list`.


$ cd gcc/config/loongarch/genopts
$ mawk -v header_p=1 -f gen-evolution.awk isa-evolution.in
mawk: gen-evolution.awk: line 104: illegal reference to array idx_list

$ mawk --version
mawk 1.3.4 20240123
Copyright 2008-2023,2024, Thomas E. Dickey
Copyright 1991-1996,2014, Michael D. Brennan

random-funcs:   arc4random_stir/arc4random
regex-funcs:    internal

compiled limits:
sprintf buffer  8192
maximum-integer 9223372036854775808