Author: tangaac Date: 2024-10-23T16:03:15+08:00 New Revision: 5b9c76b6e70c4195609bb939e067b82cdbe02adf
URL: https://github.com/llvm/llvm-project/commit/5b9c76b6e70c4195609bb939e067b82cdbe02adf DIFF: https://github.com/llvm/llvm-project/commit/5b9c76b6e70c4195609bb939e067b82cdbe02adf.diff LOG: [LoongArch] Support LoongArch-specific amswap[_db].{b/h} and amadd[_db].{b/h} instructions (#113255) Two options for clang: -mlam-bh & -mno-lam-bh. Enable or disable amswap[_db].{b/h} and amadd[_db].{b/h} instructions. The default is -mno-lam-bh. Only works on LoongArch64. Added: clang/test/Driver/loongarch-mlam-bh.c llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-lam-bh.ll Modified: clang/include/clang/Driver/Options.td clang/lib/Basic/Targets/LoongArch.cpp clang/lib/Basic/Targets/LoongArch.h clang/lib/Driver/ToolChains/Arch/LoongArch.cpp clang/test/Driver/loongarch-march.c clang/test/Preprocessor/init-loongarch.c llvm/include/llvm/TargetParser/LoongArchTargetParser.def llvm/include/llvm/TargetParser/LoongArchTargetParser.h llvm/lib/Target/LoongArch/LoongArch.td llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp llvm/lib/Target/LoongArch/LoongArchInstrInfo.td llvm/lib/TargetParser/LoongArchTargetParser.cpp Removed: ################################################################################ diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index bec53cf93254ca..70f2fb6bdc4db9 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5394,6 +5394,10 @@ def mfrecipe : Flag<["-"], "mfrecipe">, Group<m_loongarch_Features_Group>, HelpText<"Enable frecipe.{s/d} and frsqrte.{s/d}">; def mno_frecipe : Flag<["-"], "mno-frecipe">, Group<m_loongarch_Features_Group>, HelpText<"Disable frecipe.{s/d} and frsqrte.{s/d}">; +def mlam_bh : Flag<["-"], "mlam-bh">, Group<m_loongarch_Features_Group>, + HelpText<"Enable amswap_[db].{b/h} and amadd_[db].{b/h}">; +def mno_lam_bh : Flag<["-"], "mno-lam-bh">, Group<m_loongarch_Features_Group>, + HelpText<"Disable amswap_[db].{b/h} and amadd_[db].{b/h}">; def 
mannotate_tablejump : Flag<["-"], "mannotate-tablejump">, Group<m_loongarch_Features_Group>, HelpText<"Enable annotate table jump instruction to correlate it with the jump table.">; def mno_annotate_tablejump : Flag<["-"], "mno-annotate-tablejump">, Group<m_loongarch_Features_Group>, diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp index cb3fd12c48ddb6..07b22b35f603ce 100644 --- a/clang/lib/Basic/Targets/LoongArch.cpp +++ b/clang/lib/Basic/Targets/LoongArch.cpp @@ -205,7 +205,7 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, // TODO: As more features of the V1.1 ISA are supported, a unified "v1.1" // arch feature set will be used to include all sub-features belonging to // the V1.1 ISA version. - if (HasFeatureFrecipe) + if (HasFeatureFrecipe && HasFeatureLAM_BH) Builder.defineMacro("__loongarch_arch", Twine('"') + "la64v1.1" + Twine('"')); else @@ -236,6 +236,9 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, if (HasFeatureFrecipe) Builder.defineMacro("__loongarch_frecipe", Twine(1)); + if (HasFeatureLAM_BH) + Builder.defineMacro("__loongarch_lam_bh", Twine(1)); + StringRef ABI = getABI(); if (ABI == "lp64d" || ABI == "lp64f" || ABI == "lp64s") Builder.defineMacro("__loongarch_lp64"); @@ -312,6 +315,8 @@ bool LoongArchTargetInfo::handleTargetFeatures( HasUnalignedAccess = false; else if (Feature == "+frecipe") HasFeatureFrecipe = true; + else if (Feature == "+lam-bh") + HasFeatureLAM_BH = true; } return true; } diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h index c668ca7eca047a..3585e9f7968b4b 100644 --- a/clang/lib/Basic/Targets/LoongArch.h +++ b/clang/lib/Basic/Targets/LoongArch.h @@ -30,6 +30,7 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { bool HasFeatureLSX; bool HasFeatureLASX; bool HasFeatureFrecipe; + bool HasFeatureLAM_BH; public: LoongArchTargetInfo(const llvm::Triple &Triple, const TargetOptions &) @@ 
-39,6 +40,7 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { HasFeatureLSX = false; HasFeatureLASX = false; HasFeatureFrecipe = false; + HasFeatureLAM_BH = false; LongDoubleWidth = 128; LongDoubleAlign = 128; LongDoubleFormat = &llvm::APFloat::IEEEquad(); diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp index 355253e4b3b07c..e69a5562137ccd 100644 --- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp @@ -260,6 +260,15 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, else Features.push_back("-frecipe"); } + + // Select lam-bh feature determined by -m[no-]lam-bh. + if (const Arg *A = + Args.getLastArg(options::OPT_mlam_bh, options::OPT_mno_lam_bh)) { + if (A->getOption().matches(options::OPT_mlam_bh)) + Features.push_back("+lam-bh"); + else + Features.push_back("-lam-bh"); + } } std::string loongarch::postProcessTargetCPUString(const std::string &CPU, diff --git a/clang/test/Driver/loongarch-march.c b/clang/test/Driver/loongarch-march.c index 2d5b315d962a1e..d4cd5b07ae905f 100644 --- a/clang/test/Driver/loongarch-march.c +++ b/clang/test/Driver/loongarch-march.c @@ -39,21 +39,21 @@ // CC1-LA64V1P1: "-target-cpu" "loongarch64" // CC1-LA64V1P1-NOT: "-target-feature" -// CC1-LA64V1P1: "-target-feature" "+64bit" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+ual" "-target-feature" "+frecipe" +// CC1-LA64V1P1: "-target-feature" "+64bit" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+ual" "-target-feature" "+frecipe" "-target-feature" "+lam-bh" // CC1-LA64V1P1-NOT: "-target-feature" // CC1-LA64V1P1: "-target-abi" "lp64d" // CC1-LA664: "-target-cpu" "la664" // CC1-LA664-NOT: "-target-feature" -// CC1-LA664: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+lasx" "-target-feature" "+ual" "-target-feature" "+frecipe" 
+// CC1-LA664: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+lasx" "-target-feature" "+ual" "-target-feature" "+frecipe" "-target-feature" "+lam-bh" // CC1-LA664-NOT: "-target-feature" // CC1-LA664: "-target-abi" "lp64d" // IR-LOONGARCH64: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+f,+ual" // IR-LA464: attributes #[[#]] ={{.*}}"target-cpu"="la464" {{.*}}"target-features"="+64bit,+d,+f,+lasx,+lsx,+ual" // IR-LA64V1P0: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+lsx,+ual" -// IR-LA64V1P1: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+frecipe,+lsx,+ual" -// IR-LA664: attributes #[[#]] ={{.*}}"target-cpu"="la664" {{.*}}"target-features"="+64bit,+d,+f,+frecipe,+lasx,+lsx,+ual" +// IR-LA64V1P1: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+frecipe,+lam-bh,+lsx,+ual" +// IR-LA664: attributes #[[#]] ={{.*}}"target-cpu"="la664" {{.*}}"target-features"="+64bit,+d,+f,+frecipe,+lam-bh,+lasx,+lsx,+ual" int foo(void) { return 3; diff --git a/clang/test/Driver/loongarch-mlam-bh.c b/clang/test/Driver/loongarch-mlam-bh.c new file mode 100644 index 00000000000000..6f2901e594dfcc --- /dev/null +++ b/clang/test/Driver/loongarch-mlam-bh.c @@ -0,0 +1,30 @@ +/// Test -m[no-]lam-bh options. 
+ +// RUN: %clang --target=loongarch64 -mlam-bh -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LAM-BH +// RUN: %clang --target=loongarch64 -mno-lam-bh -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NO-LAM-BH +// RUN: %clang --target=loongarch64 -mno-lam-bh -mlam-bh -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LAM-BH +// RUN: %clang --target=loongarch64 -mlam-bh -mno-lam-bh -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NO-LAM-BH + +// RUN: %clang --target=loongarch64 -mlam-bh -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LAM-BH +// RUN: %clang --target=loongarch64 -mno-lam-bh -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NO-LAM-BH +// RUN: %clang --target=loongarch64 -mno-lam-bh -mlam-bh -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LAM-BH +// RUN: %clang --target=loongarch64 -mlam-bh -mno-lam-bh -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NO-LAM-BH + + +// CC1-LAM-BH: "-target-feature" "+lam-bh" +// CC1-NO-LAM-BH: "-target-feature" "-lam-bh" + +// IR-LAM-BH: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+lam-bh{{(,.*)?}}" +// IR-NO-LAM-BH: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-lam-bh{{(,.*)?}}" + +int foo(void) { + return 42; +} \ No newline at end of file diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c index 771d56ffb1c1b9..8019292e0f10e0 100644 --- a/clang/test/Preprocessor/init-loongarch.c +++ b/clang/test/Preprocessor/init-loongarch.c @@ -798,7 +798,7 @@ // LA64-FPU0-LP64S-NOT: #define __loongarch_single_float // LA64-FPU0-LP64S: #define __loongarch_soft_float 1 -/// Check __loongarch_arch{_tune/_frecipe}. +/// Check __loongarch_arch{_tune/_frecipe/_lam_bh}. 
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - | \ // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s @@ -819,30 +819,41 @@ // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang -lsx | \ // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +frecipe | \ -// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la64v1.1 -DTUNE=loongarch64 %s +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la64v1.0 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx | \ // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 | \ -// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la64v1.1 -DTUNE=loongarch64 %s +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH -DARCH=la64v1.1 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -frecipe | \ -// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH -DARCH=la64v1.0 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -lsx | \ -// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=loongarch64 -DTUNE=loongarch64 %s +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH -DARCH=loongarch64 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - 
-march=loongarch64 -Xclang -target-feature -Xclang +frecipe | \ // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=loongarch64 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +frecipe | \ -// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la64v1.1 -DTUNE=loongarch64 %s +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la64v1.0 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +lam-bh | \ +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH -DARCH=la64v1.0 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -lam-bh | \ +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la64v1.0 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lam-bh | \ +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH -DARCH=loongarch64 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +lam-bh | \ +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH -DARCH=la64v1.0 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +frecipe -Xclang -target-feature -Xclang +lam-bh | \ +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE -DARCH=la64v1.1 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la664 | \ -// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la664 -DTUNE=la664 %s +// RUN: FileCheck --match-full-lines 
--check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH -DARCH=la664 -DTUNE=la664 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -mtune=la664 | \ // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=la664 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -mtune=la664 | \ // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la664 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la664 -mtune=loongarch64 | \ -// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la664 -DTUNE=loongarch64 %s +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH -DARCH=la664 -DTUNE=loongarch64 %s // ARCH-TUNE: #define __loongarch_arch "[[ARCH]]" // FRECIPE: #define __loongarch_frecipe 1 +// LAM-BH: #define __loongarch_lam_bh 1 // ARCH-TUNE: #define __loongarch_tune "[[TUNE]]" // RUN: %clang --target=loongarch64 -mlsx -x c -E -dM %s -o - \ diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def index 101a48cbd53994..6cd2018b7b59cb 100644 --- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def +++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def @@ -11,6 +11,7 @@ LOONGARCH_FEATURE("+lbt", FK_LBT) LOONGARCH_FEATURE("+lvz", FK_LVZ) LOONGARCH_FEATURE("+ual", FK_UAL) LOONGARCH_FEATURE("+frecipe", FK_FRECIPE) +LOONGARCH_FEATURE("+lam-bh", FK_LAM_BH) #undef LOONGARCH_FEATURE @@ -20,6 +21,6 @@ LOONGARCH_FEATURE("+frecipe", FK_FRECIPE) LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64 | FK_UAL) LOONGARCH_ARCH("la464", AK_LA464, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL) -LOONGARCH_ARCH("la664", AK_LA664, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL | FK_FRECIPE) +LOONGARCH_ARCH("la664", AK_LA664, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL | FK_FRECIPE | FK_LAM_BH) #undef LOONGARCH_ARCH 
diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h index c0bb15a5163b12..b5be03b1b67fbb 100644 --- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h +++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h @@ -49,6 +49,10 @@ enum FeatureKind : uint32_t { // Floating-point approximate reciprocal instructions are available. FK_FRECIPE = 1 << 9, + + // Atomic memory swap and add instructions for byte and half word are + // available. + FK_LAM_BH = 1 << 10, }; struct FeatureInfo { diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td index ddb27dc6404fa8..54ebf86666abf9 100644 --- a/llvm/lib/Target/LoongArch/LoongArch.td +++ b/llvm/lib/Target/LoongArch/LoongArch.td @@ -112,6 +112,12 @@ def FeatureFrecipe "Support frecipe.{s/d} and frsqrte.{s/d} instructions.">; def HasFrecipe : Predicate<"Subtarget->hasFrecipe()">; +// Atomic memory swap and add instructions for byte and half word +def FeatureLAM_BH + : SubtargetFeature<"lam-bh", "HasLAM_BH", "true", + "Support amswap[_db].{b/h} and amadd[_db].{b/h} instructions.">; +def HasLAM_BH : Predicate<"Subtarget->hasLAM_BH()">; + def TunePreferWInst : SubtargetFeature<"prefer-w-inst", "PreferWInst", "true", "Prefer instructions with W suffix">; @@ -151,7 +157,8 @@ def : ProcessorModel<"la664", NoSchedModel, [Feature64Bit, FeatureExtLASX, FeatureExtLVZ, FeatureExtLBT, - FeatureFrecipe]>; + FeatureFrecipe, + FeatureLAM_BH]>; //===----------------------------------------------------------------------===// // Define the LoongArch target. 
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index cb5c2f91c5b6c3..e2c644a56c95b0 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -5735,6 +5735,13 @@ LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { AI->getOperation() == AtomicRMWInst::USubSat) return AtomicExpansionKind::CmpXChg; + if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() && + (AI->getOperation() == AtomicRMWInst::Xchg || + AI->getOperation() == AtomicRMWInst::Add || + AI->getOperation() == AtomicRMWInst::Sub)) { + return AtomicExpansionKind::None; + } + unsigned Size = AI->getType()->getPrimitiveSizeInBits(); if (Size == 8 || Size == 16) return AtomicExpansionKind::MaskedIntrinsic; diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td index fa2ce33cab2911..671b8cc6ffe1b1 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -2078,6 +2078,35 @@ multiclass binary_atomic_op_wd<string inst, string op, string signed = ""> { (!cast<Instruction>(inst#"__DB_D"#signed) GPR:$rk, GPR:$rj)>; } +// Atomic operation for byte and half word +multiclass binary_atomic_op_bh<string inst, string op> { + def : Pat<(!cast<PatFrag>(op#"_i8_monotonic") GPR:$rj, GPR:$rk), + (!cast<Instruction>(inst#"_B") GPR:$rk, GPR:$rj)>; + def : Pat<(!cast<PatFrag>(op#"_i16_monotonic") GPR:$rj, GPR:$rk), + (!cast<Instruction>(inst#"_H") GPR:$rk, GPR:$rj)>; + + def : Pat<(!cast<PatFrag>(op#"_i8") GPR:$rj, GPR:$rk), + (!cast<Instruction>(inst#"__DB_B") GPR:$rk, GPR:$rj)>; + def : Pat<(!cast<PatFrag>(op#"_i16") GPR:$rj, GPR:$rk), + (!cast<Instruction>(inst#"__DB_H") GPR:$rk, GPR:$rj)>; +} + +let Predicates = [ HasLAM_BH, IsLA64 ] in { + +defm : binary_atomic_op_bh<"AMSWAP", "atomic_swap">; +defm : binary_atomic_op_bh<"AMADD", "atomic_load_add">; +def 
: Pat<(atomic_load_sub_i8_monotonic GPR:$rj, GPR:$rk), + (AMADD_B (SUB_W R0, GPR:$rk), GPR:$rj)>; +def : Pat<(atomic_load_sub_i16_monotonic GPR:$rj, GPR:$rk), + (AMADD_H (SUB_W R0, GPR:$rk), GPR:$rj)>; + +def : Pat<(atomic_load_sub_i8 GPR:$rj, GPR:$rk), + (AMADD__DB_B (SUB_W R0, GPR:$rk), GPR:$rj)>; +def : Pat<(atomic_load_sub_i16 GPR:$rj, GPR:$rk), + (AMADD__DB_H (SUB_W R0, GPR:$rk), GPR:$rj)>; +} // Predicates = [ IsLA64, HasLAM_BH ] + + let Predicates = [IsLA64] in { defm : binary_atomic_op_wd<"AMSWAP", "atomic_swap">; diff --git a/llvm/lib/TargetParser/LoongArchTargetParser.cpp b/llvm/lib/TargetParser/LoongArchTargetParser.cpp index 8e86d18de2ad9a..27e3b5683c5a6e 100644 --- a/llvm/lib/TargetParser/LoongArchTargetParser.cpp +++ b/llvm/lib/TargetParser/LoongArchTargetParser.cpp @@ -50,8 +50,10 @@ bool LoongArch::getArchFeatures(StringRef Arch, Features.push_back("+d"); Features.push_back("+lsx"); Features.push_back("+ual"); - if (Arch == "la64v1.1") + if (Arch == "la64v1.1") { Features.push_back("+frecipe"); + Features.push_back("+lam-bh"); + } return true; } diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-lam-bh.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-lam-bh.ll new file mode 100644 index 00000000000000..646ccbd7626853 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-lam-bh.ll @@ -0,0 +1,1071 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+d,+lam-bh < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+d,+lam-bh < %s | FileCheck %s --check-prefix=LA64 + +;; We need to ensure that even if lam-bh is enabled +;; it will not generate the am*.b/h instruction on loongarch32. 
+ +define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i8_acquire: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB0_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i8_acquire: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.b $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i8 %b acquire + ret i8 %1 +} + +define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind { +; LA32-LABEL: atomicrmw_xchg_0_i8_acquire: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a1, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: ori $a2, $zero, 255 +; LA32-NEXT: sll.w $a2, $a2, $a1 +; LA32-NEXT: nor $a2, $a2, $zero +; LA32-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a0, 0 +; LA32-NEXT: and $a4, $a3, $a2 +; LA32-NEXT: sc.w $a4, $a0, 0 +; LA32-NEXT: beqz $a4, .LBB1_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_0_i8_acquire: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.b $a1, $zero, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i8 0 acquire + ret i8 %1 +} + +define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind { +; LA32-LABEL: atomicrmw_xchg_minus_1_i8_acquire: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a1, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: ori $a2, $zero, 255 +; LA32-NEXT: sll.w $a2, $a2, $a1 +; LA32-NEXT: .LBB2_1: # =>This Inner Loop 
Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a0, 0 +; LA32-NEXT: or $a4, $a3, $a2 +; LA32-NEXT: sc.w $a4, $a0, 0 +; LA32-NEXT: beqz $a4, .LBB2_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_minus_1_i8_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -1 +; LA64-NEXT: amswap_db.b $a1, $a2, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i8 -1 acquire + ret i8 %1 +} + +define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i16_acquire: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB3_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i16_acquire: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.h $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i16 %b acquire + ret i16 %1 +} + +define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind { +; LA32-LABEL: atomicrmw_xchg_0_i16_acquire: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a1, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a2, 15 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: sll.w $a2, $a2, $a1 +; LA32-NEXT: nor $a2, $a2, $zero +; LA32-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a0, 0 +; LA32-NEXT: and $a4, $a3, $a2 +; LA32-NEXT: sc.w $a4, $a0, 0 +; LA32-NEXT: beqz $a4, .LBB4_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a1 +; 
LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_0_i16_acquire: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.h $a1, $zero, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i16 0 acquire + ret i16 %1 +} + +define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind { +; LA32-LABEL: atomicrmw_xchg_minus_1_i16_acquire: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a1, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a2, 15 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: sll.w $a2, $a2, $a1 +; LA32-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a0, 0 +; LA32-NEXT: or $a4, $a3, $a2 +; LA32-NEXT: sc.w $a4, $a0, 0 +; LA32-NEXT: beqz $a4, .LBB5_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a1 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_minus_1_i16_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.w $a2, $zero, -1 +; LA64-NEXT: amswap_db.h $a1, $a2, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i16 -1 acquire + ret i16 %1 +} + +define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i8_release: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB6_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i8_release: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.b $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i8 %b release + ret i8 %1 +} + +define i16 
@atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i16_release: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB7_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i16_release: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.h $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i16 %b release + ret i16 %1 +} + +define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i8_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB8_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i8_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.b $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i8 %b acq_rel + ret i8 %1 +} + +define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i16_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 
+; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB9_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i16_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.h $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i16 %b acq_rel + ret i16 %1 +} + +define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i8_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB10_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i8_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.b $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i8 %b seq_cst + ret i8 %1 +} + +define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i16_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; 
LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB11_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i16_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.h $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i16 %b seq_cst + ret i16 %1 +} + +define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i8_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB12_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i8_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: amswap.b $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i16_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; 
LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB13_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i16_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: amswap.h $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i8_acquire: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB14_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i8_acquire: +; LA64: # %bb.0: +; LA64-NEXT: amadd_db.b $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret +; + %1 = atomicrmw add ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i16_acquire: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, 
$a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB15_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i16_acquire: +; LA64: # %bb.0: +; LA64-NEXT: amadd_db.h $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret +; + %1 = atomicrmw add ptr %a, i16 %b acquire + ret i16 %1 +} + +define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i8_release: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB16_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i8_release: +; LA64: # %bb.0: +; LA64-NEXT: amadd_db.b $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret +; + %1 = atomicrmw add ptr %a, i8 %b release + ret i8 %1 +} + +define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i16_release: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB17_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; 
LA64-LABEL: atomicrmw_add_i16_release: +; LA64: # %bb.0: +; LA64-NEXT: amadd_db.h $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret +; + %1 = atomicrmw add ptr %a, i16 %b release + ret i16 %1 +} + +define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i8_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB18_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i8_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: amadd_db.b $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret +; + %1 = atomicrmw add ptr %a, i8 %b acq_rel + ret i8 %1 +} + +define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i16_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB19_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i16_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: amadd_db.h $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret +; + %1 = 
atomicrmw add ptr %a, i16 %b acq_rel + ret i16 %1 +} + +define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i8_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB20_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i8_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: amadd_db.b $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret +; + %1 = atomicrmw add ptr %a, i8 %b seq_cst + ret i8 %1 +} + +define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i16_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB21_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i16_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: amadd_db.h $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret +; + %1 = atomicrmw add ptr %a, i16 %b seq_cst + ret i16 %1 +} + +define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind { +; LA32-LABEL: 
atomicrmw_add_i8_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB22_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i8_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: amadd.b $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret +; + %1 = atomicrmw add ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i16_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB23_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i16_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: amadd.h $a2, $a1, $a0 +; LA64-NEXT: move $a0, $a2 +; LA64-NEXT: ret +; + %1 = atomicrmw add ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i8_acquire: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: ori $a3, 
$zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: sub.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB24_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i8_acquire: +; LA64: # %bb.0: +; LA64-NEXT: sub.w $a2, $zero, $a1 +; LA64-NEXT: amadd_db.b $a1, $a2, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret +; + %1 = atomicrmw sub ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i16_acquire: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: sub.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB25_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i16_acquire: +; LA64: # %bb.0: +; LA64-NEXT: sub.w $a2, $zero, $a1 +; LA64-NEXT: amadd_db.h $a1, $a2, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret +; + %1 = atomicrmw sub ptr %a, i16 %b acquire + ret i16 %1 +} + +define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i8_release: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; 
LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: sub.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB26_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i8_release: +; LA64: # %bb.0: +; LA64-NEXT: sub.w $a2, $zero, $a1 +; LA64-NEXT: amadd_db.b $a1, $a2, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret +; + %1 = atomicrmw sub ptr %a, i8 %b release + ret i8 %1 +} + +define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i16_release: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: sub.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB27_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i16_release: +; LA64: # %bb.0: +; LA64-NEXT: sub.w $a2, $zero, $a1 +; LA64-NEXT: amadd_db.h $a1, $a2, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret +; + %1 = atomicrmw sub ptr %a, i16 %b release + ret i16 %1 +} + +define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i8_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB28_1: # =>This Inner Loop 
Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: sub.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB28_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i8_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: sub.w $a2, $zero, $a1 +; LA64-NEXT: amadd_db.b $a1, $a2, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret +; + %1 = atomicrmw sub ptr %a, i8 %b acq_rel + ret i8 %1 +} + +define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i16_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: sub.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB29_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i16_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: sub.w $a2, $zero, $a1 +; LA64-NEXT: amadd_db.h $a1, $a2, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret +; + %1 = atomicrmw sub ptr %a, i16 %b acq_rel + ret i16 %1 +} + +define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i8_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: sub.w $a5, $a4, $a1 
+; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB30_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i8_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: sub.w $a2, $zero, $a1 +; LA64-NEXT: amadd_db.b $a1, $a2, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret +; + %1 = atomicrmw sub ptr %a, i8 %b seq_cst + ret i8 %1 +} + +define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i16_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: sub.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB31_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i16_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: sub.w $a2, $zero, $a1 +; LA64-NEXT: amadd_db.h $a1, $a2, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret +; + %1 = atomicrmw sub ptr %a, i16 %b seq_cst + ret i16 %1 +} + +define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i8_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: sub.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: 
xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB32_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i8_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: sub.w $a2, $zero, $a1 +; LA64-NEXT: amadd.b $a1, $a2, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret +; + %1 = atomicrmw sub ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_sub_i16_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: sub.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB33_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA64-LABEL: atomicrmw_sub_i16_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: sub.w $a2, $zero, $a1 +; LA64-NEXT: amadd.h $a1, $a2, $a0 +; LA64-NEXT: move $a0, $a1 +; LA64-NEXT: ret +; + %1 = atomicrmw sub ptr %a, i16 %b monotonic + ret i16 %1 +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits