[llvm-branch-commits] [flang] 1ffc1aa - [mlir] Use mlir::OpState::operator->() to get to methods of mlir::Operation.

2020-12-13 Thread Christian Sigg via llvm-branch-commits

Author: Christian Sigg
Date: 2020-12-13T09:58:16+01:00
New Revision: 1ffc1aaa09674824ce66600ee0b77d6a2c042e68

URL: 
https://github.com/llvm/llvm-project/commit/1ffc1aaa09674824ce66600ee0b77d6a2c042e68
DIFF: 
https://github.com/llvm/llvm-project/commit/1ffc1aaa09674824ce66600ee0b77d6a2c042e68.diff

LOG: [mlir] Use mlir::OpState::operator->() to get to methods of 
mlir::Operation.

This is a preparation step to remove those methods from OpState.

Reviewed By: mehdi_amini

Differential Revision: https://reviews.llvm.org/D93098

Added: 


Modified: 
flang/include/flang/Optimizer/Dialect/FIROps.td
mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
mlir/include/mlir/Dialect/Affine/IR/AffineOps.td
mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOpsInterface.td
mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
mlir/include/mlir/IR/FunctionSupport.h
mlir/include/mlir/IR/Operation.h
mlir/include/mlir/IR/SymbolInterfaces.td
mlir/lib/Analysis/Utils.cpp
mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp
mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp
mlir/lib/Conversion/GPUToVulkan/ConvertGPULaunchFuncToVulkanLaunchFunc.cpp
mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp
mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
mlir/lib/Conversion/SPIRVToLLVM/ConvertSPIRVToLLVM.cpp
mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
mlir/lib/Dialect/Affine/IR/AffineOps.cpp
mlir/lib/Dialect/Affine/Utils/Utils.cpp
mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp
mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
mlir/lib/Dialect/Linalg/Transforms/Interchange.cpp
mlir/lib/Dialect/SPIRV/SPIRVLowering.cpp
mlir/lib/Dialect/SPIRV/Serialization/Deserializer.cpp
mlir/lib/Dialect/SPIRV/Serialization/Serializer.cpp
mlir/lib/Dialect/SPIRV/Transforms/UpdateVCEPass.cpp
mlir/lib/Dialect/Vector/VectorOps.cpp
mlir/lib/Dialect/Vector/VectorTransforms.cpp
mlir/test/lib/Dialect/SPIRV/TestEntryPointAbi.cpp
mlir/test/lib/Dialect/Test/TestDialect.cpp
mlir/test/lib/Dialect/Test/TestOps.td
mlir/test/lib/Dialect/Test/TestPatterns.cpp
mlir/test/lib/Transforms/TestCallGraph.cpp
mlir/test/mlir-tblgen/op-attribute.td
mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
mlir/tools/mlir-tblgen/OpFormatGen.cpp
mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp

Removed: 




diff  --git a/flang/include/flang/Optimizer/Dialect/FIROps.td 
b/flang/include/flang/Optimizer/Dialect/FIROps.td
index 9f477aa81b0c..4ffca0395804 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -2700,7 +2700,7 @@ def fir_GlobalOp : fir_Op<"global", [IsolatedFromAbove, 
Symbol]> {
 p.printAttributeWithoutType(getAttr(symbolAttrName()));
 if (auto val = getValueOrNull())
   p << '(' << val << ')';
-if (getAttr(constantAttrName()))
+if ((*this)->getAttr(constantAttrName()))
   p << " constant";
 p << " : ";
 p.printType(getType());

diff  --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h 
b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
index c4bed26bfc29..b097f18e8cea 100644
--- a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
+++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h
@@ -107,7 +107,7 @@ class AffineDmaStartOp
   /// Returns the affine map used to access the src memref.
   AffineMap getSrcMap() { return getSrcMapAttr().getValue(); }
   AffineMapAttr getSrcMapAttr() {
-return getAttr(getSrcMapAttrName()).cast();
+return (*this)->getAttr(getSrcMapAttrName()).cast();
   }
 
   /// Returns the source memref affine map indices for this DMA operation.
@@ -146,7 +146,7 @@ class AffineDmaStartOp
   /// Returns the affine map used to access the dst memref.
   AffineMap getDstMap() { return getDstMapAttr().getValue(); }
   AffineMapAttr getDstMapAttr() {
-return getAttr(getDstMapAttrName()).cast();
+return (*this)->getAttr(getDstMapAttrName()).cast();
   }
 
   /// Returns the destination memref indices for this DMA operation.
@@ -175,7 +175,7 @@ class AffineDmaStartOp
   /// Returns the affine map used to access the tag memref.
   AffineMap getTagMap() { return getTagMapAttr().getValue(); }
   AffineMapAttr getTagMapAttr() {
-return getAttr(getTagMapAttrName()).cast();
+return (*this)->getAttr(getTagMapAttrName()).cast();
   }
 
   /// Returns the tag memref indices for this DMA operation.
@@ -289,7 +289,7 @@ class AffineDmaWaitOp
   /// Returns the affine map used to access the tag memref.
   AffineMap getTagMap() { return getTagMapAttr().getValue(); }
   AffineMapAttr getTagMapAttr() {
-return getAttr(getTagMapAttrName()).cast();
+return (*this)->getAttr(getTagMapAttrName()).cast();
   }
 
   // Returns the tag memref index f

[llvm-branch-commits] [lldb] 0cd8686 - [lldb] [Process/FreeBSD] Add more 'override' keywords

2020-12-13 Thread Michał Górny via llvm-branch-commits

Author: Michał Górny
Date: 2020-12-13T09:59:32+01:00
New Revision: 0cd8686043f962d20395fde382af3af14042d4ed

URL: 
https://github.com/llvm/llvm-project/commit/0cd8686043f962d20395fde382af3af14042d4ed
DIFF: 
https://github.com/llvm/llvm-project/commit/0cd8686043f962d20395fde382af3af14042d4ed.diff

LOG: [lldb] [Process/FreeBSD] Add more 'override' keywords

Added: 


Modified: 
lldb/source/Plugins/Process/FreeBSD/ProcessMonitor.cpp

Removed: 




diff  --git a/lldb/source/Plugins/Process/FreeBSD/ProcessMonitor.cpp 
b/lldb/source/Plugins/Process/FreeBSD/ProcessMonitor.cpp
index 2c7c948f1059..4637458b53c3 100644
--- a/lldb/source/Plugins/Process/FreeBSD/ProcessMonitor.cpp
+++ b/lldb/source/Plugins/Process/FreeBSD/ProcessMonitor.cpp
@@ -215,7 +215,7 @@ class ReadOperation : public Operation {
   : m_addr(addr), m_buff(buff), m_size(size), m_error(error),
 m_result(result) {}
 
-  void Execute(ProcessMonitor *monitor);
+  void Execute(ProcessMonitor *monitor) override;
 
 private:
   lldb::addr_t m_addr;
@@ -240,7 +240,7 @@ class WriteOperation : public Operation {
   : m_addr(addr), m_buff(buff), m_size(size), m_error(error),
 m_result(result) {}
 
-  void Execute(ProcessMonitor *monitor);
+  void Execute(ProcessMonitor *monitor) override;
 
 private:
   lldb::addr_t m_addr;
@@ -303,7 +303,7 @@ class WriteRegOperation : public Operation {
 const RegisterValue &value, bool &result)
   : m_tid(tid), m_offset(offset), m_value(value), m_result(result) {}
 
-  void Execute(ProcessMonitor *monitor);
+  void Execute(ProcessMonitor *monitor) override;
 
 private:
   lldb::tid_t m_tid;
@@ -336,7 +336,7 @@ class ReadDebugRegOperation : public Operation {
   : m_tid(tid), m_offset(offset), m_size(size), m_value(value),
 m_result(result) {}
 
-  void Execute(ProcessMonitor *monitor);
+  void Execute(ProcessMonitor *monitor) override;
 
 private:
   lldb::tid_t m_tid;
@@ -369,7 +369,7 @@ class WriteDebugRegOperation : public Operation {
  const RegisterValue &value, bool &result)
   : m_tid(tid), m_offset(offset), m_value(value), m_result(result) {}
 
-  void Execute(ProcessMonitor *monitor);
+  void Execute(ProcessMonitor *monitor) override;
 
 private:
   lldb::tid_t m_tid;
@@ -400,7 +400,7 @@ class ReadGPROperation : public Operation {
   ReadGPROperation(lldb::tid_t tid, void *buf, bool &result)
   : m_tid(tid), m_buf(buf), m_result(result) {}
 
-  void Execute(ProcessMonitor *monitor);
+  void Execute(ProcessMonitor *monitor) override;
 
 private:
   lldb::tid_t m_tid;
@@ -426,7 +426,7 @@ class ReadFPROperation : public Operation {
   ReadFPROperation(lldb::tid_t tid, void *buf, bool &result)
   : m_tid(tid), m_buf(buf), m_result(result) {}
 
-  void Execute(ProcessMonitor *monitor);
+  void Execute(ProcessMonitor *monitor) override;
 
 private:
   lldb::tid_t m_tid;
@@ -448,7 +448,7 @@ class WriteGPROperation : public Operation {
   WriteGPROperation(lldb::tid_t tid, void *buf, bool &result)
   : m_tid(tid), m_buf(buf), m_result(result) {}
 
-  void Execute(ProcessMonitor *monitor);
+  void Execute(ProcessMonitor *monitor) override;
 
 private:
   lldb::tid_t m_tid;
@@ -470,7 +470,7 @@ class WriteFPROperation : public Operation {
   WriteFPROperation(lldb::tid_t tid, void *buf, bool &result)
   : m_tid(tid), m_buf(buf), m_result(result) {}
 
-  void Execute(ProcessMonitor *monitor);
+  void Execute(ProcessMonitor *monitor) override;
 
 private:
   lldb::tid_t m_tid;
@@ -492,7 +492,7 @@ class ResumeOperation : public Operation {
   ResumeOperation(uint32_t signo, bool &result)
   : m_signo(signo), m_result(result) {}
 
-  void Execute(ProcessMonitor *monitor);
+  void Execute(ProcessMonitor *monitor) override;
 
 private:
   uint32_t m_signo;
@@ -522,7 +522,7 @@ class SingleStepOperation : public Operation {
   SingleStepOperation(uint32_t signo, bool &result)
   : m_signo(signo), m_result(result) {}
 
-  void Execute(ProcessMonitor *monitor);
+  void Execute(ProcessMonitor *monitor) override;
 
 private:
   uint32_t m_signo;
@@ -549,7 +549,7 @@ class LwpInfoOperation : public Operation {
   LwpInfoOperation(lldb::tid_t tid, void *info, bool &result, int &ptrace_err)
   : m_tid(tid), m_info(info), m_result(result), m_err(ptrace_err) {}
 
-  void Execute(ProcessMonitor *monitor);
+  void Execute(ProcessMonitor *monitor) override;
 
 private:
   lldb::tid_t m_tid;
@@ -577,7 +577,7 @@ class ThreadSuspendOperation : public Operation {
   ThreadSuspendOperation(lldb::tid_t tid, bool suspend, bool &result)
   : m_tid(tid), m_suspend(suspend), m_result(result) {}
 
-  void Execute(ProcessMonitor *monitor);
+  void Execute(ProcessMonitor *monitor) override;
 
 private:
   lldb::tid_t m_tid;
@@ -596,7 +596,7 @@ class EventMessageOperation : public Operation {
   EventMessageOperation(lldb::tid_t tid, unsigned long *mes

[llvm-branch-commits] [clang] 05d1729 - [VE] Optimize toolchain regression test

2020-12-13 Thread Kazushi Marukawa via llvm-branch-commits

Author: Kazushi (Jam) Marukawa
Date: 2020-12-13T20:26:05+09:00
New Revision: 05d1729232cdff323cafd469532504aa85740967

URL: 
https://github.com/llvm/llvm-project/commit/05d1729232cdff323cafd469532504aa85740967
DIFF: 
https://github.com/llvm/llvm-project/commit/05d1729232cdff323cafd469532504aa85740967.diff

LOG: [VE] Optimize toolchain regression test

Optimize the toolchain regression test for VE by removing a test that is not
useful (the -fuse-init-array test) and merging several tests into one test that
checks the default behavior of the driver.  Also add a sysroot to reduce conflicts.

These are suggested in https://reviews.llvm.org/D92996.
Thank you so much.

Reviewed By: MaskRay

Differential Revision: https://reviews.llvm.org/D93084

Added: 
clang/test/Driver/Inputs/basic_ve_tree/opt/nec/ve/lib/crt1.o
clang/test/Driver/Inputs/basic_ve_tree/opt/nec/ve/lib/crti.o
clang/test/Driver/Inputs/basic_ve_tree/opt/nec/ve/lib/crtn.o

clang/test/Driver/Inputs/basic_ve_tree/resource_dir/lib/linux/clang_rt.crtbegin-ve.o

clang/test/Driver/Inputs/basic_ve_tree/resource_dir/lib/linux/clang_rt.crtend-ve.o

clang/test/Driver/Inputs/basic_ve_tree/resource_dir/lib/linux/libclang_rt.builtins-ve.a

Modified: 
clang/test/Driver/ve-toolchain.c
clang/test/Driver/ve-toolchain.cpp

Removed: 




diff  --git a/clang/test/Driver/Inputs/basic_ve_tree/opt/nec/ve/lib/crt1.o 
b/clang/test/Driver/Inputs/basic_ve_tree/opt/nec/ve/lib/crt1.o
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git a/clang/test/Driver/Inputs/basic_ve_tree/opt/nec/ve/lib/crti.o 
b/clang/test/Driver/Inputs/basic_ve_tree/opt/nec/ve/lib/crti.o
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git a/clang/test/Driver/Inputs/basic_ve_tree/opt/nec/ve/lib/crtn.o 
b/clang/test/Driver/Inputs/basic_ve_tree/opt/nec/ve/lib/crtn.o
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git 
a/clang/test/Driver/Inputs/basic_ve_tree/resource_dir/lib/linux/clang_rt.crtbegin-ve.o
 
b/clang/test/Driver/Inputs/basic_ve_tree/resource_dir/lib/linux/clang_rt.crtbegin-ve.o
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git 
a/clang/test/Driver/Inputs/basic_ve_tree/resource_dir/lib/linux/clang_rt.crtend-ve.o
 
b/clang/test/Driver/Inputs/basic_ve_tree/resource_dir/lib/linux/clang_rt.crtend-ve.o
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git 
a/clang/test/Driver/Inputs/basic_ve_tree/resource_dir/lib/linux/libclang_rt.builtins-ve.a
 
b/clang/test/Driver/Inputs/basic_ve_tree/resource_dir/lib/linux/libclang_rt.builtins-ve.a
new file mode 100644
index 000000000000..e69de29bb2d1

diff  --git a/clang/test/Driver/ve-toolchain.c 
b/clang/test/Driver/ve-toolchain.c
index 0ca3c84373f3..ac925e470770 100644
--- a/clang/test/Driver/ve-toolchain.c
+++ b/clang/test/Driver/ve-toolchain.c
@@ -7,83 +7,93 @@
 // RUN: %clang -### -g -target ve %s 2>&1 | FileCheck -check-prefix=DWARF_VER 
%s
 // DWARF_VER: "-dwarf-version=4"
 
-///-
-/// Checking dynamic-linker
-
-// RUN: %clang -### -target ve %s 2>&1 | FileCheck -check-prefix=DYNLINKER %s
-// DYNLINKER: nld{{.*}} "-dynamic-linker" "/opt/nec/ve/lib/ld-linux-ve.so.1"
-
-///-
-/// Checking VE specific option
-
-// RUN: %clang -### -target ve %s 2>&1 | FileCheck -check-prefix=VENLDOPT %s
-// VENLDOPT: nld{{.*}} "-z" "max-page-size=0x400"
-
 
///-
 /// Checking include-path
 
-// RUN: %clang -### -target ve %s 2>&1 | FileCheck -check-prefix=DEFINC %s
+// RUN: %clang -### -target ve --sysroot %S/Inputs/basic_ve_tree %s \
+// RUN: -resource-dir=%S/Input/basic_ve_tree/resource_dir \
+// RUN: 2>&1 | FileCheck -check-prefix=DEFINC %s
 // DEFINC: clang{{.*}} "-cc1"
-// DEFINC: "-nostdsysteminc"
-// DEFINC: "-internal-isystem" "{{.*}}/lib/clang/{{[0-9.]*}}/include"
-// DEFINC: "-internal-isystem" "/opt/nec/ve/include"
-
-// RUN: %clang -### -target ve %s -nostdlibinc 2>&1 | \
-// RUN:FileCheck -check-prefix=NOSTDLIBINC %s
+// DEFINC-SAME: "-nostdsysteminc"
+// DEFINC-SAME: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
+// DEFINC-SAME: "-isysroot" "[[SYSROOT:[^"]+]]"
+// DEFINC-SAME: "-internal-isystem" "[[RESOURCE_DIR]]/include"
+// DEFINC-SAME: "-internal-isystem" "[[SYSROOT]]/opt/nec/ve/include"
+
+// RUN: %clang -### -target ve --sysroot %S/Inputs/basic_ve_tree %s \
+// RUN: -resource-dir=%S/Input/basic_ve_tree/resource_dir \
+// RUN: -nostdlibinc 2>&1 | FileCheck -check-prefix=NOSTDLIBINC %s
 // NOSTDLIBINC: clang{{.*}} "-cc1"
-// NOSTDLIBINC: "-internal-isystem" "{{.*}}/lib/clang/{{[0-9.]*}}/include"
-// NOSTDLIBINC-NOT: "-internal-isystem" "/opt/nec/ve/include"
-
-// RUN: %clang -### -target ve %s -nobuiltininc 2>&1 | \
-// RUN:FileCheck -check-prefix=NOBUILT

[llvm-branch-commits] [llvm] bb939eb - [BasicAA] Handle known non-zero variable index

2020-12-13 Thread Nikita Popov via llvm-branch-commits

Author: Nikita Popov
Date: 2020-12-13T13:20:05+01:00
New Revision: bb939ebfd7f48a1aa744941d6de86e69d3796b5d

URL: 
https://github.com/llvm/llvm-project/commit/bb939ebfd7f48a1aa744941d6de86e69d3796b5d
DIFF: 
https://github.com/llvm/llvm-project/commit/bb939ebfd7f48a1aa744941d6de86e69d3796b5d.diff

LOG: [BasicAA] Handle known non-zero variable index

BasicAA currently handles cases like Scale*V0 + (-Scale)*V1 where
V0 != V1, but does not handle the simpler case of Scale*V with
V != 0. Add it based on an isKnownNonZero() call.

I'm not passing a context instruction for now, because the existing
approach of always using GEP1 for context could result in symmetry
issues.

Differential Revision: https://reviews.llvm.org/D93162

Added: 


Modified: 
llvm/lib/Analysis/BasicAliasAnalysis.cpp
llvm/test/Analysis/BasicAA/bug.23626.ll
llvm/test/Analysis/BasicAA/sequential-gep.ll

Removed: 




diff  --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp 
b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 9ba61ad64178..3e22ce84d181 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1288,8 +1288,12 @@ AliasResult BasicAAResult::aliasGEP(
 if (V1Size.hasValue() && V2Size.hasValue()) {
   // Try to determine whether abs(VarIndex) > 0.
   Optional MinAbsVarIndex;
-  // TODO: Could handle single non-zero index as well.
-  if (DecompGEP1.VarIndices.size() == 2) {
+  if (DecompGEP1.VarIndices.size() == 1) {
+// VarIndex = Scale*V. If V != 0 then abs(VarIndex) >= abs(Scale).
+const VariableGEPIndex &Var = DecompGEP1.VarIndices[0];
+if (isKnownNonZero(Var.V, DL))
+  MinAbsVarIndex = Var.Scale.abs();
+  } else if (DecompGEP1.VarIndices.size() == 2) {
 // VarIndex = Scale*V0 + (-Scale)*V1.
 // If V0 != V1 then abs(VarIndex) >= abs(Scale).
 // Check that VisitedPhiBBs is empty, to avoid reasoning about

diff  --git a/llvm/test/Analysis/BasicAA/bug.23626.ll 
b/llvm/test/Analysis/BasicAA/bug.23626.ll
index 9a4641ef58e3..9df3d5767db8 100644
--- a/llvm/test/Analysis/BasicAA/bug.23626.ll
+++ b/llvm/test/Analysis/BasicAA/bug.23626.ll
@@ -4,9 +4,9 @@ target triple = "x86_64-apple-darwin13.4.0"
 
 ; CHECK-LABEL: compute1
 ; CHECK: MayAlias: i32* %arrayidx8, i32* %out
-; CHECK: MayAlias: i32* %arrayidx11, i32* %out
+; CHECK: NoAlias:  i32* %arrayidx11, i32* %out
 ; CHECK: MayAlias: i32* %arrayidx11, i32* %arrayidx8
-; CHECK: MayAlias: i32* %arrayidx14, i32* %out
+; CHECK: NoAlias:  i32* %arrayidx14, i32* %out
 ; CHECK: MayAlias: i32* %arrayidx14, i32* %arrayidx8
 ; CHECK: MayAlias: i32* %arrayidx11, i32* %arrayidx14
 define void @compute1(i32 %num.0.lcssa, i32* %out) {

diff  --git a/llvm/test/Analysis/BasicAA/sequential-gep.ll 
b/llvm/test/Analysis/BasicAA/sequential-gep.ll
index 2b61b5327c10..b6e84fa97138 100644
--- a/llvm/test/Analysis/BasicAA/sequential-gep.ll
+++ b/llvm/test/Analysis/BasicAA/sequential-gep.ll
@@ -111,10 +111,9 @@ define void @add_non_zero_with_offset(i32* %p, i32 
%addend, i32* %q) {
 }
 
 ; CHECK-LABEL: non_zero_index_simple
-; CHECK: MayAlias: i32* %gep, i32* %p
-; CHECK: MayAlias: i16* %gep.16, i32* %p
+; CHECK: NoAlias: i32* %gep, i32* %p
+; CHECK: NoAlias: i16* %gep.16, i32* %p
 ; CHECK: MayAlias: i32* %p, i64* %gep.64
-; TODO: First two could be NoAlias.
 define void @non_zero_index_simple(i32* %p, i32* %q) {
   %knownnonzero = load i32, i32* %q, !range !0
   %gep = getelementptr i32, i32* %p, i32 %knownnonzero
@@ -125,8 +124,7 @@ define void @non_zero_index_simple(i32* %p, i32* %q) {
 
 ; CHECK-LABEL: non_zero_index_with_offset
 ; CHECK: MayAlias: i32* %gep, i32* %p
-; CHECK: MayAlias: i16* %gep.16, i32* %p
-; TODO: Last could be NoAlias.
+; CHECK: NoAlias: i16* %gep.16, i32* %p
 define void @non_zero_index_with_offset(i32* %p, i32* %q) {
   %knownnonzero = load i32, i32* %q, !range !0
   %p.8 = bitcast i32* %p to i8*



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] 3630640 - [clang-format] Remove double trim

2020-12-13 Thread Björn Schäpers via llvm-branch-commits

Author: Björn Schäpers
Date: 2020-12-13T14:16:54+01:00
New Revision: 36306403d492d4a4b54c72c6c4c511021584243b

URL: 
https://github.com/llvm/llvm-project/commit/36306403d492d4a4b54c72c6c4c511021584243b
DIFF: 
https://github.com/llvm/llvm-project/commit/36306403d492d4a4b54c72c6c4c511021584243b.diff

LOG: [clang-format] Remove double trim

Lines[i] has already been trimmed three lines earlier, so the second ltrim is redundant.

Differential Revision: https://reviews.llvm.org/D91996

Added: 


Modified: 
clang/lib/Format/BreakableToken.cpp

Removed: 




diff  --git a/clang/lib/Format/BreakableToken.cpp 
b/clang/lib/Format/BreakableToken.cpp
index 4975c89164a4..ea5cc31af07a 100644
--- a/clang/lib/Format/BreakableToken.cpp
+++ b/clang/lib/Format/BreakableToken.cpp
@@ -773,10 +773,7 @@ BreakableLineCommentSection::BreakableLineCommentSection(
 OriginalPrefix.resize(Lines.size());
 for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
   Lines[i] = Lines[i].ltrim(Blanks);
-  // We need to trim the blanks in case this is not the first line in a
-  // multiline comment. Then the indent is included in Lines[i].
-  StringRef IndentPrefix =
-  getLineCommentIndentPrefix(Lines[i].ltrim(Blanks), Style);
+  StringRef IndentPrefix = getLineCommentIndentPrefix(Lines[i], Style);
   assert((TokenText.startswith("//") || TokenText.startswith("#")) &&
  "unsupported line comment prefix, '//' and '#' are supported");
   OriginalPrefix[i] = Prefix[i] = IndentPrefix;



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 0ee73bb - [X86] Regenerate vector-reduce-mul.ll with common check prefixes. NFC.

2020-12-13 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2020-12-13T14:25:42Z
New Revision: 0ee73bb24ab624990175519a8158e966e80f7f92

URL: 
https://github.com/llvm/llvm-project/commit/0ee73bb24ab624990175519a8158e966e80f7f92
DIFF: 
https://github.com/llvm/llvm-project/commit/0ee73bb24ab624990175519a8158e966e80f7f92.diff

LOG: [X86] Regenerate vector-reduce-mul.ll with common check prefixes. NFC.

Try to merge AVX1/AVX2/AVX512 codegen checks where possible

Added: 


Modified: 
llvm/test/CodeGen/X86/vector-reduce-mul.ll

Removed: 




diff  --git a/llvm/test/CodeGen/X86/vector-reduce-mul.ll 
b/llvm/test/CodeGen/X86/vector-reduce-mul.ll
index 3f827f14c043..0c4214dd385c 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-mul.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-mul.ll
@@ -1,12 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s 
--check-prefixes=SSE,SSE2
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s 
--check-prefixes=SSE,SSE41
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s 
--check-prefixes=AVX,AVX1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s 
--check-prefixes=AVX,AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | 
FileCheck %s --check-prefixes=AVX512,AVX512BW
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown 
-mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s 
--check-prefixes=AVX512,AVX512BWVL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512dq | 
FileCheck %s --check-prefixes=AVX512,AVX512DQ
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown 
-mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s 
--check-prefixes=AVX512,AVX512DQVL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s 
--check-prefixes=AVX,AVX1OR2,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s 
--check-prefixes=AVX,AVX1OR2,AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | 
FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown 
-mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s 
--check-prefixes=AVX,AVX512,AVX512BWVL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512dq | 
FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown 
-mattr=+avx512f,+avx512dq,+avx512vl | FileCheck %s 
--check-prefixes=AVX,AVX512,AVX512DQVL
 
 ;
 ; vXi64
@@ -28,19 +28,19 @@ define i64 @test_v2i64(<2 x i64> %a0) {
 ; SSE-NEXT:movq %xmm0, %rax
 ; SSE-NEXT:retq
 ;
-; AVX-LABEL: test_v2i64:
-; AVX:   # %bb.0:
-; AVX-NEXT:vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX-NEXT:vpsrlq $32, %xmm0, %xmm2
-; AVX-NEXT:vpmuludq %xmm1, %xmm2, %xmm2
-; AVX-NEXT:vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
-; AVX-NEXT:vpmuludq %xmm3, %xmm0, %xmm3
-; AVX-NEXT:vpaddq %xmm2, %xmm3, %xmm2
-; AVX-NEXT:vpsllq $32, %xmm2, %xmm2
-; AVX-NEXT:vpmuludq %xmm1, %xmm0, %xmm0
-; AVX-NEXT:vpaddq %xmm2, %xmm0, %xmm0
-; AVX-NEXT:vmovq %xmm0, %rax
-; AVX-NEXT:retq
+; AVX1OR2-LABEL: test_v2i64:
+; AVX1OR2:   # %bb.0:
+; AVX1OR2-NEXT:vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; AVX1OR2-NEXT:vpsrlq $32, %xmm0, %xmm2
+; AVX1OR2-NEXT:vpmuludq %xmm1, %xmm2, %xmm2
+; AVX1OR2-NEXT:vpshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
+; AVX1OR2-NEXT:vpmuludq %xmm3, %xmm0, %xmm3
+; AVX1OR2-NEXT:vpaddq %xmm2, %xmm3, %xmm2
+; AVX1OR2-NEXT:vpsllq $32, %xmm2, %xmm2
+; AVX1OR2-NEXT:vpmuludq %xmm1, %xmm0, %xmm0
+; AVX1OR2-NEXT:vpaddq %xmm2, %xmm0, %xmm0
+; AVX1OR2-NEXT:vmovq %xmm0, %rax
+; AVX1OR2-NEXT:retq
 ;
 ; AVX512BW-LABEL: test_v2i64:
 ; AVX512BW:   # %bb.0:
@@ -792,13 +792,6 @@ define i32 @test_v2i32(<2 x i32> %a0) {
 ; AVX-NEXT:vpmulld %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:vmovd %xmm0, %eax
 ; AVX-NEXT:retq
-;
-; AVX512-LABEL: test_v2i32:
-; AVX512:   # %bb.0:
-; AVX512-NEXT:vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
-; AVX512-NEXT:vpmulld %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:vmovd %xmm0, %eax
-; AVX512-NEXT:retq
   %1 = call i32 @llvm.vector.reduce.mul.v2i32(<2 x i32> %a0)
   ret i32 %1
 }
@@ -832,15 +825,6 @@ define i32 @test_v4i32(<4 x i32> %a0) {
 ; AVX-NEXT:vpmulld %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:vmovd %xmm0, %eax
 ; AVX-NEXT:retq
-;
-; AVX512-LABEL: test_v4i32:
-; AVX512:   # %bb.0:
-; AVX512-NEXT:vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
-; AVX512-NEXT:vpmulld %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
-; AVX512-NEXT:vpmulld %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:vmovd %xmm0, %eax
-; AVX512-NEXT:retq
   %1 = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %a0)
   ret i32 %1
 }
@@ -,14 +1095,6 @@ define i16 @test_v2i16(<2 x i1

[llvm-branch-commits] [mlir] 076f87a - [MLIR][SPIRV] Add support for GLSL F/U/SClamp.

2020-12-13 Thread Lei Zhang via llvm-branch-commits

Author: ergawy
Date: 2020-12-13T09:56:46-05:00
New Revision: 076f87a86741f96c076cea9f9f2af17de55122a3

URL: 
https://github.com/llvm/llvm-project/commit/076f87a86741f96c076cea9f9f2af17de55122a3
DIFF: 
https://github.com/llvm/llvm-project/commit/076f87a86741f96c076cea9f9f2af17de55122a3.diff

LOG: [MLIR][SPIRV] Add support for GLSL F/U/SClamp.

Adds support for 3 ternary ops from SPIR-V extended instructions for
GLSL. Namely, adds support for FClamp, UClamp, and SClamp.

Reviewed By: antiagainst

Differential Revision: https://reviews.llvm.org/D92859

Added: 


Modified: 
mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td
mlir/include/mlir/Dialect/SPIRV/SPIRVGLSLOps.td
mlir/test/Dialect/SPIRV/Serialization/glsl-ops.mlir
mlir/test/Dialect/SPIRV/glslops.mlir

Removed: 




diff  --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td 
b/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td
index 54e5efe5f295..0d80ecc8ddda 100644
--- a/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td
+++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td
@@ -3075,6 +3075,7 @@ def SPV_Type : AnyTypeOf<[
 SPV_AnyCooperativeMatrix, SPV_AnyMatrix
   ]>;
 
+def SPV_SignedInt : SignedIntOfWidths<[8, 16, 32, 64]>;
 def SPV_SignlessOrUnsignedInt : SignlessOrUnsignedIntOfWidths<[8, 16, 32, 64]>;
 
 class SPV_CoopMatrixOfType allowedTypes> :

diff  --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVGLSLOps.td 
b/mlir/include/mlir/Dialect/SPIRV/SPIRVGLSLOps.td
index 10cafd825116..73745fe4694e 100644
--- a/mlir/include/mlir/Dialect/SPIRV/SPIRVGLSLOps.td
+++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVGLSLOps.td
@@ -77,6 +77,28 @@ class SPV_GLSLBinaryArithmeticOp traits = []> :
   SPV_GLSLBinaryOp;
 
+// Base class for GLSL ternary ops.
+class SPV_GLSLTernaryArithmeticOp traits = []> :
+  SPV_GLSLOp {
+
+  let arguments = (ins
+SPV_ScalarOrVectorOf:$x,
+SPV_ScalarOrVectorOf:$y,
+SPV_ScalarOrVectorOf:$z
+  );
+
+  let results = (outs
+SPV_ScalarOrVectorOf:$result
+  );
+
+  let parser = [{ return impl::parseOneResultSameOperandTypeOp(parser, 
result); }];
+
+  let printer = [{ return impl::printOneResultOp(getOperation(), p); }];
+
+  let verifier = [{ return success(); }];
+}
+
 // -
 
 def SPV_GLSLFAbsOp : SPV_GLSLUnaryArithmeticOp<"FAbs", 4, SPV_Float> {
@@ -862,4 +884,92 @@ def SPV_GLSLTanhOp : SPV_GLSLUnaryArithmeticOp<"Tanh", 21, 
SPV_Float16or32> {
   }];
 }
 
+// -
+
+def SPV_GLSLFClampOp : SPV_GLSLTernaryArithmeticOp<"FClamp", 43, SPV_Float> {
+  let summary = "Clamp x between min and max values.";
+
+  let description = [{
+Result is min(max(x, minVal), maxVal). The resulting value is undefined if
+minVal > maxVal. The semantics used by min() and max() are those of FMin 
and
+FMax.
+
+The operands must all be a scalar or vector whose component type is
+floating-point.
+
+Result Type and the type of all operands must be the same type. Results are
+computed per component.
+
+
+```
+fclamp-op ::= ssa-id `=` `spv.GLSL.FClamp` ssa-use, ssa-use, ssa-use `:`
+   float-scalar-vector-type
+```
+ Example:
+
+```mlir
+%2 = spv.GLSL.FClamp %x, %min, %max : f32
+%3 = spv.GLSL.FClamp %x, %min, %max : vector<3xf16>
+```
+  }];
+}
+
+// -
+
+def SPV_GLSLUClampOp : SPV_GLSLTernaryArithmeticOp<"UClamp", 44, 
SPV_SignlessOrUnsignedInt> {
+  let summary = "Clamp x between min and max values.";
+
+  let description = [{
+Result is min(max(x, minVal), maxVal), where x, minVal and maxVal are
+interpreted as unsigned integers. The resulting value is undefined if
+minVal > maxVal.
+
+Result Type and the type of the operands must both be integer scalar or
+integer vector types. Result Type and operand types must have the same 
number
+of components with the same component width. Results are computed per
+component.
+
+
+```
+uclamp-op ::= ssa-id `=` `spv.GLSL.UClamp` ssa-use, ssa-use, ssa-use `:`
+   unsgined-signless-scalar-vector-type
+```
+ Example:
+
+```mlir
+%2 = spv.GLSL.UClamp %x, %min, %max : i32
+%3 = spv.GLSL.UClamp %x, %min, %max : vector<3xui16>
+```
+  }];
+}
+
+// -
+
+def SPV_GLSLSClampOp : SPV_GLSLTernaryArithmeticOp<"SClamp", 45, 
SPV_SignedInt> {
+  let summary = "Clamp x between min and max values.";
+
+  let description = [{
+Result is min(max(x, minVal), maxVal), where x, minVal and maxVal are
+interpreted as signed integers. The resulting value is undefined if
+minVal > maxVal.
+
+Result Type and the type of the operands must both be integer scalar or
+integer vector types. Result Type and operand types must have the same 
number
+of components with the same component width. Results are computed per
+component.
+
+
+```
+uclamp-op ::= ssa-id `=` `spv.GLSL.UClamp` ssa-use, ssa-use, ssa-use `:`
+   sgined-scalar-vector-type
+ 

[llvm-branch-commits] [mlir] 9c3fa3d - Don't emit on op diagnostic in reproducer emission

2020-12-13 Thread Jacques Pienaar via llvm-branch-commits

Author: Jacques Pienaar
Date: 2020-12-13T07:21:32-08:00
New Revision: 9c3fa3d84d5cdcdcdb5b6961f2c587f84e7caa39

URL: 
https://github.com/llvm/llvm-project/commit/9c3fa3d84d5cdcdcdb5b6961f2c587f84e7caa39
DIFF: 
https://github.com/llvm/llvm-project/commit/9c3fa3d84d5cdcdcdb5b6961f2c587f84e7caa39.diff

LOG: Don't emit on op diagnostic in reproducer emission

This avoids dumping the module after emitting a reproducer, which otherwise
produces logs of many megabytes even though a reproducer has already been neatly generated.

Differential Revision: https://reviews.llvm.org/D93165

Added: 


Modified: 
mlir/lib/Pass/Pass.cpp

Removed: 




diff  --git a/mlir/lib/Pass/Pass.cpp b/mlir/lib/Pass/Pass.cpp
index 056da035a5b5..f53a087fac47 100644
--- a/mlir/lib/Pass/Pass.cpp
+++ b/mlir/lib/Pass/Pass.cpp
@@ -765,10 +765,14 @@ 
PassManager::runWithCrashRecovery(MutableArrayRef> passes,
   std::string error;
   if (failed(context.generate(error)))
 return op->emitError(": ") << error;
-  return op->emitError()
- << "A failure has been detected while processing the MLIR module, a "
-"reproducer has been generated in '"
- << *crashReproducerFileName << "'";
+  bool shouldPrintOnOp = op->getContext()->shouldPrintOpOnDiagnostic();
+  op->getContext()->printOpOnDiagnostic(false);
+  op->emitError()
+  << "A failure has been detected while processing the MLIR module, a "
+ "reproducer has been generated in '"
+  << *crashReproducerFileName << "'";
+  op->getContext()->printOpOnDiagnostic(shouldPrintOnOp);
+  return failure();
 }
 
 
//===--===//



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 47321c3 - [X86][SSE] combineReductionToHorizontal - add vXi8 ISD::MUL reduction handling (PR39709)

2020-12-13 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2020-12-13T15:22:54Z
New Revision: 47321c311bdbe0145b9bf45d822185c37b19fa50

URL: 
https://github.com/llvm/llvm-project/commit/47321c311bdbe0145b9bf45d822185c37b19fa50
DIFF: 
https://github.com/llvm/llvm-project/commit/47321c311bdbe0145b9bf45d822185c37b19fa50.diff

LOG: [X86][SSE] combineReductionToHorizontal - add vXi8 ISD::MUL reduction 
handling (PR39709)

Default expansion leads to repeated extensions/truncations to/from vXi16 which 
shuffle combining and demanded elts can't completely unravel.

Better just to promote (any_extend) the input and perform a vXi16 reduction.

We'll be able to remove a lot of this if we ever get decent legalization 
support for reduction intrinsics in SelectionDAG.

Added: 


Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/test/CodeGen/X86/vector-reduce-mul.ll

Removed: 




diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 849f5a06db61..04987a2b9abe 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -6357,8 +6357,10 @@ static SDValue IsNOT(SDValue V, SelectionDAG &DAG, bool 
OneUse = false) {
   return SDValue();
 }
 
-void llvm::createUnpackShuffleMask(MVT VT, SmallVectorImpl &Mask,
+void llvm::createUnpackShuffleMask(EVT VT, SmallVectorImpl &Mask,
bool Lo, bool Unary) {
+  assert(VT.getScalarType().isSimple() && (VT.getSizeInBits() % 128) == 0 &&
+ "Illegal vector type to unpack");
   assert(Mask.empty() && "Expected an empty shuffle mask vector");
   int NumElts = VT.getVectorNumElements();
   int NumEltsInLane = 128 / VT.getScalarSizeInBits();
@@ -6387,7 +6389,7 @@ void llvm::createSplat2ShuffleMask(MVT VT, 
SmallVectorImpl &Mask,
 }
 
 /// Returns a vector_shuffle node for an unpackl operation.
-static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
+static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, EVT VT,
   SDValue V1, SDValue V2) {
   SmallVector Mask;
   createUnpackShuffleMask(VT, Mask, /* Lo = */ true, /* Unary = */ false);
@@ -6395,7 +6397,7 @@ static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc 
&dl, MVT VT,
 }
 
 /// Returns a vector_shuffle node for an unpackh operation.
-static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
+static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, EVT VT,
   SDValue V1, SDValue V2) {
   SmallVector Mask;
   createUnpackShuffleMask(VT, Mask, /* Lo = */ false, /* Unary = */ false);
@@ -40026,8 +40028,8 @@ static SDValue combineReductionToHorizontal(SDNode 
*ExtElt, SelectionDAG &DAG,
 return SDValue();
 
   ISD::NodeType Opc;
-  SDValue Rdx =
-  DAG.matchBinOpReduction(ExtElt, Opc, {ISD::ADD, ISD::FADD}, true);
+  SDValue Rdx = DAG.matchBinOpReduction(ExtElt, Opc,
+{ISD::ADD, ISD::MUL, ISD::FADD}, true);
   if (!Rdx)
 return SDValue();
 
@@ -40042,7 +40044,42 @@ static SDValue combineReductionToHorizontal(SDNode 
*ExtElt, SelectionDAG &DAG,
 
   SDLoc DL(ExtElt);
 
-  // vXi8 reduction - sub 128-bit vector.
+  // vXi8 mul reduction - promote to vXi16 mul reduction.
+  if (Opc == ISD::MUL) {
+unsigned NumElts = VecVT.getVectorNumElements();
+if (VT != MVT::i8 || NumElts < 4 || !isPowerOf2_32(NumElts))
+  return SDValue();
+if (VecVT.getSizeInBits() >= 128) {
+  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts / 2);
+  SDValue Lo = getUnpackl(DAG, DL, VecVT, Rdx, DAG.getUNDEF(VecVT));
+  SDValue Hi = getUnpackh(DAG, DL, VecVT, Rdx, DAG.getUNDEF(VecVT));
+  Lo = DAG.getBitcast(WideVT, Lo);
+  Hi = DAG.getBitcast(WideVT, Hi);
+  Rdx = DAG.getNode(Opc, DL, WideVT, Lo, Hi);
+  while (Rdx.getValueSizeInBits() > 128) {
+std::tie(Lo, Hi) = splitVector(Rdx, DAG, DL);
+Rdx = DAG.getNode(Opc, DL, Lo.getValueType(), Lo, Hi);
+  }
+} else {
+  Rdx = widenSubVector(Rdx, false, Subtarget, DAG, DL, 128);
+  Rdx = getUnpackl(DAG, DL, MVT::v16i8, Rdx, DAG.getUNDEF(MVT::v16i8));
+  Rdx = DAG.getBitcast(MVT::v8i16, Rdx);
+}
+if (NumElts >= 8)
+  Rdx = DAG.getNode(Opc, DL, MVT::v8i16, Rdx,
+DAG.getVectorShuffle(MVT::v8i16, DL, Rdx, Rdx,
+ {4, 5, 6, 7, -1, -1, -1, -1}));
+Rdx = DAG.getNode(Opc, DL, MVT::v8i16, Rdx,
+  DAG.getVectorShuffle(MVT::v8i16, DL, Rdx, Rdx,
+   {2, 3, -1, -1, -1, -1, -1, -1}));
+Rdx = DAG.getNode(Opc, DL, MVT::v8i16, Rdx,
+  DAG.getVectorShuffle(MVT::v8i16, DL, Rdx, Rdx,
+   {1, -1, -1, -1, -1, -1, -1, -1}));
+Rdx = DAG.getBitcast(MVT::v16i8, Rdx);
+   

[llvm-branch-commits] [clang] 4855a10 - [X86] Convert fadd/fmul _mm_reduce_* intrinsics to emit llvm.reduction intrinsics (PR47506)

2020-12-13 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2020-12-13T15:37:35Z
New Revision: 4855a1004d4d87b6c21c510c1724e74a8d37d91a

URL: 
https://github.com/llvm/llvm-project/commit/4855a1004d4d87b6c21c510c1724e74a8d37d91a
DIFF: 
https://github.com/llvm/llvm-project/commit/4855a1004d4d87b6c21c510c1724e74a8d37d91a.diff

LOG: [X86] Convert fadd/fmul _mm_reduce_* intrinsics to emit llvm.reduction 
intrinsics (PR47506)

Followup to D87604, having confirmed on PR47506 that we can use the llvm 
codegen expansion for fadd/fmul as well.

Differential Revision: https://reviews.llvm.org/D92940

Added: 


Modified: 
clang/include/clang/Basic/BuiltinsX86.def
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/Headers/avx512fintrin.h
clang/test/CodeGen/X86/avx512-reduceIntrin.c

Removed: 




diff  --git a/clang/include/clang/Basic/BuiltinsX86.def 
b/clang/include/clang/Basic/BuiltinsX86.def
index 0f5594f1a4e6..16fb7dd7b0e6 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -1876,6 +1876,10 @@ TARGET_BUILTIN(__builtin_ia32_reduce_add_d512, "iV16i", 
"ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_reduce_add_q512, "OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_reduce_and_d512, "iV16i", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_reduce_and_q512, "OiV8Oi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_reduce_fadd_pd512, "ddV8d", "ncV:512:", 
"avx512f")
+TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ps512, "ffV16f", "ncV:512:", 
"avx512f")
+TARGET_BUILTIN(__builtin_ia32_reduce_fmul_pd512, "ddV8d", "ncV:512:", 
"avx512f")
+TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ps512, "ffV16f", "ncV:512:", 
"avx512f")
 TARGET_BUILTIN(__builtin_ia32_reduce_mul_d512, "iV16i", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_reduce_mul_q512, "OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_reduce_or_d512, "iV16i", "ncV:512:", "avx512f")

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 316a60c31fd4..74f6c9fee2c8 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -13631,6 +13631,18 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned 
BuiltinID,
 CGM.getIntrinsic(Intrinsic::vector_reduce_and, Ops[0]->getType());
 return Builder.CreateCall(F, {Ops[0]});
   }
+  case X86::BI__builtin_ia32_reduce_fadd_pd512:
+  case X86::BI__builtin_ia32_reduce_fadd_ps512: {
+Function *F =
+CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
+return Builder.CreateCall(F, {Ops[0], Ops[1]});
+  }
+  case X86::BI__builtin_ia32_reduce_fmul_pd512:
+  case X86::BI__builtin_ia32_reduce_fmul_ps512: {
+Function *F =
+CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
+return Builder.CreateCall(F, {Ops[0], Ops[1]});
+  }
   case X86::BI__builtin_ia32_reduce_mul_d512:
   case X86::BI__builtin_ia32_reduce_mul_q512: {
 Function *F =

diff  --git a/clang/lib/Headers/avx512fintrin.h 
b/clang/lib/Headers/avx512fintrin.h
index 2df399d978e3..2ee4350b14d4 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -9345,37 +9345,25 @@ _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
   return __builtin_ia32_reduce_or_q512(__W);
 }
 
-#define _mm512_mask_reduce_operator(op) \
-  __m256d __t1 = _mm512_extractf64x4_pd(__W, 0); \
-  __m256d __t2 = _mm512_extractf64x4_pd(__W, 1); \
-  __m256d __t3 = __t1 op __t2; \
-  __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \
-  __m128d __t5 = _mm256_extractf128_pd(__t3, 1); \
-  __m128d __t6 = __t4 op __t5; \
-  __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
-  __m128d __t8 = __t6 op __t7; \
-  return __t8[0]
-
 static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d 
__W) {
-  _mm512_mask_reduce_operator(+);
+  return __builtin_ia32_reduce_fadd_pd512(0.0, __W);
 }
 
 static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d 
__W) {
-  _mm512_mask_reduce_operator(*);
+  return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
 }
 
 static __inline__ double __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
   __W = _mm512_maskz_mov_pd(__M, __W);
-  _mm512_mask_reduce_operator(+);
+  return __builtin_ia32_reduce_fadd_pd512(0.0, __W);
 }
 
 static __inline__ double __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
   __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W);
-  _mm512_mask_reduce_operator(*);
+  return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
 }
-#undef _mm512_mask_reduce_operator
 
 static __inline__ int __DEFAULT_FN_ATTRS512
 _mm512_reduce_add_epi32(__m512i __W) {
@@ -9421,41 +9409,27 @@ _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) 
{
   return __builtin_ia32_reduce_or_d512((__v16si)__W);
 }
 
-#define _mm512_mask_reduce_operator(op) \
-  __m256 __t1

[llvm-branch-commits] [llvm] 46bc40e - Recommit "[AArch64] Lower calls with rv_marker attribute."

2020-12-13 Thread Florian Hahn via llvm-branch-commits

Author: Florian Hahn
Date: 2020-12-13T16:20:39Z
New Revision: 46bc40e50246c1902a1ca7916c8286cb837643ee

URL: 
https://github.com/llvm/llvm-project/commit/46bc40e50246c1902a1ca7916c8286cb837643ee
DIFF: 
https://github.com/llvm/llvm-project/commit/46bc40e50246c1902a1ca7916c8286cb837643ee.diff

LOG: Recommit "[AArch64] Lower calls with rv_marker attribute."

This recommits a87fccb3ff9c with a fix to mark the destination operand
of the marker instruction as def, to fix a machine verifier failure.

This reverts the revert commit c0f2cea7c0afc7c9688e1633f2a9b25c8ea4a9bd.

Added: 
llvm/test/CodeGen/AArch64/call-rv-marker.ll
llvm/test/CodeGen/AArch64/expand-blr-rvmarker-pseudo.mir

Modified: 
llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/AArch64/AArch64InstrInfo.td

Removed: 




diff  --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp 
b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 9e65ad2e18f9..e57650ae60b1 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -83,6 +83,8 @@ class AArch64ExpandPseudo : public MachineFunctionPass {
   bool expandSVESpillFill(MachineBasicBlock &MBB,
   MachineBasicBlock::iterator MBBI, unsigned Opc,
   unsigned N);
+  bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
+   MachineBasicBlock::iterator MBBI);
 };
 
 } // end anonymous namespace
@@ -627,6 +629,46 @@ bool 
AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
   return true;
 }
 
+bool AArch64ExpandPseudo::expandCALL_RVMARKER(
+MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
+  // Expand CALL_RVMARKER pseudo to a branch, followed by the special `mov x29,
+  // x29` marker. Mark the sequence as bundle, to avoid passes moving other code
+  // in between.
+  MachineInstr &MI = *MBBI;
+
+  MachineInstr *OriginalCall;
+  MachineOperand &CallTarget = MI.getOperand(0);
+  assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
+ "invalid operand for regular call");
+  unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
+  OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), 
TII->get(Opc)).getInstr();
+  OriginalCall->addOperand(CallTarget);
+
+  unsigned RegMaskStartIdx = 1;
+  // Skip register arguments. Those are added during ISel, but are not
+  // needed for the concrete branch.
+  while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
+assert(MI.getOperand(RegMaskStartIdx).isReg() &&
+   "should only skip register operands");
+RegMaskStartIdx++;
+  }
+  for (; RegMaskStartIdx < MI.getNumOperands(); ++RegMaskStartIdx)
+OriginalCall->addOperand(MI.getOperand(RegMaskStartIdx));
+
+  auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), 
TII->get(AArch64::ORRXrs))
+ .addReg(AArch64::FP, RegState::Define)
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::FP)
+ .addImm(0)
+ .getInstr();
+  if (MI.shouldUpdateCallSiteInfo())
+MBB.getParent()->moveCallSiteInfo(&MI, Marker);
+  MI.eraseFromParent();
+  finalizeBundle(MBB, OriginalCall->getIterator(),
+ std::next(Marker->getIterator()));
+  return true;
+}
+
 /// If MBBI references a pseudo instruction that should be expanded here,
 /// do the expansion and return true.  Otherwise return false.
 bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
@@ -1014,6 +1056,8 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
  return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
case AArch64::LDR_ZZXI:
  return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
+   case AArch64::BLR_RVMARKER:
+ return expandCALL_RVMARKER(MBB, MBBI);
   }
   return false;
 }

diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp 
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 01301abf10e3..4e7dba09d5b0 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1934,6 +1934,7 @@ const char 
*AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
 MAKE_CASE(AArch64ISD::INDEX_VECTOR)
 MAKE_CASE(AArch64ISD::UABD)
 MAKE_CASE(AArch64ISD::SABD)
+MAKE_CASE(AArch64ISD::CALL_RVMARKER)
   }
 #undef MAKE_CASE
   return nullptr;
@@ -5539,8 +5540,17 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
 return Ret;
   }
 
+  unsigned CallOpc = AArch64ISD::CALL;
+  // Calls marked with "rv_marker" are special. They should be expanded to the
+  // call, directly followed by a special marker sequence. Use the CALL_RVMARKER
+  // to do that.
+  if (CLI.CB && CLI.CB->hasRetAttr("rv_marker")) {
+assert(!IsTai

[llvm-branch-commits] [llvm] 533f857 - [VPlan] Use interleaveComma in printOperands() (NFC).

2020-12-13 Thread Florian Hahn via llvm-branch-commits

Author: Florian Hahn
Date: 2020-12-13T16:29:16Z
New Revision: 533f85767c62d03cf72a67f826ed4c3b8023d79d

URL: 
https://github.com/llvm/llvm-project/commit/533f85767c62d03cf72a67f826ed4c3b8023d79d
DIFF: 
https://github.com/llvm/llvm-project/commit/533f85767c62d03cf72a67f826ed4c3b8023d79d.diff

LOG: [VPlan] Use interleaveComma in printOperands() (NFC).

Added: 


Modified: 
llvm/lib/Transforms/Vectorize/VPlan.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp 
b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 7cc5291a35f9..516c149bd280 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -20,6 +20,7 @@
 #include "VPlanDominatorTree.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/IVDescriptors.h"
@@ -1065,13 +1066,9 @@ void VPValue::printAsOperand(raw_ostream &OS, 
VPSlotTracker &Tracker) const {
 }
 
 void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const {
-  bool First = true;
-  for (VPValue *Op : operands()) {
-if (!First)
-  O << ", ";
+  interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) {
 Op->printAsOperand(O, SlotTracker);
-First = false;
-  }
+  });
 }
 
 void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 8bdfc12 - [X86][AVX] Add additional X86ISD::SUBV_BROADCAST_LOAD test case for D92645

2020-12-13 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2020-12-13T16:43:33Z
New Revision: 8bdfc1222f7cee4f4c0988e6a03fd090e997b99e

URL: 
https://github.com/llvm/llvm-project/commit/8bdfc1222f7cee4f4c0988e6a03fd090e997b99e
DIFF: 
https://github.com/llvm/llvm-project/commit/8bdfc1222f7cee4f4c0988e6a03fd090e997b99e.diff

LOG: [X86][AVX] Add additional X86ISD::SUBV_BROADCAST_LOAD test case for D92645

Suggested by @yubing - to check whether we can reuse a single subvector 
broadcast for 128/256/512-bit vectors.

Added: 


Modified: 
llvm/test/CodeGen/X86/subvector-broadcast.ll

Removed: 




diff  --git a/llvm/test/CodeGen/X86/subvector-broadcast.ll 
b/llvm/test/CodeGen/X86/subvector-broadcast.ll
index d07cd0a23137..bde44e6072b3 100644
--- a/llvm/test/CodeGen/X86/subvector-broadcast.ll
+++ b/llvm/test/CodeGen/X86/subvector-broadcast.ll
@@ -904,6 +904,155 @@ entry:
   ret void
 }
 
+@ha4 = global <4 x i32> zeroinitializer, align 8
+@hb4 = global <8 x i32> zeroinitializer, align 8
+@hc4 = global <16 x i32> zeroinitializer, align 8
+
+define void @fallback_broadcast_v4i32_v8i32_v16i32(<4 x i32> %a, <8 x i32> %b, 
<16 x i32> %c) nounwind {
+; X86-AVX1-LABEL: fallback_broadcast_v4i32_v8i32_v16i32:
+; X86-AVX1:   # %bb.0: # %entry
+; X86-AVX1-NEXT:pushl %ebp
+; X86-AVX1-NEXT:movl %esp, %ebp
+; X86-AVX1-NEXT:andl $-32, %esp
+; X86-AVX1-NEXT:subl $32, %esp
+; X86-AVX1-NEXT:vmovdqa {{.*#+}} xmm3 = [1,2,3,4]
+; X86-AVX1-NEXT:vpaddd %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT:vextractf128 $1, %ymm1, %xmm4
+; X86-AVX1-NEXT:vpaddd %xmm3, %xmm4, %xmm4
+; X86-AVX1-NEXT:vpaddd %xmm3, %xmm1, %xmm1
+; X86-AVX1-NEXT:vinsertf128 $1, %xmm4, %ymm1, %ymm1
+; X86-AVX1-NEXT:vinsertf128 $1, %xmm3, %ymm3, %ymm4
+; X86-AVX1-NEXT:vandps %ymm4, %ymm1, %ymm1
+; X86-AVX1-NEXT:vextractf128 $1, %ymm2, %xmm5
+; X86-AVX1-NEXT:vpaddd %xmm3, %xmm5, %xmm5
+; X86-AVX1-NEXT:vpaddd %xmm3, %xmm2, %xmm2
+; X86-AVX1-NEXT:vinsertf128 $1, %xmm5, %ymm2, %ymm2
+; X86-AVX1-NEXT:vpaddd 8(%ebp), %xmm3, %xmm5
+; X86-AVX1-NEXT:vpaddd 24(%ebp), %xmm3, %xmm3
+; X86-AVX1-NEXT:vinsertf128 $1, %xmm3, %ymm5, %ymm3
+; X86-AVX1-NEXT:vandps %ymm4, %ymm2, %ymm2
+; X86-AVX1-NEXT:vandps %ymm4, %ymm3, %ymm3
+; X86-AVX1-NEXT:vmovdqu %xmm0, ha4
+; X86-AVX1-NEXT:vmovups %ymm1, hb4
+; X86-AVX1-NEXT:vmovups %ymm3, hc4+32
+; X86-AVX1-NEXT:vmovups %ymm2, hc4
+; X86-AVX1-NEXT:movl %ebp, %esp
+; X86-AVX1-NEXT:popl %ebp
+; X86-AVX1-NEXT:vzeroupper
+; X86-AVX1-NEXT:retl
+;
+; X86-AVX2-LABEL: fallback_broadcast_v4i32_v8i32_v16i32:
+; X86-AVX2:   # %bb.0: # %entry
+; X86-AVX2-NEXT:pushl %ebp
+; X86-AVX2-NEXT:movl %esp, %ebp
+; X86-AVX2-NEXT:andl $-32, %esp
+; X86-AVX2-NEXT:subl $32, %esp
+; X86-AVX2-NEXT:vmovdqa {{.*#+}} xmm3 = [1,2,3,4]
+; X86-AVX2-NEXT:vpaddd %xmm3, %xmm0, %xmm0
+; X86-AVX2-NEXT:vinserti128 $1, %xmm3, %ymm3, %ymm3
+; X86-AVX2-NEXT:vpaddd %ymm3, %ymm1, %ymm1
+; X86-AVX2-NEXT:vpand %ymm3, %ymm1, %ymm1
+; X86-AVX2-NEXT:vpaddd 8(%ebp), %ymm3, %ymm4
+; X86-AVX2-NEXT:vpaddd %ymm3, %ymm2, %ymm2
+; X86-AVX2-NEXT:vpand %ymm3, %ymm2, %ymm2
+; X86-AVX2-NEXT:vpand %ymm3, %ymm4, %ymm3
+; X86-AVX2-NEXT:vmovdqu %xmm0, ha4
+; X86-AVX2-NEXT:vmovdqu %ymm1, hb4
+; X86-AVX2-NEXT:vmovdqu %ymm3, hc4+32
+; X86-AVX2-NEXT:vmovdqu %ymm2, hc4
+; X86-AVX2-NEXT:movl %ebp, %esp
+; X86-AVX2-NEXT:popl %ebp
+; X86-AVX2-NEXT:vzeroupper
+; X86-AVX2-NEXT:retl
+;
+; X86-AVX512-LABEL: fallback_broadcast_v4i32_v8i32_v16i32:
+; X86-AVX512:   # %bb.0: # %entry
+; X86-AVX512-NEXT:vmovdqa {{.*#+}} xmm3 = [1,2,3,4]
+; X86-AVX512-NEXT:vpaddd %xmm3, %xmm0, %xmm0
+; X86-AVX512-NEXT:vinserti128 $1, %xmm3, %ymm3, %ymm4
+; X86-AVX512-NEXT:vpaddd %ymm4, %ymm1, %ymm1
+; X86-AVX512-NEXT:vpand %ymm4, %ymm1, %ymm1
+; X86-AVX512-NEXT:vshufi32x4 {{.*#+}} zmm3 = 
zmm3[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; X86-AVX512-NEXT:vpaddd %zmm3, %zmm2, %zmm2
+; X86-AVX512-NEXT:vpandd %zmm3, %zmm2, %zmm2
+; X86-AVX512-NEXT:vmovdqu %xmm0, ha4
+; X86-AVX512-NEXT:vmovdqu %ymm1, hb4
+; X86-AVX512-NEXT:vmovdqu64 %zmm2, hc4
+; X86-AVX512-NEXT:vzeroupper
+; X86-AVX512-NEXT:retl
+;
+; X64-AVX1-LABEL: fallback_broadcast_v4i32_v8i32_v16i32:
+; X64-AVX1:   # %bb.0: # %entry
+; X64-AVX1-NEXT:vmovdqa {{.*#+}} xmm4 = [1,2,3,4]
+; X64-AVX1-NEXT:vpaddd %xmm4, %xmm0, %xmm0
+; X64-AVX1-NEXT:vextractf128 $1, %ymm1, %xmm5
+; X64-AVX1-NEXT:vpaddd %xmm4, %xmm5, %xmm5
+; X64-AVX1-NEXT:vpaddd %xmm4, %xmm1, %xmm1
+; X64-AVX1-NEXT:vinsertf128 $1, %xmm5, %ymm1, %ymm1
+; X64-AVX1-NEXT:vinsertf128 $1, %xmm4, %ymm4, %ymm5
+; X64-AVX1-NEXT:vandps %ymm5, %ymm1, %ymm1
+; X64-AVX1-NEXT:vextractf128 $1, %ymm3, %xmm6
+; X64-AVX1-NEXT:vpaddd %xmm4, %xmm6, %xmm6
+; X64-AVX1-NEXT:vpaddd %xmm4, %xmm3, %xmm3
+; X64-

[llvm-branch-commits] [llvm] d5c434d - [X86][SSE] combineX86ShufflesRecursively - add basic handling for combining shuffles of different widths (PR45974)

2020-12-13 Thread Simon Pilgrim via llvm-branch-commits

Author: Simon Pilgrim
Date: 2020-12-13T17:18:07Z
New Revision: d5c434d7dda25909cd7886e419baf3db3578953e

URL: 
https://github.com/llvm/llvm-project/commit/d5c434d7dda25909cd7886e419baf3db3578953e
DIFF: 
https://github.com/llvm/llvm-project/commit/d5c434d7dda25909cd7886e419baf3db3578953e.diff

LOG: [X86][SSE] combineX86ShufflesRecursively - add basic handling for 
combining shuffles of different widths (PR45974)

If a faux shuffle uses smaller shuffle inputs, try to recursively combine with 
those inputs directly instead of widening them immediately. Then widen all 
smaller inputs at the bottom of the recursion.

This will still mean we're generating nodes on the fly (PR45974) even if we 
don't combine to a new shuffle but it does help AVX2+ targets combine across 
xmm/ymm/zmm types, mainly as variable shuffles.

Added: 


Modified: 
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/min-legal-vector-width.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
llvm/test/CodeGen/X86/vector-shuffle-v1.ll
llvm/test/CodeGen/X86/x86-interleaved-access.ll

Removed: 




diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 04987a2b9abe..b4a397080284 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36135,8 +36135,8 @@ static SDValue combineX86ShufflesRecursively(
   if (!VT.isVector() || !VT.isSimple())
 return SDValue(); // Bail if we hit a non-simple non-vector.
 
-  assert(VT.getSizeInBits() == RootSizeInBits &&
- "Can only combine shuffles of the same vector register size.");
+  assert((RootSizeInBits % VT.getSizeInBits()) == 0 &&
+ "Can only combine shuffles upto size of the root op.");
 
   // Extract target shuffle mask and resolve sentinels and inputs.
   // TODO - determine Op's demanded elts from RootMask.
@@ -36149,17 +36149,32 @@ static SDValue combineX86ShufflesRecursively(
   OpZero, DAG, Depth, false))
 return SDValue();
 
-  // Shuffle inputs must be the same size as the result, bail on any larger
-  // inputs and widen any smaller inputs.
-  if (llvm::any_of(OpInputs, [RootSizeInBits](SDValue Op) {
-return Op.getValueSizeInBits() > RootSizeInBits;
+  // Shuffle inputs must not be larger than the shuffle result.
+  // TODO: Relax this for single input faux shuffles (trunc/extract_subvector).
+  if (llvm::any_of(OpInputs, [VT](SDValue OpInput) {
+return OpInput.getValueSizeInBits() > VT.getSizeInBits();
   }))
 return SDValue();
 
-  for (SDValue &Op : OpInputs)
-if (Op.getValueSizeInBits() < RootSizeInBits)
-  Op = widenSubVector(peekThroughOneUseBitcasts(Op), false, Subtarget, DAG,
-  SDLoc(Op), RootSizeInBits);
+  // If the shuffle result was smaller than the root, we need to adjust the
+  // mask indices and pad the mask with undefs.
+  if (RootSizeInBits > VT.getSizeInBits()) {
+unsigned NumSubVecs = RootSizeInBits / VT.getSizeInBits();
+unsigned OpMaskSize = OpMask.size();
+if (OpInputs.size() > 1) {
+  unsigned PaddedMaskSize = NumSubVecs * OpMaskSize;
+  for (int &M : OpMask) {
+if (M < 0)
+  continue;
+int EltIdx = M % OpMaskSize;
+int OpIdx = M / OpMaskSize;
+M = (PaddedMaskSize * OpIdx) + EltIdx;
+  }
+}
+OpZero = OpZero.zext(NumSubVecs * OpMaskSize);
+OpUndef = OpUndef.zext(NumSubVecs * OpMaskSize);
+OpMask.append((NumSubVecs - 1) * OpMaskSize, SM_SentinelUndef);
+  }
 
   SmallVector Mask;
   SmallVector Ops;
@@ -36337,6 +36352,18 @@ static SDValue combineX86ShufflesRecursively(
 }
   }
 
+  // Widen any subvector shuffle inputs we've collected.
+  if (any_of(Ops, [RootSizeInBits](SDValue Op) {
+return Op.getValueSizeInBits() < RootSizeInBits;
+  })) {
+for (SDValue &Op : Ops)
+  if (Op.getValueSizeInBits() < RootSizeInBits)
+Op = widenSubVector(Op, false, Subtarget, DAG, SDLoc(Op),
+RootSizeInBits);
+// Reresolve - we might have repeated subvector sources.
+resolveTargetShuffleInputsAndMask(Ops, Mask);
+  }
+
   // Attempt to constant fold all of the constant source ops.
   if (SDValue Cst = combineX86ShufflesConstants(
   Ops, Mask, Root, HasVariableMask, DAG, Subtarget))

diff  --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll 
b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
index a39fbf878fd9..5456cd2e753a 100644
--- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll
+++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll
@@ -1682,25 +1682,45 @@ define <32 x i8> @var_rotate_v32i8(<32 x i8> %a, <32 x 
i8> %b) nounwind "min-leg
 }
 
 define <32 x i8> @splatvar_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind 
"min-legal-vector-width"="256" {
-; CHECK-LABE

[llvm-branch-commits] [llvm] 828602c - [NFC][AMDGPU] Update AMDGPUUsage with AMD RDNA 2 reference

2020-12-13 Thread via llvm-branch-commits

Author: Tony
Date: 2020-12-13T17:21:02Z
New Revision: 828602c772c80e11f5416127e5fad4375fa09cd8

URL: 
https://github.com/llvm/llvm-project/commit/828602c772c80e11f5416127e5fad4375fa09cd8
DIFF: 
https://github.com/llvm/llvm-project/commit/828602c772c80e11f5416127e5fad4375fa09cd8.diff

LOG: [NFC][AMDGPU] Update AMDGPUUsage with AMD RDNA 2 reference

Differential Revision: https://reviews.llvm.org/D93172

Added: 


Modified: 
llvm/docs/AMDGPUUsage.rst

Removed: 




diff  --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index e4469dc4c143..e5d081a37500 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -312,7 +312,7 @@ specific information.

  Add product

  names.
 
- **GCN GFX9** [AMD-GCN-GFX9]_
+ **GCN GFX9 (Vega)** [AMD-GCN-GFX9]_
  
---
  ``gfx900``  ``amdgcn``   dGPU  - xnack
   - *rocm-amdhsa* - Radeon Vega

   - *pal-amdhsa*Frontier Edition
@@ -363,7 +363,7 @@ specific information.

   - Ryzen 3 Pro 4350G

   - Ryzen 3 Pro 4350GE
 
- **GCN GFX10** [AMD-GCN-GFX10]_
+ **GCN GFX10 (RDNA 1)** [AMD-GCN-GFX10-RDNA1]_
  
---
  ``gfx1010`` ``amdgcn``   dGPU  - cumode   
   - *rocm-amdhsa* - Radeon RX 5700
 - wavefrontsize64  
   - *pal-amdhsa*  - Radeon RX 5700 XT
@@ -380,7 +380,8 @@ specific information.
  ``gfx1012`` ``amdgcn``   dGPU  - cumode   
   - *rocm-amdhsa* - Radeon RX 5500
 - wavefrontsize64  
   - *pal-amdhsa*  - Radeon RX 5500 XT
 - xnack
   - *pal-amdpal*
-
+ **GCN GFX10 (RDNA 2)** [AMD-GCN-GFX10-RDNA2]_
+ 
---
  ``gfx1030`` ``amdgcn``   dGPU  - cumode   
   - *rocm-amdhsa* *TBA*
 - wavefrontsize64  
   - *pal-amdhsa*

   - *pal-amdpal*  .. TODO::
@@ -8719,8 +8720,8 @@ in this description.
 
 For more information about instructions, their semantics and supported
 combinations of operands, refer to one of instruction set architecture manuals
-[AMD-GCN-GFX6]_, [AMD-GCN-GFX7]_, [AMD-GCN-GFX8]_, [AMD-GCN-GFX9]_ and
-[AMD-GCN-GFX10]_.
+[AMD-GCN-GFX6]_, [AMD-GCN-GFX7]_, [AMD-GCN-GFX8]_, [AMD-GCN-GFX9]_,
+[AMD-GCN-GFX10-RDNA1]_ and [AMD-GCN-GFX10-RDNA2]_.
 
 Operands
 
@@ -9447,7 +9448,8 @@ Additional Documentation
 .. [AMD-GCN-GFX7] `AMD Sea Islands Series ISA 
`_
 .. [AMD-GCN-GFX8] `AMD GCN3 Instruction Set Architecture 
`__
 .. [AMD-GCN-GFX9] `AMD "Vega" Instruction Set Architecture 
`__
-.. [AMD-GCN-GFX10] `AMD "RDNA 1.0" Instruction Set Architecture 
`__
+.. [AMD-GCN-GFX10-RDNA1] `AMD "RDNA 1.0" Instruction Set Architecture 
`__
+.. [AMD-GCN-GFX10-RDNA2] `AMD "RDNA 2" Instruction Set Architecture 
`__
 .. [AMD-RADEON-HD-2000-3000] `AMD R6xx shader ISA 
`__
 .. [AMD-RADEON-HD-4000] `AMD R7xx shader ISA 
`__
 .. [AMD-RADEON-HD-5000] `AMD Evergreen shader ISA 
`__



___
llvm-branch-commits mailing list
llvm-b

[llvm-branch-commits] [llvm] 5891ad4 - [Transforms] Use llvm::erase_value (NFC)

2020-12-13 Thread Kazu Hirata via llvm-branch-commits

Author: Kazu Hirata
Date: 2020-12-13T09:48:47-08:00
New Revision: 5891ad4e222f510e532ba2ce6cd56919986d9cb1

URL: 
https://github.com/llvm/llvm-project/commit/5891ad4e222f510e532ba2ce6cd56919986d9cb1
DIFF: 
https://github.com/llvm/llvm-project/commit/5891ad4e222f510e532ba2ce6cd56919986d9cb1.diff

LOG: [Transforms] Use llvm::erase_value (NFC)

Added: 


Modified: 
llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
llvm/lib/Transforms/Utils/LoopUnroll.cpp
llvm/lib/Transforms/Utils/SimplifyCFG.cpp
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 




diff  --git a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp 
b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index 00b242c16f38..50792d713b41 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -1386,9 +1386,7 @@ void LoopUnswitch::unswitchNontrivialCondition(Value 
*LIC, Constant *Val,
 /// Remove all instances of I from the worklist vector specified.
 static void removeFromWorklist(Instruction *I,
std::vector &Worklist) {
-
-  Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), I),
- Worklist.end());
+  llvm::erase_value(Worklist, I);
 }
 
 /// When we find that I really equals V, remove I from the

diff  --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp 
b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 7bea696a853a..4d5d03528633 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -865,9 +865,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, 
UnrollLoopOptions ULO, LoopInfo *LI,
   if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) {
 // Dest has been folded into Fold. Update our worklists accordingly.
 std::replace(Latches.begin(), Latches.end(), Dest, Fold);
-UnrolledLoopBlocks.erase(std::remove(UnrolledLoopBlocks.begin(),
- UnrolledLoopBlocks.end(), Dest),
- UnrolledLoopBlocks.end());
+llvm::erase_value(UnrolledLoopBlocks, Dest);
   }
 }
   }

diff  --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp 
b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 073a43faadd9..f2b7ffd67057 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -791,7 +791,7 @@ BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
 static void
 EliminateBlockCases(BasicBlock *BB,
 std::vector &Cases) {
-  Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end());
+  llvm::erase_value(Cases, BB);
 }
 
 /// Return true if there are any keys in C1 that exist in C2 as well.

diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 
b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index dc35f5c3df3d..c5ba3709f6b1 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7398,9 +7398,8 @@ static bool findBuildAggregate(Instruction 
*LastInsertInst,
 
   if (findBuildAggregate_rec(LastInsertInst, TTI, BuildVectorOpds, InsertElts,
  0)) {
-llvm::erase_if(BuildVectorOpds,
-   [](const Value *V) { return V == nullptr; });
-llvm::erase_if(InsertElts, [](const Value *V) { return V == nullptr; });
+llvm::erase_value(BuildVectorOpds, nullptr);
+llvm::erase_value(InsertElts, nullptr);
 if (BuildVectorOpds.size() >= 2)
   return true;
   }



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 0a4e028 - [Analysis] Remove unused declaration replaceEdgeKey (NFC)

2020-12-13 Thread Kazu Hirata via llvm-branch-commits

Author: Kazu Hirata
Date: 2020-12-13T10:03:45-08:00
New Revision: 0a4e028d13a4708e60c0cd36bf5efd18d3181ac8

URL: 
https://github.com/llvm/llvm-project/commit/0a4e028d13a4708e60c0cd36bf5efd18d3181ac8
DIFF: 
https://github.com/llvm/llvm-project/commit/0a4e028d13a4708e60c0cd36bf5efd18d3181ac8.diff

LOG: [Analysis] Remove unused declaration replaceEdgeKey (NFC)

The declaration was introduced without a corresponding definition on
Feb 9, 2017 in commit aaad9f84be2a6a3eb8202ed4eaa5e5e2021d055e.

Added: 


Modified: 
llvm/include/llvm/Analysis/LazyCallGraph.h

Removed: 




diff  --git a/llvm/include/llvm/Analysis/LazyCallGraph.h 
b/llvm/include/llvm/Analysis/LazyCallGraph.h
index aa0758165598..f356aec9e0a1 100644
--- a/llvm/include/llvm/Analysis/LazyCallGraph.h
+++ b/llvm/include/llvm/Analysis/LazyCallGraph.h
@@ -305,13 +305,6 @@ class LazyCallGraph {
 
 /// Internal helper to remove the edge to the given function.
 bool removeEdgeInternal(Node &ChildN);
-
-/// Internal helper to replace an edge key with a new one.
-///
-/// This should be used when the function for a particular node in the
-/// graph gets replaced and we are updating all of the edges to that node
-/// to use the new function as the key.
-void replaceEdgeKey(Function &OldTarget, Function &NewTarget);
   };
 
   /// A node in the call graph.



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] f99b4f5 - [X86] Extend varargs test

2020-12-13 Thread Harald van Dijk via llvm-branch-commits

Author: Harald van Dijk
Date: 2020-12-13T18:33:10Z
New Revision: f99b4f5241a3b3436b05355f5ea8588274254f8b

URL: 
https://github.com/llvm/llvm-project/commit/f99b4f5241a3b3436b05355f5ea8588274254f8b
DIFF: 
https://github.com/llvm/llvm-project/commit/f99b4f5241a3b3436b05355f5ea8588274254f8b.diff

LOG: [X86] Extend varargs test

This extends the existing x86-64-varargs test by passing enough
arguments that they need to be passed in memory, and by passing them in
reverse order, using va_arg for each argument to retrieve them and
restoring them to the correct order, and by using va_copy to have two
va_lists to use with va_arg.

Added: 


Modified: 
llvm/test/CodeGen/X86/x86-64-varargs.ll

Removed: 




diff  --git a/llvm/test/CodeGen/X86/x86-64-varargs.ll 
b/llvm/test/CodeGen/X86/x86-64-varargs.ll
index 58f7c82c2123..48b757be2645 100644
--- a/llvm/test/CodeGen/X86/x86-64-varargs.ll
+++ b/llvm/test/CodeGen/X86/x86-64-varargs.ll
@@ -1,30 +1,326 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -code-model=large 
-relocation-model=static | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --no_x86_scrub_sp
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -code-model=large 
-relocation-model=static | FileCheck --check-prefix=CHECK-X64 %s
 
-@.str = internal constant [26 x i8] c"%d, %f, %d, %lld, %d, %f\0A\00"  
; <[26 x i8]*> [#uses=1]
+@.str = internal constant [38 x i8] c"%d, %f, %d, %lld, %d, %f, %d, %d, 
%d\0A\00"  ; <[38 x i8]*> [#uses=1]
 
 declare i32 @printf(i8*, ...) nounwind
 
-define i32 @main() nounwind  {
-; CHECK-LABEL: main:
-; CHECK:   ## %bb.0: ## %entry
-; CHECK-NEXT:pushq %rax
-; CHECK-NEXT:movabsq $_.str, %rdi
-; CHECK-NEXT:movabsq $_printf, %r9
-; CHECK-NEXT:movabsq $LCPI0_0, %rax
-; CHECK-NEXT:movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT:movabsq $LCPI0_1, %rax
-; CHECK-NEXT:movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT:movabsq $123456677890, %rcx ## imm = 0x1CBE976802
-; CHECK-NEXT:movl $12, %esi
-; CHECK-NEXT:movl $120, %edx
-; CHECK-NEXT:movl $-10, %r8d
-; CHECK-NEXT:movb $2, %al
-; CHECK-NEXT:callq *%r9
-; CHECK-NEXT:xorl %eax, %eax
-; CHECK-NEXT:popq %rcx
-; CHECK-NEXT:retq
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_copy(i8*, i8*)
+declare void @llvm.va_end(i8*)
+
+%struct.va_list = type { i32, i32, i8*, i8* }
+
+define void @func(...) nounwind {
+; CHECK-X64-LABEL: func:
+; CHECK-X64:   ## %bb.0: ## %entry
+; CHECK-X64-NEXT:pushq %rbx
+; CHECK-X64-NEXT:subq $224, %rsp
+; CHECK-X64-NEXT:testb %al, %al
+; CHECK-X64-NEXT:je LBB0_2
+; CHECK-X64-NEXT:  ## %bb.1: ## %entry
+; CHECK-X64-NEXT:movaps %xmm0, 96(%rsp)
+; CHECK-X64-NEXT:movaps %xmm1, 112(%rsp)
+; CHECK-X64-NEXT:movaps %xmm2, 128(%rsp)
+; CHECK-X64-NEXT:movaps %xmm3, 144(%rsp)
+; CHECK-X64-NEXT:movaps %xmm4, 160(%rsp)
+; CHECK-X64-NEXT:movaps %xmm5, 176(%rsp)
+; CHECK-X64-NEXT:movaps %xmm6, 192(%rsp)
+; CHECK-X64-NEXT:movaps %xmm7, 208(%rsp)
+; CHECK-X64-NEXT:  LBB0_2: ## %entry
+; CHECK-X64-NEXT:movq %rdi, 48(%rsp)
+; CHECK-X64-NEXT:movq %rsi, 56(%rsp)
+; CHECK-X64-NEXT:movq %rdx, 64(%rsp)
+; CHECK-X64-NEXT:movq %rcx, 72(%rsp)
+; CHECK-X64-NEXT:movq %r8, 80(%rsp)
+; CHECK-X64-NEXT:movq %r9, 88(%rsp)
+; CHECK-X64-NEXT:movabsq $206158430208, %rax ## imm = 0x30
+; CHECK-X64-NEXT:movq %rax, (%rsp)
+; CHECK-X64-NEXT:leaq 240(%rsp), %rax
+; CHECK-X64-NEXT:movq %rax, 8(%rsp)
+; CHECK-X64-NEXT:leaq 48(%rsp), %rax
+; CHECK-X64-NEXT:movq %rax, 16(%rsp)
+; CHECK-X64-NEXT:movl (%rsp), %ecx
+; CHECK-X64-NEXT:cmpl $48, %ecx
+; CHECK-X64-NEXT:jae LBB0_4
+; CHECK-X64-NEXT:  ## %bb.3: ## %entry
+; CHECK-X64-NEXT:movq 16(%rsp), %rax
+; CHECK-X64-NEXT:addq %rcx, %rax
+; CHECK-X64-NEXT:addl $8, %ecx
+; CHECK-X64-NEXT:movl %ecx, (%rsp)
+; CHECK-X64-NEXT:jmp LBB0_5
+; CHECK-X64-NEXT:  LBB0_4: ## %entry
+; CHECK-X64-NEXT:movq 8(%rsp), %rax
+; CHECK-X64-NEXT:movq %rax, %rcx
+; CHECK-X64-NEXT:addq $8, %rcx
+; CHECK-X64-NEXT:movq %rcx, 8(%rsp)
+; CHECK-X64-NEXT:  LBB0_5: ## %entry
+; CHECK-X64-NEXT:movl (%rax), %r10d
+; CHECK-X64-NEXT:movl (%rsp), %ecx
+; CHECK-X64-NEXT:cmpl $48, %ecx
+; CHECK-X64-NEXT:jae LBB0_7
+; CHECK-X64-NEXT:  ## %bb.6: ## %entry
+; CHECK-X64-NEXT:movq 16(%rsp), %rax
+; CHECK-X64-NEXT:addq %rcx, %rax
+; CHECK-X64-NEXT:addl $8, %ecx
+; CHECK-X64-NEXT:movl %ecx, (%rsp)
+; CHECK-X64-NEXT:jmp LBB0_8
+; CHECK-X64-NEXT:  LBB0_7: ## %entry
+; CHECK-X64-NEXT:movq 8(%rsp), %rax
+; CHECK-X64-NEXT:movq %rax, %rcx
+; CHECK-X64-NEXT:addq $8, %rcx
+; CHECK-X64-NEXT:movq %rcx, 8(%rsp)
+; CHECK-X64-NEXT:  LBB0_8: ## %entry
+; 

[llvm-branch-commits] [llvm] 22dba70 - [AC] Handle (X+C1)

2020-12-13 Thread Nikita Popov via llvm-branch-commits

Author: Nikita Popov
Date: 2020-12-13T21:00:32+01:00
New Revision: 22dba707b0fbf778466baef718cc536700b99df3

URL: 
https://github.com/llvm/llvm-project/commit/22dba707b0fbf778466baef718cc536700b99df3
DIFF: 
https://github.com/llvm/llvm-project/commit/22dba707b0fbf778466baef718cc536700b99df3.diff

LOG: [AC] Handle (X+C1)C && X C3 && A < C4,
+// and recognized by LVI at least.
+if (Pred == ICmpInst::ICMP_ULT &&
+match(A, m_Add(m_Value(X), m_ConstantInt())) &&
+match(B, m_ConstantInt()))
+  AddAffected(X);
   }
 }
 

diff  --git a/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll 
b/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll
index 71cdbefc8942..b1e3bd334d91 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll
@@ -577,9 +577,8 @@ define i1 @test_assume_cmp_with_offset(i64 %idx) {
 ; CHECK-NEXT:[[IDX_OFF1:%.*]] = add i64 [[IDX:%.*]], -5
 ; CHECK-NEXT:[[CMP1:%.*]] = icmp ult i64 [[IDX_OFF1]], 3
 ; CHECK-NEXT:tail call void @llvm.assume(i1 [[CMP1]])
-; CHECK-NEXT:[[IDX_OFF2:%.*]] = add i64 [[IDX]], -1
-; CHECK-NEXT:[[CMP2:%.*]] = icmp ult i64 [[IDX_OFF2]], 10
-; CHECK-NEXT:ret i1 [[CMP2]]
+; CHECK-NEXT:[[IDX_OFF2:%.*]] = add nsw i64 [[IDX]], -1
+; CHECK-NEXT:ret i1 true
 ;
   %idx.off1 = add i64 %idx, -5
   %cmp1 = icmp ult i64 %idx.off1, 3



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] fa31f33 - [X86] Add isel patterns to form VPDPWSSD from (add (vpmaddwd X, Y), Z) when AVXVNNI is enabled.

2020-12-13 Thread Craig Topper via llvm-branch-commits

Author: Craig Topper
Date: 2020-12-13T12:02:07-08:00
New Revision: fa31f337a259acac1731571146d5734208c616d0

URL: 
https://github.com/llvm/llvm-project/commit/fa31f337a259acac1731571146d5734208c616d0
DIFF: 
https://github.com/llvm/llvm-project/commit/fa31f337a259acac1731571146d5734208c616d0.diff

LOG: [X86] Add isel patterns to form VPDPWSSD from (add (vpmaddwd X, Y), Z) 
when AVXVNNI is enabled.

We already have these patterns for AVX512VNNI.

Added: 
llvm/test/CodeGen/X86/avxvnni.ll

Modified: 
llvm/lib/Target/X86/X86InstrAVX512.td
llvm/lib/Target/X86/X86InstrSSE.td
llvm/test/CodeGen/X86/avx512vnni.ll

Removed: 




diff  --git a/llvm/lib/Target/X86/X86InstrAVX512.td 
b/llvm/lib/Target/X86/X86InstrAVX512.td
index f223a152fff6..d250fd9ad44c 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -11914,11 +11914,6 @@ defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", 
X86Vpdpbusds, SchedWriteVecIMul
 defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, 
SchedWriteVecIMul, 1>;
 defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, 
SchedWriteVecIMul, 1>;
 
-def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
- (X86vpmaddwd node:$lhs, node:$rhs), [{
-  return N->hasOneUse();
-}]>;
-
 // Patterns to match VPDPWSSD from existing instructions/intrinsics.
 let Predicates = [HasVNNI] in {
   def : Pat<(v16i32 (add VR512:$src1,

diff  --git a/llvm/lib/Target/X86/X86InstrSSE.td 
b/llvm/lib/Target/X86/X86InstrSSE.td
index 36dd5cbe9e5f..896a2fa58f72 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -7206,6 +7206,26 @@ defm VPDPBUSDS  : avx_vnni_rm<0x51, "vpdpbusds", 
X86Vpdpbusds, 0>, ExplicitVEXPr
 defm VPDPWSSD   : avx_vnni_rm<0x52, "vpdpwssd",  X86Vpdpwssd, 1>, 
ExplicitVEXPrefix;
 defm VPDPWSSDS  : avx_vnni_rm<0x53, "vpdpwssds", X86Vpdpwssds, 1>, 
ExplicitVEXPrefix;
 
+def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86vpmaddwd node:$lhs, node:$rhs), [{
+  return N->hasOneUse();
+}]>;
+
+let Predicates = [HasAVXVNNI, NoVLX_Or_NoVNNI] in {
+  def : Pat<(v8i32 (add VR256:$src1,
+(X86vpmaddwd_su VR256:$src2, VR256:$src3))),
+(VPDPWSSDYrr VR256:$src1, VR256:$src2, VR256:$src3)>;
+  def : Pat<(v8i32 (add VR256:$src1,
+(X86vpmaddwd_su VR256:$src2, (load addr:$src3,
+(VPDPWSSDYrm VR256:$src1, VR256:$src2, addr:$src3)>;
+  def : Pat<(v4i32 (add VR128:$src1,
+(X86vpmaddwd_su VR128:$src2, VR128:$src3))),
+(VPDPWSSDrr VR128:$src1, VR128:$src2, VR128:$src3)>;
+  def : Pat<(v4i32 (add VR128:$src1,
+(X86vpmaddwd_su VR128:$src2, (load addr:$src3,
+(VPDPWSSDrm VR128:$src1, VR128:$src2, addr:$src3)>;
+}
+
 
//===--===//
 // VPERMIL - Permute Single and Double Floating-Point Values
 //

diff  --git a/llvm/test/CodeGen/X86/avx512vnni.ll 
b/llvm/test/CodeGen/X86/avx512vnni.ll
index 2464a3e93ac6..7dde6451074a 100644
--- a/llvm/test/CodeGen/X86/avx512vnni.ll
+++ b/llvm/test/CodeGen/X86/avx512vnni.ll
@@ -1,134 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown 
-mattr=+avx512vnni,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=CHECK
 
-define <4 x i32> @test_pmaddwd_v8i16_add_v4i32(<4 x i32> %a0, <8 x i16> %a1, 
<8 x i16> %a2) {
-; CHECK-LABEL: test_pmaddwd_v8i16_add_v4i32:
-; CHECK:   # %bb.0:
-; CHECK-NEXT:vpdpwssd %xmm2, %xmm1, %xmm0
-; CHECK-NEXT:retq
-  %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a1, <8 x i16> %a2)
-  %2 = add <4 x i32> %1, %a0
-  ret <4 x i32> %2
-}
-
-define <4 x i32> @test_pmaddwd_v8i16_add_v4i32_commute(<4 x i32> %a0, <8 x 
i16> %a1, <8 x i16> %a2) {
-; CHECK-LABEL: test_pmaddwd_v8i16_add_v4i32_commute:
-; CHECK:   # %bb.0:
-; CHECK-NEXT:vpdpwssd %xmm2, %xmm1, %xmm0
-; CHECK-NEXT:retq
-  %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a1, <8 x i16> %a2)
-  %2 = add <4 x i32> %a0, %1
-  ret <4 x i32> %2
-}
-
-define <4 x i32> @test_pmaddwd_v8i16_add_v4i32_load1(<4 x i32> %a0, <8 x i16>* 
%p1, <8 x i16> %a2) {
-; CHECK-LABEL: test_pmaddwd_v8i16_add_v4i32_load1:
-; CHECK:   # %bb.0:
-; CHECK-NEXT:vpdpwssd (%rdi), %xmm1, %xmm0
-; CHECK-NEXT:retq
-  %a1 = load <8 x i16>, <8 x i16>* %p1
-  %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a1, <8 x i16> %a2)
-  %2 = add <4 x i32> %1, %a0
-  ret <4 x i32> %2
-}
-
-define <4 x i32> @test_pmaddwd_v8i16_add_v4i32_load2(<4 x i32> %a0, <8 x i16> 
%a1, <8 x i16>* %p2) {
-; CHECK-LABEL: test_pmaddwd_v8i16_add_v4i32_load2:
-; CHECK:   # %bb.0:
-; CHECK-NEXT:vpdpwssd (%rdi), %xmm1, %xmm0
-; CHECK-NEXT:retq
-  %a2 = lo

[llvm-branch-commits] [llvm] 0261ce9 - [X86] Add ExeDomain = SSEPackedSingle to cvtss2sd and cvtsd2ss instructions.

2020-12-13 Thread Craig Topper via llvm-branch-commits

Author: Craig Topper
Date: 2020-12-13T12:35:33-08:00
New Revision: 0261ce9e17bd4ef17dd558d80b029e12c1677535

URL: 
https://github.com/llvm/llvm-project/commit/0261ce9e17bd4ef17dd558d80b029e12c1677535
DIFF: 
https://github.com/llvm/llvm-project/commit/0261ce9e17bd4ef17dd558d80b029e12c1677535.diff

LOG: [X86] Add ExeDomain = SSEPackedSingle to cvtss2sd and cvtsd2ss instructions.

Prep for D92993

Added: 


Modified: 
llvm/lib/Target/X86/X86InstrAVX512.td
llvm/lib/Target/X86/X86InstrSSE.td

Removed: 




diff  --git a/llvm/lib/Target/X86/X86InstrAVX512.td 
b/llvm/lib/Target/X86/X86InstrAVX512.td
index d250fd9ad44c..afe2176548fa 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7569,7 +7569,7 @@ multiclass avx512_cvt_fp_scalar_sd2ss opc, string 
OpcodeStr,
   SDNode OpNode, SDNode OpNodeRnd,
   X86FoldableSchedWrite sched,
   X86VectorVTInfo _src, X86VectorVTInfo 
_dst> {
-  let Predicates = [HasAVX512] in {
+  let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in {
 defm Z : avx512_cvt_fp_scalar,
  avx512_cvt_fp_rc_scalar, VEX_W, EVEX_CD8<64, CD8VT1>, 
XD;
@@ -7580,7 +7580,7 @@ multiclass avx512_cvt_fp_scalar_ss2sd opc, string 
OpcodeStr,
   SDNode OpNode, SDNode OpNodeSAE,
   X86FoldableSchedWrite sched,
   X86VectorVTInfo _src, X86VectorVTInfo 
_dst> {
-  let Predicates = [HasAVX512] in {
+  let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in {
 defm Z : avx512_cvt_fp_scalar,
  avx512_cvt_fp_sae_scalar,
  EVEX_CD8<32, CD8VT1>, XS;

diff  --git a/llvm/lib/Target/X86/X86InstrSSE.td 
b/llvm/lib/Target/X86/X86InstrSSE.td
index 896a2fa58f72..0fac3b6f1761 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1242,7 +1242,8 @@ def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
 /// SSE 2 Only
 
 // Convert scalar double to scalar single
-let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [UseAVX] in {
+let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [UseAVX],
+ExeDomain = SSEPackedSingle in {
 def VCVTSD2SSrr  : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
 (ins FR32:$src1, FR64:$src2),
 "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", 
[]>,
@@ -1260,7 +1261,7 @@ def : Pat<(f32 (any_fpround FR64:$src)),
 (VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>,
   Requires<[UseAVX]>;
 
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in {
 def CVTSD2SSrr  : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
   "cvtsd2ss\t{$src, $dst|$dst, $src}",
   [(set FR32:$dst, (any_fpround FR64:$src))]>,
@@ -1272,7 +1273,7 @@ def CVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst), 
(ins f64mem:$src),
 Sched<[WriteCvtSD2SS.Folded]>, SIMD_EXC;
 }
 
-let Uses = [MXCSR], mayRaiseFPException = 1 in {
+let Uses = [MXCSR], mayRaiseFPException = 1, ExeDomain = SSEPackedSingle in {
 def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -1306,7 +1307,7 @@ def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
 
 // Convert scalar single to scalar double
 // SSE2 instructions with XS prefix
-let isCodeGenOnly = 1, hasSideEffects = 0 in {
+let isCodeGenOnly = 1, hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
 def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
 (ins FR64:$src1, FR32:$src2),
 "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
@@ -1326,7 +1327,7 @@ def : Pat<(f64 (any_fpextend FR32:$src)),
 def : Pat<(any_fpextend (loadf32 addr:$src)),
 (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX, 
OptForSize]>;
 
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, ExeDomain = SSEPackedSingle in {
 def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (any_fpextend FR32:$src))]>,
@@ -1338,7 +1339,8 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), 
(ins f32mem:$src),
Sched<[WriteCvtSS2SD.Folded]>, SIMD_EXC;
 } // isCodeGenOnly = 1
 
-let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in {
+let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1,
+ExeDomain = SSEPackedSingle in {
 def VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
   (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
 "vcvtss2sd\t{$src2, $src1, $dst|$dst, $sr

[llvm-branch-commits] [clang] 5ad202c - [NFC][AMDGPU] Reformat AMD GPU targets in cuda.cpp

2020-12-13 Thread via llvm-branch-commits

Author: Tony
Date: 2020-12-13T23:02:59Z
New Revision: 5ad202ce8963157242785a8345024a80c721ece2

URL: 
https://github.com/llvm/llvm-project/commit/5ad202ce8963157242785a8345024a80c721ece2
DIFF: 
https://github.com/llvm/llvm-project/commit/5ad202ce8963157242785a8345024a80c721ece2.diff

LOG: [NFC][AMDGPU] Reformat AMD GPU targets in cuda.cpp

Differential Revision: https://reviews.llvm.org/D93181

Added: 


Modified: 
clang/lib/Basic/Cuda.cpp

Removed: 




diff  --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index d9f79a1a10d5..144113f2d2e7 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -72,23 +72,34 @@ CudaArchToStringMap arch_names[] = {
 SM(70), SM(72),  // Volta
 SM(75),  // Turing
 SM(80),  // Ampere
-GFX(600), // tahiti
-GFX(601), // pitcairn, verde
-GFX(602), // oland, hainan
-GFX(700), // kaveri
-GFX(701), // hawaii
-GFX(702), // 290,290x,R390,R390x
-GFX(703), // kabini mullins
-GFX(704), // bonaire
-GFX(705),
-GFX(801), // carrizo
-GFX(802), // tonga,iceland
-GFX(803), // fiji,polaris10
-GFX(805), // tongapro
-GFX(810), // stoney
-GFX(900), // vega, instinct
-GFX(902), GFX(904), GFX(906), GFX(908), GFX(909), GFX(90c),
-GFX(1010), GFX(1011), GFX(1012), GFX(1030), GFX(1031), GFX(1032), GFX(1033)
+GFX(600),  // gfx600
+GFX(601),  // gfx601
+GFX(602),  // gfx602
+GFX(700),  // gfx700
+GFX(701),  // gfx701
+GFX(702),  // gfx702
+GFX(703),  // gfx703
+GFX(704),  // gfx704
+GFX(705),  // gfx705
+GFX(801),  // gfx801
+GFX(802),  // gfx802
+GFX(803),  // gfx803
+GFX(805),  // gfx805
+GFX(810),  // gfx810
+GFX(900),  // gfx900
+GFX(902),  // gfx902
+GFX(904),  // gfx904
+GFX(906),  // gfx906
+GFX(908),  // gfx908
+GFX(909),  // gfx909
+GFX(90c),  // gfx90c
+GFX(1010), // gfx1010
+GFX(1011), // gfx1011
+GFX(1012), // gfx1012
+GFX(1030), // gfx1030
+GFX(1031), // gfx1031
+GFX(1032), // gfx1032
+GFX(1033), // gfx1033
 // clang-format on
 };
 #undef SM



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang-tools-extra] fef242c - [clangd] Fix locateMacroAt() for macro definition outside preamble

2020-12-13 Thread Nathan Ridge via llvm-branch-commits

Author: Nathan Ridge
Date: 2020-12-13T18:33:33-05:00
New Revision: fef242c32e833b84e8b46bd56a28c01c5f9aa65d

URL: 
https://github.com/llvm/llvm-project/commit/fef242c32e833b84e8b46bd56a28c01c5f9aa65d
DIFF: 
https://github.com/llvm/llvm-project/commit/fef242c32e833b84e8b46bd56a28c01c5f9aa65d.diff

LOG: [clangd] Fix locateMacroAt() for macro definition outside preamble

Fixes https://github.com/clangd/clangd/issues/577

Differential Revision: https://reviews.llvm.org/D91025

Added: 


Modified: 
clang-tools-extra/clangd/SourceCode.cpp
clang-tools-extra/clangd/unittests/XRefsTests.cpp

Removed: 




diff  --git a/clang-tools-extra/clangd/SourceCode.cpp 
b/clang-tools-extra/clangd/SourceCode.cpp
index 54248386d7b4..c0ccf2152750 100644
--- a/clang-tools-extra/clangd/SourceCode.cpp
+++ b/clang-tools-extra/clangd/SourceCode.cpp
@@ -975,17 +975,30 @@ llvm::Optional locateMacroAt(const 
syntax::Token &SpelledTok,
   if (!IdentifierInfo || !IdentifierInfo->hadMacroDefinition())
 return None;
 
-  // Get the definition just before the searched location so that a macro
-  // referenced in a '#undef MACRO' can still be found. Note that we only do
-  // that if Loc is not pointing at start of file.
-  if (SM.getLocForStartOfFile(SM.getFileID(Loc)) != Loc)
-Loc = Loc.getLocWithOffset(-1);
-  MacroDefinition MacroDef = PP.getMacroDefinitionAtLoc(IdentifierInfo, Loc);
-  if (auto *MI = MacroDef.getMacroInfo())
-return DefinedMacro{
-IdentifierInfo->getName(), MI,
-translatePreamblePatchLocation(MI->getDefinitionLoc(), SM)};
-  return None;
+  // We need to take special care to handle #define and #undef.
+  // Preprocessor::getMacroDefinitionAtLoc() only considers a macro
+  // definition to be in scope *after* the location of the macro name in a
+  // #define that introduces it, and *before* the location of the macro name
+  // in an #undef that undefines it. To handle these cases, we check for
+  // the macro being in scope either just after or just before the location
+  // of the token. In getting the location before, we also take care to check
+  // for start-of-file.
+  FileID FID = SM.getFileID(Loc);
+  assert(Loc != SM.getLocForEndOfFile(FID));
+  SourceLocation JustAfterToken = Loc.getLocWithOffset(1);
+  auto *MacroInfo =
+  PP.getMacroDefinitionAtLoc(IdentifierInfo, 
JustAfterToken).getMacroInfo();
+  if (!MacroInfo && SM.getLocForStartOfFile(FID) != Loc) {
+SourceLocation JustBeforeToken = Loc.getLocWithOffset(-1);
+MacroInfo = PP.getMacroDefinitionAtLoc(IdentifierInfo, JustBeforeToken)
+.getMacroInfo();
+  }
+  if (!MacroInfo) {
+return None;
+  }
+  return DefinedMacro{
+  IdentifierInfo->getName(), MacroInfo,
+  translatePreamblePatchLocation(MacroInfo->getDefinitionLoc(), SM)};
 }
 
 llvm::Expected Edit::apply() const {

diff  --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp 
b/clang-tools-extra/clangd/unittests/XRefsTests.cpp
index c3c87bd628bd..5b0ceb1cc200 100644
--- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp
+++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp
@@ -1624,6 +1624,14 @@ TEST(FindReferences, WithinAST) {
 }
   )cpp",
 
+  R"cpp(// Macro outside preamble
+int breakPreamble;
+#define [[MA^CRO]](X) (X+1)
+void test() {
+  int x = [[MACRO]]([[MACRO]](1));
+}
+  )cpp",
+
   R"cpp(
 int [[v^ar]] = 0;
 void foo(int s = [[var]]);



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 0207de0 - [ORC] Prefer preincrement on iterator.

2020-12-13 Thread Lang Hames via llvm-branch-commits

Author: Lang Hames
Date: 2020-12-14T12:00:21+11:00
New Revision: 0207de0bfe77b643adb27605e9f0fdfb8929

URL: 
https://github.com/llvm/llvm-project/commit/0207de0bfe77b643adb27605e9f0fdfb8929
DIFF: 
https://github.com/llvm/llvm-project/commit/0207de0bfe77b643adb27605e9f0fdfb8929.diff

LOG: [ORC] Prefer preincrement on iterator.

Added: 


Modified: 
llvm/lib/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.cpp

Removed: 




diff  --git a/llvm/lib/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.cpp 
b/llvm/lib/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.cpp
index 80d8f34ea447..3442da5810cb 100644
--- a/llvm/lib/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TPCDynamicLibrarySearchGenerator.cpp
@@ -55,7 +55,7 @@ Error TPCDynamicLibrarySearchGenerator::tryToGenerate(
 if (*ResultI)
   NewSymbols[KV.first] =
   JITEvaluatedSymbol(*ResultI, JITSymbolFlags::Exported);
-ResultI++;
+++ResultI;
   }
 
   // If there were no resolved symbols bail out.



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 77bb3c1 - [JITLink] Fix include guard end comment.

2020-12-13 Thread Lang Hames via llvm-branch-commits

Author: Lang Hames
Date: 2020-12-14T12:00:21+11:00
New Revision: 77bb3c1ac200079abf4eec57a81a2f6cb14a6eae

URL: 
https://github.com/llvm/llvm-project/commit/77bb3c1ac200079abf4eec57a81a2f6cb14a6eae
DIFF: 
https://github.com/llvm/llvm-project/commit/77bb3c1ac200079abf4eec57a81a2f6cb14a6eae.diff

LOG: [JITLink] Fix include guard end comment.

Added: 


Modified: 
llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h

Removed: 




diff  --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h 
b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
index 0c8514a60a50..a3dc6c1a7005 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
@@ -107,4 +107,4 @@ class InProcessMemoryManager : public JITLinkMemoryManager {
 } // end namespace jitlink
 } // end namespace llvm
 
-#endif // LLVM_EXECUTIONENGINE_JITLINK_JITLINK_H
+#endif // LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 8904ee8 - [JITLink] Add JITLinkDylib type, thread through JITLinkMemoryManager APIs.

2020-12-13 Thread Lang Hames via llvm-branch-commits

Author: Lang Hames
Date: 2020-12-14T12:29:16+11:00
New Revision: 8904ee8ac7ebcc50a60de0914abc6862e28b6664

URL: 
https://github.com/llvm/llvm-project/commit/8904ee8ac7ebcc50a60de0914abc6862e28b6664
DIFF: 
https://github.com/llvm/llvm-project/commit/8904ee8ac7ebcc50a60de0914abc6862e28b6664.diff

LOG: [JITLink] Add JITLinkDylib type, thread through JITLinkMemoryManager APIs.

JITLinkDylib represents a target dylib for a JITLink link. By representing this
explicitly we can:
  - Enable JITLinkMemoryManagers to manage allocations on a per-dylib basis
(e.g. by maintaining a separate allocation pool for each JITLinkDylib).
  - Enable new features and diagnostics that require information about the
target dylib (not implemented in this patch).

Added: 


Modified: 
llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
llvm/include/llvm/ExecutionEngine/Orc/Core.h
llvm/include/llvm/ExecutionEngine/Orc/OrcRPCTargetProcessControl.h
llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
llvm/lib/ExecutionEngine/Orc/TPCIndirectionUtils.cpp
llvm/tools/llvm-jitlink/llvm-jitlink.cpp

Removed: 




diff  --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h 
b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
index f0b4d9bcd49c..48e64335613b 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
@@ -1270,9 +1270,15 @@ class JITLinkContext {
 public:
   using LookupMap = DenseMap;
 
+  /// Create a JITLinkContext.
+  JITLinkContext(const JITLinkDylib *JD) : JD(JD) {}
+
   /// Destroy a JITLinkContext.
   virtual ~JITLinkContext();
 
+  /// Return the JITLinkDylib that this link is targeting, if any.
+  const JITLinkDylib *getJITLinkDylib() const { return JD; }
+
   /// Return the MemoryManager to be used for this link.
   virtual JITLinkMemoryManager &getMemoryManager() = 0;
 
@@ -1324,6 +1330,9 @@ class JITLinkContext {
   /// Called by JITLink to modify the pass pipeline prior to linking.
   /// The default version performs no modification.
   virtual Error modifyPassConfig(const Triple &TT, PassConfiguration &Config);
+
+private:
+  const JITLinkDylib *JD = nullptr;
 };
 
 /// Marks all symbols in a graph live. This can be used as a default,

diff  --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h 
b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
index a3dc6c1a7005..cee7d6b09c48 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
@@ -14,10 +14,11 @@
 #define LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ExecutionEngine/JITLink/JITLinkDylib.h"
 #include "llvm/ExecutionEngine/JITSymbol.h"
 #include "llvm/Support/Error.h"
-#include "llvm/Support/Memory.h"
 #include "llvm/Support/MSVCErrorWorkarounds.h"
+#include "llvm/Support/Memory.h"
 
 #include 
 #include 
@@ -93,15 +94,25 @@ class JITLinkMemoryManager {
   virtual ~JITLinkMemoryManager();
 
   /// Create an Allocation object.
+  ///
+  /// The JD argument represents the target JITLinkDylib, and can be used by
+  /// JITLinkMemoryManager implementers to manage per-dylib allocation pools
+  /// (e.g. one pre-reserved address space slab per dylib to ensure that all
+  /// allocations for the dylib are within a certain range). The JD argument
+  /// may be null (representing an allocation not associated with any
+  /// JITDylib).
+  ///
+  /// The request argument describes the segment sizes and permissions being
+  /// requested.
   virtual Expected>
-  allocate(const SegmentsRequestMap &Request) = 0;
+  allocate(const JITLinkDylib *JD, const SegmentsRequestMap &Request) = 0;
 };
 
 /// A JITLinkMemoryManager that allocates in-process memory.
 class InProcessMemoryManager : public JITLinkMemoryManager {
 public:
   Expected>
-  allocate(const SegmentsRequestMap &Request) override;
+  allocate(const JITLinkDylib *JD, const SegmentsRequestMap &Request) override;
 };
 
 } // end namespace jitlink

diff  --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h 
b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
index 6256872b1094..3020694ee732 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
@@ -17,6 +17,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/FunctionExtras.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ExecutionEngine/JITLink/JITLinkDylib.h"
 #include "llvm/ExecutionEngine/JITSymbol.h"
 #include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"
 #include "llvm/ExecutionEngi

[llvm-branch-commits] [llvm] 5b112bc - Revert "[JITLink] Add JITLinkDylib type, thread through JITLinkMemoryManager APIs."

2020-12-13 Thread Nico Weber via llvm-branch-commits

Author: Nico Weber
Date: 2020-12-13T21:30:38-05:00
New Revision: 5b112bcc0de7b4a815d91eda195177bab800acd3

URL: 
https://github.com/llvm/llvm-project/commit/5b112bcc0de7b4a815d91eda195177bab800acd3
DIFF: 
https://github.com/llvm/llvm-project/commit/5b112bcc0de7b4a815d91eda195177bab800acd3.diff

LOG: Revert "[JITLink] Add JITLinkDylib type, thread through 
JITLinkMemoryManager APIs."

This reverts commit 8904ee8ac7ebcc50a60de0914abc6862e28b6664.
Didn't `git add` llvm/ExecutionEngine/JITLink/JITLinkDylib.h and hence doesn't
build anywhere.

Added: 


Modified: 
llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
llvm/include/llvm/ExecutionEngine/Orc/Core.h
llvm/include/llvm/ExecutionEngine/Orc/OrcRPCTargetProcessControl.h
llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
llvm/lib/ExecutionEngine/Orc/TPCIndirectionUtils.cpp
llvm/tools/llvm-jitlink/llvm-jitlink.cpp

Removed: 




diff  --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h 
b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
index 48e64335613b..f0b4d9bcd49c 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
@@ -1270,15 +1270,9 @@ class JITLinkContext {
 public:
   using LookupMap = DenseMap;
 
-  /// Create a JITLinkContext.
-  JITLinkContext(const JITLinkDylib *JD) : JD(JD) {}
-
   /// Destroy a JITLinkContext.
   virtual ~JITLinkContext();
 
-  /// Return the JITLinkDylib that this link is targeting, if any.
-  const JITLinkDylib *getJITLinkDylib() const { return JD; }
-
   /// Return the MemoryManager to be used for this link.
   virtual JITLinkMemoryManager &getMemoryManager() = 0;
 
@@ -1330,9 +1324,6 @@ class JITLinkContext {
   /// Called by JITLink to modify the pass pipeline prior to linking.
   /// The default version performs no modification.
   virtual Error modifyPassConfig(const Triple &TT, PassConfiguration &Config);
-
-private:
-  const JITLinkDylib *JD = nullptr;
 };
 
 /// Marks all symbols in a graph live. This can be used as a default,

diff  --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h 
b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
index cee7d6b09c48..a3dc6c1a7005 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
@@ -14,11 +14,10 @@
 #define LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H
 
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ExecutionEngine/JITLink/JITLinkDylib.h"
 #include "llvm/ExecutionEngine/JITSymbol.h"
 #include "llvm/Support/Error.h"
-#include "llvm/Support/MSVCErrorWorkarounds.h"
 #include "llvm/Support/Memory.h"
+#include "llvm/Support/MSVCErrorWorkarounds.h"
 
 #include 
 #include 
@@ -94,25 +93,15 @@ class JITLinkMemoryManager {
   virtual ~JITLinkMemoryManager();
 
   /// Create an Allocation object.
-  ///
-  /// The JD argument represents the target JITLinkDylib, and can be used by
-  /// JITLinkMemoryManager implementers to manage per-dylib allocation pools
-  /// (e.g. one pre-reserved address space slab per dylib to ensure that all
-  /// allocations for the dylib are within a certain range). The JD argument
-  /// may be null (representing an allocation not associated with any
-  /// JITDylib.
-  ///
-  /// The request argument describes the segment sizes and permisssions being
-  /// requested.
   virtual Expected>
-  allocate(const JITLinkDylib *JD, const SegmentsRequestMap &Request) = 0;
+  allocate(const SegmentsRequestMap &Request) = 0;
 };
 
 /// A JITLinkMemoryManager that allocates in-process memory.
 class InProcessMemoryManager : public JITLinkMemoryManager {
 public:
   Expected>
-  allocate(const JITLinkDylib *JD, const SegmentsRequestMap &Request) override;
+  allocate(const SegmentsRequestMap &Request) override;
 };
 
 } // end namespace jitlink

diff  --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h 
b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
index 3020694ee732..6256872b1094 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
@@ -17,7 +17,6 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/FunctionExtras.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
-#include "llvm/ExecutionEngine/JITLink/JITLinkDylib.h"
 #include "llvm/ExecutionEngine/JITSymbol.h"
 #include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"
 #include "llvm/ExecutionEngine/OrcV1Deprecation.h"
@@ -888,8 +887,7 @@ class DefinitionGenerator {
 /// their addresses may be used as keys for resource management.
 /// JITDylib state changes must be made via an Execution

[llvm-branch-commits] [llvm] 04795ab - Re-apply 8904ee8ac7e with missing header included this time.

2020-12-13 Thread Lang Hames via llvm-branch-commits

Author: Lang Hames
Date: 2020-12-14T13:39:33+11:00
New Revision: 04795ab8368a9f4169b737e6db2aebea47d6cf10

URL: 
https://github.com/llvm/llvm-project/commit/04795ab8368a9f4169b737e6db2aebea47d6cf10
DIFF: 
https://github.com/llvm/llvm-project/commit/04795ab8368a9f4169b737e6db2aebea47d6cf10.diff

LOG: Re-apply 8904ee8ac7e with missing header included this time.

Added: 
llvm/include/llvm/ExecutionEngine/JITLink/JITLinkDylib.h

Modified: 
llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
llvm/include/llvm/ExecutionEngine/Orc/Core.h
llvm/include/llvm/ExecutionEngine/Orc/OrcRPCTargetProcessControl.h
llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
llvm/lib/ExecutionEngine/Orc/TPCIndirectionUtils.cpp
llvm/tools/llvm-jitlink/llvm-jitlink.cpp

Removed: 




diff  --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h 
b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
index f0b4d9bcd49c..48e64335613b 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
@@ -1270,9 +1270,15 @@ class JITLinkContext {
 public:
   using LookupMap = DenseMap;
 
+  /// Create a JITLinkContext.
+  JITLinkContext(const JITLinkDylib *JD) : JD(JD) {}
+
   /// Destroy a JITLinkContext.
   virtual ~JITLinkContext();
 
+  /// Return the JITLinkDylib that this link is targeting, if any.
+  const JITLinkDylib *getJITLinkDylib() const { return JD; }
+
   /// Return the MemoryManager to be used for this link.
   virtual JITLinkMemoryManager &getMemoryManager() = 0;
 
@@ -1324,6 +1330,9 @@ class JITLinkContext {
   /// Called by JITLink to modify the pass pipeline prior to linking.
   /// The default version performs no modification.
   virtual Error modifyPassConfig(const Triple &TT, PassConfiguration &Config);
+
+private:
+  const JITLinkDylib *JD = nullptr;
 };
 
 /// Marks all symbols in a graph live. This can be used as a default,

diff  --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkDylib.h 
b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkDylib.h
new file mode 100644
index ..2aa88cb50074
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkDylib.h
@@ -0,0 +1,24 @@
+//===-- JITLinkDylib.h - JITLink Dylib type -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// Defines the JITLinkDylib API.
+//
+//===--===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_JITLINKDYLIB_H
+#define LLVM_EXECUTIONENGINE_JITLINK_JITLINKDYLIB_H
+
+namespace llvm {
+namespace jitlink {
+
+class JITLinkDylib {};
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_JITLINK_JITLINKDYLIB_H

diff  --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h 
b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
index a3dc6c1a7005..cee7d6b09c48 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
@@ -14,10 +14,11 @@
 #define LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ExecutionEngine/JITLink/JITLinkDylib.h"
 #include "llvm/ExecutionEngine/JITSymbol.h"
 #include "llvm/Support/Error.h"
-#include "llvm/Support/Memory.h"
 #include "llvm/Support/MSVCErrorWorkarounds.h"
+#include "llvm/Support/Memory.h"
 
 #include 
 #include 
@@ -93,15 +94,25 @@ class JITLinkMemoryManager {
   virtual ~JITLinkMemoryManager();
 
   /// Create an Allocation object.
+  ///
+  /// The JD argument represents the target JITLinkDylib, and can be used by
+  /// JITLinkMemoryManager implementers to manage per-dylib allocation pools
+  /// (e.g. one pre-reserved address space slab per dylib to ensure that all
+  /// allocations for the dylib are within a certain range). The JD argument
+  /// may be null (representing an allocation not associated with any
+  /// JITDylib.
+  ///
+  /// The request argument describes the segment sizes and permisssions being
+  /// requested.
   virtual Expected>
-  allocate(const SegmentsRequestMap &Request) = 0;
+  allocate(const JITLinkDylib *JD, const SegmentsRequestMap &Request) = 0;
 };
 
 /// A JITLinkMemoryManager that allocates in-process memory.
 class InProcessMemoryManager : public JITLinkMemoryManager {
 public:
   Expected>
-  

[llvm-branch-commits] [llvm] 655011c - [opt][NPM] Pin -lower-amx-type to legacy PM

2020-12-13 Thread Arthur Eubanks via llvm-branch-commits

Author: Arthur Eubanks
Date: 2020-12-13T19:16:20-08:00
New Revision: 655011c7134878a24ed690339cfae9dc3d6c12f9

URL: 
https://github.com/llvm/llvm-project/commit/655011c7134878a24ed690339cfae9dc3d6c12f9
DIFF: 
https://github.com/llvm/llvm-project/commit/655011c7134878a24ed690339cfae9dc3d6c12f9.diff

LOG: [opt][NPM] Pin -lower-amx-type to legacy PM

This is part of the codegen pipeline.

Added: 


Modified: 
llvm/tools/opt/opt.cpp

Removed: 




diff  --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp
index ad9dc2d0774a..d7a39c911811 100644
--- a/llvm/tools/opt/opt.cpp
+++ b/llvm/tools/opt/opt.cpp
@@ -486,7 +486,8 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) {
   "indirectbr-expand",
   "generic-to-nvvm",
   "expandmemcmp",
-  "loop-reduce"};
+  "loop-reduce",
+  "lower-amx-type"};
   for (const auto &P : PassNamePrefix)
 if (Pass.startswith(P))
   return true;



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 913515e - [Target] Use llvm::is_contained (NFC)

2020-12-13 Thread Kazu Hirata via llvm-branch-commits

Author: Kazu Hirata
Date: 2020-12-13T19:35:10-08:00
New Revision: 913515e4652c35221420db1575254d3e935ef835

URL: 
https://github.com/llvm/llvm-project/commit/913515e4652c35221420db1575254d3e935ef835
DIFF: 
https://github.com/llvm/llvm-project/commit/913515e4652c35221420db1575254d3e935ef835.diff

LOG: [Target] Use llvm::is_contained (NFC)

Added: 


Modified: 
llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp

Removed: 




diff  --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp 
b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index f39c8e889043..f228e0a23794 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -283,7 +283,7 @@ HexagonTargetLowering::getPreferredHvxVectorAction(MVT 
VecTy) const {
   // widen the vector. Note: the threshold was not selected in
   // any scientific way.
   ArrayRef Tys = Subtarget.getHVXElementTypes();
-  if (llvm::find(Tys, ElemTy) != Tys.end()) {
+  if (llvm::is_contained(Tys, ElemTy)) {
 unsigned VecWidth = VecTy.getSizeInBits();
 bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
 if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)

diff  --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp 
b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
index b79e7c213a37..fed1abb9549b 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -133,7 +133,7 @@ bool HexagonSubtarget::isHVXElementType(MVT Ty, bool 
IncludeBool) const {
   if (IncludeBool && Ty == MVT::i1)
 return true;
   ArrayRef ElemTypes = getHVXElementTypes();
-  return llvm::find(ElemTypes, Ty) != ElemTypes.end();
+  return llvm::is_contained(ElemTypes, Ty);
 }
 
 bool HexagonSubtarget::isHVXVectorType(MVT VecTy, bool IncludeBool) const {
@@ -159,7 +159,7 @@ bool HexagonSubtarget::isHVXVectorType(MVT VecTy, bool 
IncludeBool) const {
   unsigned VecWidth = VecTy.getSizeInBits();
   if (VecWidth != 8 * HwLen && VecWidth != 16 * HwLen)
 return false;
-  return llvm::find(ElemTypes, ElemTy) != ElemTypes.end();
+  return llvm::is_contained(ElemTypes, ElemTy);
 }
 
 bool HexagonSubtarget::isTypeForHVX(Type *VecTy, bool IncludeBool) const {

diff  --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp 
b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 633f216388d0..307fffae870c 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -302,7 +302,7 @@ static bool collectUnprimedAccPHIs(MachineRegisterInfo *MRI,
   // code.
   if (Opcode != PPC::PHI)
 continue;
-  if (std::find(PHIs.begin(), PHIs.end(), Instr) != PHIs.end())
+  if (llvm::is_contained(PHIs, Instr))
 return false;
   PHIs.push_back(Instr);
 }

diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp 
b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b4a397080284..dade38c0538a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -18280,7 +18280,7 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const 
X86Subtarget &Subtarget,
 // Modify the new Mask to take all zeros from the all-zero vector.
 // Choose indices that are blend-friendly.
 bool UsedZeroVector = false;
-assert(find(WidenedMask, SM_SentinelZero) != WidenedMask.end() &&
+assert(is_contained(WidenedMask, SM_SentinelZero) &&
"V2's non-undef elements are used?!");
 for (int i = 0; i != NewNumElts; ++i)
   if (WidenedMask[i] == SM_SentinelZero) {



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] ee5b5b7 - [CodeGen] Use llvm::erase_value (NFC)

2020-12-13 Thread Kazu Hirata via llvm-branch-commits

Author: Kazu Hirata
Date: 2020-12-13T20:05:48-08:00
New Revision: ee5b5b7a35d01c8a8a08c93b0dce8b18bb8b4b39

URL: 
https://github.com/llvm/llvm-project/commit/ee5b5b7a35d01c8a8a08c93b0dce8b18bb8b4b39
DIFF: 
https://github.com/llvm/llvm-project/commit/ee5b5b7a35d01c8a8a08c93b0dce8b18bb8b4b39.diff

LOG: [CodeGen] Use llvm::erase_value (NFC)

Added: 


Modified: 
llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
llvm/lib/CodeGen/MachineBlockPlacement.cpp
llvm/lib/CodeGen/MachineRegisterInfo.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
llvm/lib/CodeGen/TwoAddressInstructionPass.cpp

Removed: 




diff  --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp 
b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 4ab0c60399f2..2be446531faa 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -573,7 +573,7 @@ static const DIExpression *combineDIExpressions(const 
DIExpression *Original,
   std::vector Elts = Addition->getElements().vec();
   // Avoid multiple DW_OP_stack_values.
   if (Original->isImplicit() && Addition->isImplicit())
-erase_if(Elts, [](uint64_t Op) { return Op == dwarf::DW_OP_stack_value; });
+erase_value(Elts, dwarf::DW_OP_stack_value);
   const DIExpression *CombinedExpr =
   (Elts.size() > 0) ? DIExpression::append(Original, Elts) : Original;
   return CombinedExpr;

diff  --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp 
b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 61e803fbfc40..bd4640822a63 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -3030,12 +3030,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
   SmallVectorImpl &RemoveList = BlockWorkList;
   if (RemBB->isEHPad())
 RemoveList = EHPadWorkList;
-  RemoveList.erase(
-  llvm::remove_if(RemoveList,
-  [RemBB](MachineBasicBlock *BB) {
-return BB == RemBB;
-  }),
-  RemoveList.end());
+  llvm::erase_value(RemoveList, RemBB);
 }
 
 // Handle the filter set

diff  --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp 
b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index fe04ba5009bb..f5793c63acbf 100644
--- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -630,8 +630,7 @@ void 
MachineRegisterInfo::disableCalleeSavedRegister(MCRegister Reg) {
 
   // Remove the register (and its aliases from the list).
   for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
-UpdatedCSRs.erase(std::remove(UpdatedCSRs.begin(), UpdatedCSRs.end(), *AI),
-  UpdatedCSRs.end());
+llvm::erase_value(UpdatedCSRs, *AI);
 }
 
 const MCPhysReg *MachineRegisterInfo::getCalleeSavedRegs() const {

diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 43dbc3df16df..0d0da6a401ba 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -3510,7 +3510,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode 
*NodeToMatch,
   auto &Chain = ChainNodesMatched;
   assert((!E || !is_contained(Chain, N)) &&
  "Chain node replaced during MorphNode");
-  Chain.erase(std::remove(Chain.begin(), Chain.end(), N), Chain.end());
+  llvm::erase_value(Chain, N);
 });
 Res = cast(MorphNode(NodeToMatch, TargetOpc, VTList,
 Ops, EmitNodeInfo));

diff  --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp 
b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 8025851fa0f3..ecee4aed7f88 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1015,7 +1015,7 @@ bool TwoAddressInstructionPass::rescheduleKillAboveMI(
   if (MOReg.isPhysical() && regOverlapsSet(LiveDefs, MOReg, TRI))
 return false;
   // Physical register def is seen.
-  Defs.erase(std::remove(Defs.begin(), Defs.end(), MOReg), Defs.end());
+  llvm::erase_value(Defs, MOReg);
 }
   }
 



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 4830d45 - [MachineCombiner][NFC] Add MustReduceRegisterPressure goal

2020-12-13 Thread Chen Zheng via llvm-branch-commits

Author: Chen Zheng
Date: 2020-12-14T00:02:42-05:00
New Revision: 4830d458dd0d133354cbe1a616e38dfda8e096b8

URL: 
https://github.com/llvm/llvm-project/commit/4830d458dd0d133354cbe1a616e38dfda8e096b8
DIFF: 
https://github.com/llvm/llvm-project/commit/4830d458dd0d133354cbe1a616e38dfda8e096b8.diff

LOG: [MachineCombiner][NFC] Add MustReduceRegisterPressure goal

Add a new goal, MustReduceRegisterPressure, for the machine combiner pass.

PowerPC will use this new goal to perform register-pressure-related
optimizations.

Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D92068

Added: 


Modified: 
llvm/include/llvm/CodeGen/TargetInstrInfo.h
llvm/lib/CodeGen/MachineCombiner.cpp
llvm/lib/CodeGen/TargetInstrInfo.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.h
llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
llvm/lib/Target/PowerPC/PPCInstrInfo.h

Removed: 




diff  --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h 
b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index d7a0e47d3bb5..1cf205f9f5a3 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -25,6 +25,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineOutliner.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/Support/BranchProbability.h"
@@ -1076,9 +1077,23 @@ class TargetInstrInfo : public MCInstrInfo {
   /// faster sequence.
   /// \param Root - Instruction that could be combined with one of its operands
   /// \param Patterns - Vector of possible combination patterns
-  virtual bool getMachineCombinerPatterns(
-  MachineInstr &Root,
-  SmallVectorImpl &Patterns) const;
+  virtual bool
+  getMachineCombinerPatterns(MachineInstr &Root,
+ SmallVectorImpl &Patterns,
+ bool DoRegPressureReduce) const;
+
+  /// Return true if target supports reassociation of instructions in machine
+  /// combiner pass to reduce register pressure for a given BB.
+  virtual bool
+  shouldReduceRegisterPressure(MachineBasicBlock *MBB,
+   RegisterClassInfo *RegClassInfo) const {
+return false;
+  }
+
+  /// Fix up the placeholder we may add in genAlternativeCodeSequence().
+  virtual void
+  finalizeInsInstrs(MachineInstr &Root, MachineCombinerPattern &P,
+SmallVectorImpl &InsInstrs) const {}
 
   /// Return true when a code sequence can improve throughput. It
   /// should be called only for instructions in loops.

diff  --git a/llvm/lib/CodeGen/MachineCombiner.cpp 
b/llvm/lib/CodeGen/MachineCombiner.cpp
index f241435a0482..878912a6032b 100644
--- a/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -22,6 +22,7 @@
 #include "llvm/CodeGen/MachineSizeOpts.h"
 #include "llvm/CodeGen/MachineTraceMetrics.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSchedule.h"
@@ -72,6 +73,7 @@ class MachineCombiner : public MachineFunctionPass {
   MachineTraceMetrics::Ensemble *MinInstr;
   MachineBlockFrequencyInfo *MBFI;
   ProfileSummaryInfo *PSI;
+  RegisterClassInfo RegClassInfo;
 
   TargetSchedModel TSchedModel;
 
@@ -103,6 +105,10 @@ class MachineCombiner : public MachineFunctionPass {
   SmallVectorImpl &DelInstrs,
   DenseMap &InstrIdxForVirtReg,
   MachineCombinerPattern Pattern, bool 
SlackIsAccurate);
+  bool reduceRegisterPressure(MachineInstr &Root, MachineBasicBlock *MBB,
+  SmallVectorImpl &InsInstrs,
+  SmallVectorImpl &DelInstrs,
+  MachineCombinerPattern Pattern);
   bool preservesResourceLen(MachineBasicBlock *MBB,
 MachineTraceMetrics::Trace BlockTrace,
 SmallVectorImpl &InsInstrs,
@@ -257,8 +263,9 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, 
MachineInstr *NewRoot,
 /// The combiner's goal may differ based on which pattern it is attempting
 /// to optimize.
 enum class CombinerObjective {
-  MustReduceDepth, // The data dependency chain must be improved.
-  Default  // The critical path must not be lengthened.
+  MustReduceDepth,// The data dependency chain must be improved.
+  MustReduceRegisterPressure, // The register pressure must be reduced.
+  Default // The critical path must not be lengthened.
 };
 
 static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
@@ -300,6 +307,18 @@ std::pair 
MachineCombiner::getLaten

[llvm-branch-commits] [llvm] b8c847e - [SLP][Test] Precommit test for D93192

2020-12-13 Thread Anton Afanasyev via llvm-branch-commits

Author: Anton Afanasyev
Date: 2020-12-14T09:23:47+03:00
New Revision: b8c847ee731b319c1790ab4410f14933aa59efd5

URL: 
https://github.com/llvm/llvm-project/commit/b8c847ee731b319c1790ab4410f14933aa59efd5
DIFF: 
https://github.com/llvm/llvm-project/commit/b8c847ee731b319c1790ab4410f14933aa59efd5.diff

LOG: [SLP][Test] Precommit test for D93192

This test shows that vectorization of combined store chains currently fails.

Added: 
llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll

Modified: 


Removed: 




diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll
new file mode 100644
index ..63e3178c0278
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll
@@ -0,0 +1,107 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-- -mcpu=corei7 | FileCheck %s
+
+define void @foo(i8* %v0, i8* readonly %v1) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:[[T0:%.*]] = bitcast i8* [[V0:%.*]] to i32*
+; CHECK-NEXT:[[T1:%.*]] = bitcast i8* [[V1:%.*]] to i32*
+; CHECK-NEXT:[[T02:%.*]] = bitcast i8* [[V0]] to i64*
+; CHECK-NEXT:[[T12:%.*]] = bitcast i8* [[V1]] to i64*
+; CHECK-NEXT:[[T14:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 4
+; CHECK-NEXT:[[T18:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 5
+; CHECK-NEXT:[[T22:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 6
+; CHECK-NEXT:[[T26:%.*]] = getelementptr inbounds i32, i32* [[T1]], i64 7
+; CHECK-NEXT:[[T142:%.*]] = getelementptr inbounds i64, i64* [[T12]], i64 8
+; CHECK-NEXT:[[T182:%.*]] = getelementptr inbounds i64, i64* [[T12]], i64 9
+; CHECK-NEXT:[[T222:%.*]] = getelementptr inbounds i64, i64* [[T12]], i64 10
+; CHECK-NEXT:[[T262:%.*]] = getelementptr inbounds i64, i64* [[T12]], i64 11
+; CHECK-NEXT:[[T21:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 4
+; CHECK-NEXT:[[T25:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 5
+; CHECK-NEXT:[[T29:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 6
+; CHECK-NEXT:[[T32:%.*]] = getelementptr inbounds i32, i32* [[T0]], i64 7
+; CHECK-NEXT:[[T212:%.*]] = getelementptr inbounds i64, i64* [[T02]], i64 8
+; CHECK-NEXT:[[T252:%.*]] = getelementptr inbounds i64, i64* [[T02]], i64 9
+; CHECK-NEXT:[[T292:%.*]] = getelementptr inbounds i64, i64* [[T02]], i64 10
+; CHECK-NEXT:[[T322:%.*]] = getelementptr inbounds i64, i64* [[T02]], i64 11
+; CHECK-NEXT:[[T19:%.*]] = load i32, i32* [[T14]], align 4
+; CHECK-NEXT:[[T23:%.*]] = load i32, i32* [[T18]], align 4
+; CHECK-NEXT:[[T27:%.*]] = load i32, i32* [[T22]], align 4
+; CHECK-NEXT:[[T30:%.*]] = load i32, i32* [[T26]], align 4
+; CHECK-NEXT:[[TMP1:%.*]] = bitcast i64* [[T142]] to <2 x i64>*
+; CHECK-NEXT:[[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
+; CHECK-NEXT:[[TMP3:%.*]] = bitcast i64* [[T222]] to <2 x i64>*
+; CHECK-NEXT:[[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8
+; CHECK-NEXT:[[T20:%.*]] = add nsw i32 [[T19]], 4
+; CHECK-NEXT:[[T24:%.*]] = add nsw i32 [[T23]], 4
+; CHECK-NEXT:[[T28:%.*]] = add nsw i32 [[T27]], 6
+; CHECK-NEXT:[[T31:%.*]] = add nsw i32 [[T30]], 7
+; CHECK-NEXT:[[TMP5:%.*]] = add nsw <2 x i64> [[TMP2]], 
+; CHECK-NEXT:[[TMP6:%.*]] = add nsw <2 x i64> [[TMP4]], 
+; CHECK-NEXT:[[TMP7:%.*]] = bitcast i64* [[T212]] to <2 x i64>*
+; CHECK-NEXT:store <2 x i64> [[TMP5]], <2 x i64>* [[TMP7]], align 8
+; CHECK-NEXT:[[TMP8:%.*]] = bitcast i64* [[T292]] to <2 x i64>*
+; CHECK-NEXT:store <2 x i64> [[TMP6]], <2 x i64>* [[TMP8]], align 8
+; CHECK-NEXT:store i32 [[T20]], i32* [[T21]], align 4
+; CHECK-NEXT:store i32 [[T24]], i32* [[T25]], align 4
+; CHECK-NEXT:store i32 [[T28]], i32* [[T29]], align 4
+; CHECK-NEXT:store i32 [[T31]], i32* [[T32]], align 4
+; CHECK-NEXT:ret void
+;
+  %t0 = bitcast i8* %v0 to i32*
+  %t1 = bitcast i8* %v1 to i32*
+
+  %t02 = bitcast i8* %v0 to i64*
+  %t12 = bitcast i8* %v1 to i64*
+
+  %t14 = getelementptr inbounds i32, i32* %t1, i64 4
+  %t18 = getelementptr inbounds i32, i32* %t1, i64 5
+  %t22 = getelementptr inbounds i32, i32* %t1, i64 6
+  %t26 = getelementptr inbounds i32, i32* %t1, i64 7
+
+  %t142 = getelementptr inbounds i64, i64* %t12, i64 8
+  %t182 = getelementptr inbounds i64, i64* %t12, i64 9
+  %t222 = getelementptr inbounds i64, i64* %t12, i64 10
+  %t262 = getelementptr inbounds i64, i64* %t12, i64 11
+
+  %t21 = getelementptr inbounds i32, i32* %t0, i64 4
+  %t25 = getelementptr inbounds i32, i32* %t0, i64 5
+  %t29 = getelementptr inbounds i32, i32* %t0, i64 6
+  %t32 = getelementptr inbounds i32, i32* %t0, i64 7
+
+  %t212 = getelementptr inbounds i64, i64* %t02, i64 8
+  %t252 = getelementptr inbou

[llvm-branch-commits] [clang] abbd57e - Factor out and centralize repeated 'getExpandedPackSize'.

2020-12-13 Thread Richard Smith via llvm-branch-commits

Author: Richard Smith
Date: 2020-12-13T22:43:23-08:00
New Revision: abbd57e558b907a7be8adc5a5b9699dd7c23b1af

URL: 
https://github.com/llvm/llvm-project/commit/abbd57e558b907a7be8adc5a5b9699dd7c23b1af
DIFF: 
https://github.com/llvm/llvm-project/commit/abbd57e558b907a7be8adc5a5b9699dd7c23b1af.diff

LOG: Factor out and centralize repeated 'getExpandedPackSize'.

Added: 


Modified: 
clang/include/clang/AST/DeclTemplate.h
clang/lib/AST/DeclTemplate.cpp
clang/lib/Sema/SemaTemplate.cpp
clang/lib/Sema/SemaTemplateDeduction.cpp

Removed: 




diff  --git a/clang/include/clang/AST/DeclTemplate.h 
b/clang/include/clang/AST/DeclTemplate.h
index 641647659c17..7fbf6294970e 100644
--- a/clang/include/clang/AST/DeclTemplate.h
+++ b/clang/include/clang/AST/DeclTemplate.h
@@ -3353,6 +3353,36 @@ inline TemplateDecl *getAsTypeTemplateDecl(Decl *D) {
  : nullptr;
 }
 
+/// Check whether the template parameter is a pack expansion, and if so,
+/// determine the number of parameters produced by that expansion. For instance:
+///
+/// \code
+/// template struct A {
+///   template class ...TTs, typename ...Us> struct B;
+/// };
+/// \endcode
+///
+/// In \c A::B, \c NTs and \c TTs have expanded pack size 2, and \c Us
+/// is not a pack expansion, so returns an empty Optional.
+inline Optional getExpandedPackSize(const NamedDecl *Param) {
+  if (const auto *TTP = dyn_cast(Param)) {
+if (TTP->isExpandedParameterPack())
+  return TTP->getNumExpansionParameters();
+  }
+
+  if (const auto *NTTP = dyn_cast(Param)) {
+if (NTTP->isExpandedParameterPack())
+  return NTTP->getNumExpansionTypes();
+  }
+
+  if (const auto *TTP = dyn_cast(Param)) {
+if (TTP->isExpandedParameterPack())
+  return TTP->getNumExpansionTemplateParameters();
+  }
+
+  return None;
+}
+
 } // namespace clang
 
 #endif // LLVM_CLANG_AST_DECLTEMPLATE_H

diff  --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp
index 328ceaa63df3..25235c56ec46 100644
--- a/clang/lib/AST/DeclTemplate.cpp
+++ b/clang/lib/AST/DeclTemplate.cpp
@@ -102,24 +102,10 @@ unsigned TemplateParameterList::getMinRequiredArguments() 
const {
   unsigned NumRequiredArgs = 0;
   for (const NamedDecl *P : asArray()) {
 if (P->isTemplateParameterPack()) {
-  if (const auto *NTTP = dyn_cast(P)) {
-if (NTTP->isExpandedParameterPack()) {
-  NumRequiredArgs += NTTP->getNumExpansionTypes();
-  continue;
-}
-  } else if (const auto *TTP = dyn_cast(P)) {
-if (TTP->isExpandedParameterPack()) {
-  NumRequiredArgs += TTP->getNumExpansionParameters();
-  continue;
-}
-  } else {
-const auto *TP = cast(P);
-if (TP->isExpandedParameterPack()) {
-  NumRequiredArgs += TP->getNumExpansionTemplateParameters();
-  continue;
-}
+  if (Optional Expansions = getExpandedPackSize(P)) {
+NumRequiredArgs += *Expansions;
+continue;
   }
-
   break;
 }
 

diff  --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 4176aa1f458f..70a25fb782e9 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -5588,39 +5588,6 @@ bool Sema::CheckTemplateArgument(NamedDecl *Param,
   return false;
 }
 
-/// Check whether the template parameter is a pack expansion, and if so,
-/// determine the number of parameters produced by that expansion. For instance:
-///
-/// \code
-/// template<typename ...Ts> struct A {
-///   template<Ts ...NTs, template<Ts> class ...TTs, typename ...Us> struct B;
-/// };
-/// \endcode
-///
-/// In \c A<int,int>::B, \c NTs and \c TTs have expanded pack size 2, and \c Us
-/// is not a pack expansion, so returns an empty Optional.
-static Optional getExpandedPackSize(NamedDecl *Param) {
-  if (TemplateTypeParmDecl *TTP
-= dyn_cast(Param)) {
-if (TTP->isExpandedParameterPack())
-  return TTP->getNumExpansionParameters();
-  }
-
-  if (NonTypeTemplateParmDecl *NTTP
-= dyn_cast(Param)) {
-if (NTTP->isExpandedParameterPack())
-  return NTTP->getNumExpansionTypes();
-  }
-
-  if (TemplateTemplateParmDecl *TTP
-= dyn_cast(Param)) {
-if (TTP->isExpandedParameterPack())
-  return TTP->getNumExpansionTemplateParameters();
-  }
-
-  return None;
-}
-
 /// Diagnose a missing template argument.
 template
 static bool diagnoseMissingArgument(Sema &S, SourceLocation Loc,

diff  --git a/clang/lib/Sema/SemaTemplateDeduction.cpp 
b/clang/lib/Sema/SemaTemplateDeduction.cpp
index d137aec82b8c..4a3b64cf5425 100644
--- a/clang/lib/Sema/SemaTemplateDeduction.cpp
+++ b/clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -658,23 +658,6 @@ static TemplateParameter makeTemplateParameter(Decl *D) {
   return TemplateParameter(cast(D));
 }
 
-/// If \p Param is an expanded parameter pack, get the number of expansions.
-static Optional getExpandedPackSize(NamedDecl *Param) {
-  if (auto *TTP =

[llvm-branch-commits] [clang] 05cdf4a - Consider reference, pointer, and pointer-to-member TemplateArguments to be different if they have different types.

2020-12-13 Thread Richard Smith via llvm-branch-commits

Author: Richard Smith
Date: 2020-12-13T22:43:24-08:00
New Revision: 05cdf4acf42acce9ddcff646a5d6ac666710fe6d

URL: 
https://github.com/llvm/llvm-project/commit/05cdf4acf42acce9ddcff646a5d6ac666710fe6d
DIFF: 
https://github.com/llvm/llvm-project/commit/05cdf4acf42acce9ddcff646a5d6ac666710fe6d.diff

LOG: Consider reference, pointer, and pointer-to-member TemplateArguments to be 
different if they have different types.

For the Itanium ABI, this implements the mangling rule suggested in
https://github.com/itanium-cxx-abi/cxx-abi/issues/47, namely mangling
such template arguments as being cast to the parameter type in the case
where the template name is overloadable. This can cause a mangling
change for rare cases, where

 * the template argument declaration is converted from its declared type
   to the type of the template parameter, and
 * the template parameter either has a deduced type or is a parameter of
   a function template.

However, such changes are necessary to avoid mangling collisions. The
ABI changes can be reversed with -fclang-abi-compat=11 or earlier.

Re-commit with a fix for the regression introduced last time: don't
expect parameters and arguments to line up inside an 
mangling.

Differential Revision: https://reviews.llvm.org/D91488

Added: 


Modified: 
clang/include/clang/Basic/LangOptions.h
clang/lib/AST/ItaniumMangle.cpp
clang/lib/AST/StmtProfile.cpp
clang/lib/AST/TemplateBase.cpp
clang/test/CodeGenCXX/clang-abi-compat.cpp
clang/test/CodeGenCXX/mangle-class-nttp.cpp
clang/test/CodeGenCXX/mangle-template.cpp
clang/test/SemaTemplate/temp_arg_nontype_cxx1z.cpp

Removed: 




diff  --git a/clang/include/clang/Basic/LangOptions.h 
b/clang/include/clang/Basic/LangOptions.h
index 203c45fdd9a7..251c9a9ecb5d 100644
--- a/clang/include/clang/Basic/LangOptions.h
+++ b/clang/include/clang/Basic/LangOptions.h
@@ -155,8 +155,10 @@ class LangOptions : public LangOptionsBase {
 
 /// Attempt to be ABI-compatible with code generated by Clang 11.0.x
 /// (git  2e10b7a39b93). This causes clang to pass unions with a 256-bit
-/// vector member on the stack instead of using registers, and to not
-/// properly mangle substitutions for template names in some cases.
+/// vector member on the stack instead of using registers, to not properly
+/// mangle substitutions for template names in some cases, and to mangle
+/// declaration template arguments without a cast to the parameter type
+/// even when that can lead to mangling collisions.
 Ver11,
 
 /// Conform to the underlying platform's C and C++ ABIs as closely

diff  --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index f5a4f6708c83..fe4968052e17 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -551,13 +551,15 @@ class CXXNameMangler {
   void mangleCXXCtorType(CXXCtorType T, const CXXRecordDecl *InheritedFrom);
   void mangleCXXDtorType(CXXDtorType T);
 
-  void mangleTemplateArgs(const TemplateArgumentLoc *TemplateArgs,
+  void mangleTemplateArgs(TemplateName TN,
+  const TemplateArgumentLoc *TemplateArgs,
   unsigned NumTemplateArgs);
-  void mangleTemplateArgs(const TemplateArgument *TemplateArgs,
+  void mangleTemplateArgs(TemplateName TN, const TemplateArgument 
*TemplateArgs,
   unsigned NumTemplateArgs);
-  void mangleTemplateArgs(const TemplateArgumentList &AL);
-  void mangleTemplateArg(TemplateArgument A);
-  void mangleValueInTemplateArg(QualType T, const APValue &V);
+  void mangleTemplateArgs(TemplateName TN, const TemplateArgumentList &AL);
+  void mangleTemplateArg(TemplateArgument A, bool NeedExactType);
+  void mangleValueInTemplateArg(QualType T, const APValue &V, bool TopLevel,
+bool NeedExactType = false);
 
   void mangleTemplateParameter(unsigned Depth, unsigned Index);
 
@@ -823,6 +825,11 @@ isTemplate(GlobalDecl GD, const TemplateArgumentList 
*&TemplateArgs) {
   return GlobalDecl();
 }
 
+static TemplateName asTemplateName(GlobalDecl GD) {
+  const TemplateDecl *TD = dyn_cast_or_null(GD.getDecl());
+  return TemplateName(const_cast(TD));
+}
+
 void CXXNameMangler::mangleName(GlobalDecl GD) {
   const NamedDecl *ND = cast(GD.getDecl());
   if (const VarDecl *VD = dyn_cast(ND)) {
@@ -899,7 +906,7 @@ void CXXNameMangler::mangleNameWithAbiTags(GlobalDecl GD,
 const TemplateArgumentList *TemplateArgs = nullptr;
 if (GlobalDecl TD = isTemplate(GD, TemplateArgs)) {
   mangleUnscopedTemplateName(TD, AdditionalAbiTags);
-  mangleTemplateArgs(*TemplateArgs);
+  mangleTemplateArgs(asTemplateName(TD), *TemplateArgs);
   return;
 }
 
@@ -952,7 +959,7 @@ void CXXNameMangler::mangleTemplateName(const TemplateDecl 
*TD,
 
   if (DC->isTranslationUnit() || isStdNamespace(DC)) {
 mangleUnscoped

[llvm-branch-commits] [libcxx] 7de9c61 - Fix test expectation to cope with custom version namespaces.

2020-12-13 Thread Richard Smith via llvm-branch-commits

Author: Richard Smith
Date: 2020-12-13T22:43:24-08:00
New Revision: 7de9c61f3111c8b8bc9e03a7935356e2f372d8b4

URL: 
https://github.com/llvm/llvm-project/commit/7de9c61f3111c8b8bc9e03a7935356e2f372d8b4
DIFF: 
https://github.com/llvm/llvm-project/commit/7de9c61f3111c8b8bc9e03a7935356e2f372d8b4.diff

LOG: Fix test expectation to cope with custom version namespaces.

Added: 


Modified: 
libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/params.fail.cpp

Removed: 




diff  --git 
a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/params.fail.cpp 
b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/params.fail.cpp
index c325b77734bd..86e627344a48 100644
--- a/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/params.fail.cpp
+++ b/libcxx/test/std/numerics/rand/rand.eng/rand.eng.lcong/params.fail.cpp
@@ -24,8 +24,8 @@ int main(int, char**)
 // expected-error@random:* {{static_assert failed due to requirement '1ULL 
== 0 || 1ULL < 1ULL' "linear_congruential_engine invalid parameters"}}
 std::linear_congruential_engine e3;
 std::linear_congruential_engine e4;
-// expected-error@random:* {{static_assert failed due to requirement 
'std::__1::is_unsigned::value' "_UIntType must be unsigned type"}}
+// expected-error-re@random:* {{static_assert failed due to requirement 
'std:{{.*}}:is_unsigned::value' "_UIntType must be unsigned type"}}
 std::linear_congruential_engine e5;
 
 return 0;
-}
\ No newline at end of file
+}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] 33b740f - [CMake][compiler-rt][AArch64] Avoid preprocessing LSE builtins separately

2020-12-13 Thread Raul Tambre via llvm-branch-commits

Author: Raul Tambre
Date: 2020-12-14T09:20:30+02:00
New Revision: 33b740f8dc3496237619a7bc6722f23655cb1f94

URL: 
https://github.com/llvm/llvm-project/commit/33b740f8dc3496237619a7bc6722f23655cb1f94
DIFF: 
https://github.com/llvm/llvm-project/commit/33b740f8dc3496237619a7bc6722f23655cb1f94.diff

LOG: [CMake][compiler-rt][AArch64] Avoid preprocessing LSE builtins separately

Invoking the preprocessor ourselves is fragile and would require us to 
replicate CMake's handling of definitions, compiler flags, etc for proper 
compatibility.
In my toolchain builds this notably resulted in a bunch of warnings from unused 
flags as my CMAKE_C_FLAGS includes CPU-specific optimization options.
Notably this part was already duplicating the logic for VISIBILITY_HIDDEN 
define.

Instead, symlink the files and set the proper set of defines on each.
This should also be faster as we avoid invoking the compiler multiple times.

Fixes https://llvm.org/PR48494

Reviewed By: ilinpv

Differential Revision: https://reviews.llvm.org/D93178

Added: 


Modified: 
compiler-rt/lib/builtins/CMakeLists.txt
compiler-rt/lib/builtins/aarch64/lse.S

Removed: 




diff  --git a/compiler-rt/lib/builtins/CMakeLists.txt 
b/compiler-rt/lib/builtins/CMakeLists.txt
index d84f4d09e53e..5259e951dff3 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -509,31 +509,24 @@ set(aarch64_SOURCES
 )
 
 # Generate outline atomics helpers from lse.S base
-set(CUSTOM_FLAGS ${CMAKE_C_FLAGS})
-if(NOT ANDROID)
-  append_list_if(COMPILER_RT_HAS_VISIBILITY_HIDDEN_FLAG -DVISIBILITY_HIDDEN 
CUSTOM_FLAGS)
-endif()
-append_list_if(COMPILER_RT_HAS_ASM_LSE -DHAS_ASM_LSE CUSTOM_FLAGS)
-string(REPLACE " " "\t" CUSTOM_FLAGS "${CUSTOM_FLAGS}")
 set(OA_HELPERS_DIR "${CMAKE_CURRENT_BINARY_DIR}/outline_atomic_helpers.dir")
-file(MAKE_DIRECTORY ${OA_HELPERS_DIR})
+file(MAKE_DIRECTORY "${OA_HELPERS_DIR}")
 
 foreach(pat cas swp ldadd ldclr ldeor ldset)
   foreach(size 1 2 4 8 16)
 foreach(model 1 2 3 4)
   if(pat STREQUAL "cas" OR NOT size STREQUAL "16")
-set(helper_asm 
${OA_HELPERS_DIR}/outline_atomic_${pat}${size}_${model}.S)
+set(helper_asm 
"${OA_HELPERS_DIR}/outline_atomic_${pat}${size}_${model}.S")
 add_custom_command(
   OUTPUT ${helper_asm}
-  COMMAND ${CMAKE_C_COMPILER} -E ${CUSTOM_FLAGS} -DL_${pat} 
-DSIZE=${size} -DMODEL=${model}
-  ${CMAKE_CURRENT_SOURCE_DIR}/aarch64/lse.S -o ${helper_asm}
-  DEPENDS aarch64/lse.S assembly.h
+  COMMAND ${CMAKE_COMMAND} -E create_symlink 
"${CMAKE_CURRENT_SOURCE_DIR}/aarch64/lse.S" "${helper_asm}"
 )
-set_source_files_properties(${helper_asm} PROPERTIES GENERATED TRUE)
-set(aarch64_SOURCES
-  ${aarch64_SOURCES}
-  ${helper_asm}
+set_source_files_properties("${helper_asm}"
+  PROPERTIES
+  COMPILE_DEFINITIONS "L_${pat};SIZE=${size};MODEL=${model}"
+  INCLUDE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}"
 )
+list(APPEND aarch64_SOURCES "${helper_asm}")
   endif()
 endforeach(model)
   endforeach(size)
@@ -687,6 +680,8 @@ else ()
 append_list_if(COMPILER_RT_HAS_VISIBILITY_HIDDEN_FLAG VISIBILITY_HIDDEN 
BUILTIN_DEFS)
   endif()
 
+  append_list_if(COMPILER_RT_HAS_ASM_LSE HAS_ASM_LSE BUILTIN_DEFS)
+
   foreach (arch ${BUILTIN_SUPPORTED_ARCH})
 if (CAN_TARGET_${arch})
   # For ARM archs, exclude any VFP builtins if VFP is not supported

diff  --git a/compiler-rt/lib/builtins/aarch64/lse.S 
b/compiler-rt/lib/builtins/aarch64/lse.S
index 4c75fa524c44..f030d5ddc1c5 100644
--- a/compiler-rt/lib/builtins/aarch64/lse.S
+++ b/compiler-rt/lib/builtins/aarch64/lse.S
@@ -2,7 +2,7 @@
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-#include "../assembly.h"
+#include "assembly.h"
 
 // Out-of-line LSE atomics helpers. Ported from libgcc library.
 // N = {1, 2, 4, 8}



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 2acd5a4 - [LoopIdiom] Pre-commit tests for D92745. NFC

2020-12-13 Thread Craig Topper via llvm-branch-commits

Author: Craig Topper
Date: 2020-12-13T23:25:00-08:00
New Revision: 2acd5a473860d6ed024252367e7fdefa105e9df9

URL: 
https://github.com/llvm/llvm-project/commit/2acd5a473860d6ed024252367e7fdefa105e9df9
DIFF: 
https://github.com/llvm/llvm-project/commit/2acd5a473860d6ed024252367e7fdefa105e9df9.diff

LOG: [LoopIdiom] Pre-commit tests for D92745. NFC

Added: 


Modified: 
llvm/test/Transforms/LoopIdiom/X86/ctlz.ll
llvm/test/Transforms/LoopIdiom/X86/cttz.ll

Removed: 




diff  --git a/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll 
b/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll
index 117946cf3d2c..6d3863a0ee33 100644
--- a/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll
+++ b/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll
@@ -672,3 +672,104 @@ while.cond:   ; preds 
= %while.cond, %entry
 while.end:; preds = %while.cond
   ret i32 %inc
 }
+
+; Recognize CTLZ builtin pattern.
+; Here it will replace the loop -
+; assume builtin is always profitable.
+;
+; int ctlz_decrement(int n)
+; {
+;   int i = 32;
+;   while(n) {
+; n >>= 1;
+; i--;
+;   }
+;   return i;
+; }
+;
+define i32 @ctlz_decrement(i32 %n) {
+; ALL-LABEL: @ctlz_decrement(
+; ALL-NEXT:  entry:
+; ALL-NEXT:[[TOBOOL4:%.*]] = icmp eq i32 [[N:%.*]], 0
+; ALL-NEXT:br i1 [[TOBOOL4]], label [[WHILE_END:%.*]], label 
[[WHILE_BODY_PREHEADER:%.*]]
+; ALL:   while.body.preheader:
+; ALL-NEXT:br label [[WHILE_BODY:%.*]]
+; ALL:   while.body:
+; ALL-NEXT:[[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[WHILE_BODY]] ], [ 32, 
[[WHILE_BODY_PREHEADER]] ]
+; ALL-NEXT:[[N_ADDR_05:%.*]] = phi i32 [ [[SHR:%.*]], [[WHILE_BODY]] ], [ 
[[N]], [[WHILE_BODY_PREHEADER]] ]
+; ALL-NEXT:[[SHR]] = lshr i32 [[N_ADDR_05]], 1
+; ALL-NEXT:[[INC]] = add nsw i32 [[I_06]], -1
+; ALL-NEXT:[[TOBOOL:%.*]] = icmp eq i32 [[SHR]], 0
+; ALL-NEXT:br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label 
[[WHILE_BODY]]
+; ALL:   while.end.loopexit:
+; ALL-NEXT:[[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_BODY]] ]
+; ALL-NEXT:br label [[WHILE_END]]
+; ALL:   while.end:
+; ALL-NEXT:[[I_0_LCSSA:%.*]] = phi i32 [ 32, [[ENTRY:%.*]] ], [ 
[[INC_LCSSA]], [[WHILE_END_LOOPEXIT]] ]
+; ALL-NEXT:ret i32 [[I_0_LCSSA]]
+;
+entry:
+  %tobool4 = icmp eq i32 %n, 0
+  br i1 %tobool4, label %while.end, label %while.body.preheader
+
+while.body.preheader: ; preds = %entry
+  br label %while.body
+
+while.body:   ; preds = 
%while.body.preheader, %while.body
+  %i.06 = phi i32 [ %inc, %while.body ], [ 32, %while.body.preheader ]
+  %n.addr.05 = phi i32 [ %shr, %while.body ], [ %n, %while.body.preheader ]
+  %shr = lshr i32 %n.addr.05, 1
+  %inc = add nsw i32 %i.06, -1
+  %tobool = icmp eq i32 %shr, 0
+  br i1 %tobool, label %while.end.loopexit, label %while.body
+
+while.end.loopexit:   ; preds = %while.body
+  br label %while.end
+
+while.end:; preds = 
%while.end.loopexit, %entry
+  %i.0.lcssa = phi i32 [ 32, %entry ], [ %inc, %while.end.loopexit ]
+  ret i32 %i.0.lcssa
+}
+
+; Recognize CTLZ builtin pattern.
+; Here it will replace the loop -
+; assume builtin is always profitable.
+;
+; int ctlz_lshr_decrement(int n)
+; {
+;   int i = 31;
+;   while(n >>= 1) {
+; i--;
+;   }
+;   return i;
+; }
+;
+define i32 @ctlz_lshr_decrement(i32 %n) {
+; ALL-LABEL: @ctlz_lshr_decrement(
+; ALL-NEXT:  entry:
+; ALL-NEXT:br label [[WHILE_COND:%.*]]
+; ALL:   while.cond:
+; ALL-NEXT:[[N_ADDR_0:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ 
[[SHR:%.*]], [[WHILE_COND]] ]
+; ALL-NEXT:[[I_0:%.*]] = phi i32 [ 31, [[ENTRY]] ], [ [[INC:%.*]], 
[[WHILE_COND]] ]
+; ALL-NEXT:[[SHR]] = lshr i32 [[N_ADDR_0]], 1
+; ALL-NEXT:[[TOBOOL:%.*]] = icmp eq i32 [[SHR]], 0
+; ALL-NEXT:[[INC]] = add nsw i32 [[I_0]], -1
+; ALL-NEXT:br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]]
+; ALL:   while.end:
+; ALL-NEXT:[[I_0_LCSSA:%.*]] = phi i32 [ [[I_0]], [[WHILE_COND]] ]
+; ALL-NEXT:ret i32 [[I_0_LCSSA]]
+;
+entry:
+  br label %while.cond
+
+while.cond:   ; preds = %while.cond, %entry
+  %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ]
+  %i.0 = phi i32 [ 31, %entry ], [ %inc, %while.cond ]
+  %shr = lshr i32 %n.addr.0, 1
+  %tobool = icmp eq i32 %shr, 0
+  %inc = add nsw i32 %i.0, -1
+  br i1 %tobool, label %while.end, label %while.cond
+
+while.end:; preds = %while.cond
+  ret i32 %i.0
+}

diff  --git a/llvm/test/Transforms/LoopIdiom/X86/cttz.ll 
b/llvm/test/Transforms/LoopIdiom/X86/cttz.ll
index 82bc8207eb0e..642eb11d2d7f 100644
--- a/llvm/test/Transforms/LoopIdiom/X86/cttz.ll
+++ b/llvm/test/Transforms/LoopIdiom/X86/cttz.ll
@@ -113,3 +113,103 @@ while

[llvm-branch-commits] [flang] e43b3b0 - [Flang][OpenMP] Semantic checks for Atomic construct.

2020-12-13 Thread Sameeran joshi via llvm-branch-commits

Author: sameeran joshi
Date: 2020-12-14T13:03:57+05:30
New Revision: e43b3b08ccd60d63d4c3316859e9fec4cdaeaddd

URL: 
https://github.com/llvm/llvm-project/commit/e43b3b08ccd60d63d4c3316859e9fec4cdaeaddd
DIFF: 
https://github.com/llvm/llvm-project/commit/e43b3b08ccd60d63d4c3316859e9fec4cdaeaddd.diff

LOG: [Flang][OpenMP] Semantic checks for Atomic construct.

This patch implements the restrictions from section 2.17.7 of the OpenMP 5.0 
standard for the atomic construct. Tests for them are added.

One of the restrictions,
`OpenMP constructs may not be encountered during execution of an atomic region.`,
is described in the 5.0 standard as a semantic restriction, but given the 
stricter nature of the parser in F18 it is already caught during parsing.

This patch is the next patch in the series following D88965.

Reviewed By: clementval

Differential Revision: https://reviews.llvm.org/D89583

Added: 
flang/test/Semantics/omp-atomic01.f90

Modified: 
flang/include/flang/Parser/dump-parse-tree.h
flang/include/flang/Parser/parse-tree.h
flang/lib/Parser/openmp-parsers.cpp
flang/lib/Parser/unparse.cpp
flang/lib/Semantics/check-omp-structure.cpp
flang/lib/Semantics/check-omp-structure.h
flang/test/Semantics/omp-atomic.f90
llvm/include/llvm/Frontend/OpenMP/OMP.td

Removed: 




diff  --git a/flang/include/flang/Parser/dump-parse-tree.h 
b/flang/include/flang/Parser/dump-parse-tree.h
index 791e21fa4b62..0d819f861495 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -551,6 +551,8 @@ class ParseTreeDumper {
   NODE(parser, OpenMPDeclareSimdConstruct)
   NODE(parser, OpenMPDeclareTargetConstruct)
   NODE(parser, OmpMemoryOrderClause)
+  NODE(parser, OmpAtomicClause)
+  NODE(parser, OmpAtomicClauseList)
   NODE(parser, OpenMPFlushConstruct)
   NODE(parser, OpenMPLoopConstruct)
   NODE(parser, OpenMPExecutableAllocate)

diff  --git a/flang/include/flang/Parser/parse-tree.h 
b/flang/include/flang/Parser/parse-tree.h
index ca73af210c15..5d2909a4142a 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -3628,11 +3628,30 @@ struct OpenMPExecutableAllocate {
   t;
 };
 
-// 2.17.7 atomic -> ATOMIC [clause[,]] atomic-clause [[,]clause] |
-//  ATOMIC [clause]
-//clause -> memory-order-clause | HINT(hint-expression)
-//memory-order-clause -> SEQ_CST | ACQ_REL | RELEASE | ACQUIRE | 
RELAXED
-//atomic-clause -> READ | WRITE | UPDATE | CAPTURE
+// 2.17.7 Atomic construct/2.17.8 Flush construct [OpenMP 5.0]
+//memory-order-clause -> acq_rel
+//   release
+//   acquire
+//   seq_cst
+//   relaxed
+struct OmpMemoryOrderClause {
+  WRAPPER_CLASS_BOILERPLATE(OmpMemoryOrderClause, OmpClause);
+  CharBlock source;
+};
+
+// 2.17.7 Atomic construct
+//atomic-clause -> memory-order-clause | HINT(hint-expression)
+struct OmpAtomicClause {
+  UNION_CLASS_BOILERPLATE(OmpAtomicClause);
+  CharBlock source;
+  std::variant u;
+};
+
+// atomic-clause-list -> [atomic-clause, [atomic-clause], ...]
+struct OmpAtomicClauseList {
+  WRAPPER_CLASS_BOILERPLATE(OmpAtomicClauseList, std::list);
+  CharBlock source;
+};
 
 // END ATOMIC
 EMPTY_CLASS(OmpEndAtomic);
@@ -3641,8 +3660,8 @@ EMPTY_CLASS(OmpEndAtomic);
 struct OmpAtomicRead {
   TUPLE_CLASS_BOILERPLATE(OmpAtomicRead);
   CharBlock source;
-  std::tuple,
-  std::optional>
+  std::tuple, std::optional>
   t;
 };
 
@@ -3650,8 +3669,8 @@ struct OmpAtomicRead {
 struct OmpAtomicWrite {
   TUPLE_CLASS_BOILERPLATE(OmpAtomicWrite);
   CharBlock source;
-  std::tuple,
-  std::optional>
+  std::tuple, std::optional>
   t;
 };
 
@@ -3659,8 +3678,8 @@ struct OmpAtomicWrite {
 struct OmpAtomicUpdate {
   TUPLE_CLASS_BOILERPLATE(OmpAtomicUpdate);
   CharBlock source;
-  std::tuple,
-  std::optional>
+  std::tuple, std::optional>
   t;
 };
 
@@ -3670,7 +3689,8 @@ struct OmpAtomicCapture {
   CharBlock source;
   WRAPPER_CLASS(Stmt1, Statement);
   WRAPPER_CLASS(Stmt2, Statement);
-  std::tuple
+  std::tuple
   t;
 };
 
@@ -3678,11 +3698,15 @@ struct OmpAtomicCapture {
 struct OmpAtomic {
   TUPLE_CLASS_BOILERPLATE(OmpAtomic);
   CharBlock source;
-  std::tuple,
+  std::tuple,
   std::optional>
   t;
 };
 
+// 2.17.7 atomic ->
+//ATOMIC [atomic-clause-list] atomic-construct [atomic-clause-list] |
+//ATOMIC [atomic-clause-list]
+//atomic-construct -> READ | WRITE | UPDATE | CAPTURE
 struct OpenMPAtomicConstruct {
   UNION_CLASS_BOILERPLATE(OpenMPAtomicConstruct);
   std::variant> t;
 };
 
-// 2.17.8 Flush Construct [OpenMP 5.0]
-// memory-order-clause -> acq_rel
-//release
-//acquire
-struct OmpMemoryOrderClause {
-  WRAPPER_CLASS_BOILERPLATE(OmpMemoryOrderCl

[llvm-branch-commits] [clang] 6326b09 - [AST][RecoveryExpr] Preserve type for broken overload member call expr.

2020-12-13 Thread Haojian Wu via llvm-branch-commits

Author: Haojian Wu
Date: 2020-12-14T08:50:41+01:00
New Revision: 6326b098852bea51debe415a85eebd1753151cd0

URL: 
https://github.com/llvm/llvm-project/commit/6326b098852bea51debe415a85eebd1753151cd0
DIFF: 
https://github.com/llvm/llvm-project/commit/6326b098852bea51debe415a85eebd1753151cd0.diff

LOG: [AST][RecoveryExpr] Preserve type for broken overload member call expr.

Reviewed By: sammccall

Differential Revision: https://reviews.llvm.org/D80109

Added: 


Modified: 
clang/lib/Sema/SemaOverload.cpp
clang/test/AST/ast-dump-recovery.cpp
clang/test/CXX/dcl.dcl/basic.namespace/namespace.udecl/p12.cpp

Removed: 




diff  --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 5689efe578fa..13d2125d1a28 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -14300,6 +14300,7 @@ ExprResult Sema::BuildCallToMemberFunction(Scope *S, 
Expr *MemExprE,
 UnbridgedCasts.restore();
 
 OverloadCandidateSet::iterator Best;
+bool Succeeded = false;
 switch (CandidateSet.BestViableFunction(*this, UnresExpr->getBeginLoc(),
 Best)) {
 case OR_Success:
@@ -14307,7 +14308,7 @@ ExprResult Sema::BuildCallToMemberFunction(Scope *S, 
Expr *MemExprE,
   FoundDecl = Best->FoundDecl;
   CheckUnresolvedMemberAccess(UnresExpr, Best->FoundDecl);
   if (DiagnoseUseOfDecl(Best->FoundDecl, UnresExpr->getNameLoc()))
-return ExprError();
+break;
   // If FoundDecl is different from Method (such as if one is a template
   // and the other a specialization), make sure DiagnoseUseOfDecl is
   // called on both.
@@ -14316,7 +14317,8 @@ ExprResult Sema::BuildCallToMemberFunction(Scope *S, 
Expr *MemExprE,
   // being used.
   if (Method != FoundDecl.getDecl() &&
   DiagnoseUseOfDecl(Method, UnresExpr->getNameLoc()))
-return ExprError();
+break;
+  Succeeded = true;
   break;
 
 case OR_No_Viable_Function:
@@ -14326,27 +14328,25 @@ ExprResult Sema::BuildCallToMemberFunction(Scope *S, 
Expr *MemExprE,
   PDiag(diag::err_ovl_no_viable_member_function_in_call)
   << DeclName << MemExprE->getSourceRange()),
   *this, OCD_AllCandidates, Args);
-  // FIXME: Leaking incoming expressions!
-  return ExprError();
-
+  break;
 case OR_Ambiguous:
   CandidateSet.NoteCandidates(
   PartialDiagnosticAt(UnresExpr->getMemberLoc(),
   PDiag(diag::err_ovl_ambiguous_member_call)
   << DeclName << MemExprE->getSourceRange()),
   *this, OCD_AmbiguousCandidates, Args);
-  // FIXME: Leaking incoming expressions!
-  return ExprError();
-
+  break;
 case OR_Deleted:
   CandidateSet.NoteCandidates(
   PartialDiagnosticAt(UnresExpr->getMemberLoc(),
   PDiag(diag::err_ovl_deleted_member_call)
   << DeclName << MemExprE->getSourceRange()),
   *this, OCD_AllCandidates, Args);
-  // FIXME: Leaking incoming expressions!
-  return ExprError();
+  break;
 }
+// Overload resolution fails, try to recover.
+if (!Succeeded)
+  return BuildRecoveryExpr(chooseRecoveryType(CandidateSet, &Best));
 
 MemExprE = FixOverloadedFunctionReference(MemExprE, FoundDecl, Method);
 

diff  --git a/clang/test/AST/ast-dump-recovery.cpp 
b/clang/test/AST/ast-dump-recovery.cpp
index 2a8346eb0d15..a8da2b8ad449 100644
--- a/clang/test/AST/ast-dump-recovery.cpp
+++ b/clang/test/AST/ast-dump-recovery.cpp
@@ -125,6 +125,9 @@ struct Foo2 {
   double func();
   class ForwardClass;
   ForwardClass createFwd();
+
+  int overload();
+  int overload(int, int);
 };
 void test2(Foo2 f) {
   // CHECK:  RecoveryExpr {{.*}} 'double'
@@ -136,6 +139,11 @@ void test2(Foo2 f) {
   // CHECK-NEXT: `-MemberExpr {{.*}} '' .createFwd
   // CHECK-NEXT:   `-DeclRefExpr {{.*}} 'f'
   f.createFwd();
+  // CHECK:  RecoveryExpr {{.*}} 'int' contains-errors
+  // CHECK-NEXT: |-UnresolvedMemberExpr
+  // CHECK-NEXT:`-DeclRefExpr {{.*}} 'Foo2'
+  // CHECK-NEXT: `-IntegerLiteral {{.*}} 'int' 1
+  f.overload(1);
 }
 
 // CHECK: |-AlignedAttr {{.*}} alignas

diff  --git a/clang/test/CXX/dcl.dcl/basic.namespace/namespace.udecl/p12.cpp 
b/clang/test/CXX/dcl.dcl/basic.namespace/namespace.udecl/p12.cpp
index ce43720cb2d3..f12e0083fb0c 100644
--- a/clang/test/CXX/dcl.dcl/basic.namespace/namespace.udecl/p12.cpp
+++ b/clang/test/CXX/dcl.dcl/basic.namespace/namespace.udecl/p12.cpp
@@ -9,7 +9,7 @@
 //   parameter types in a base class (rather than conflicting).
 
 template  struct Opaque {};
-template  void expect(Opaque _) {}
+template  void expect(Opaque _) {} // expected-note 4 
{{candidate function template not viable}}
 
 // PR5727
 // This just shouldn't cr

[llvm-branch-commits] [llvm] 08e287a - [PowerPC][FP128] Fix the incorrect signature for math library call

2020-12-13 Thread QingShan Zhang via llvm-branch-commits

Author: QingShan Zhang
Date: 2020-12-14T07:52:56Z
New Revision: 08e287aaf39f3ab8ccfcd4535fafa1c5d99ffdf7

URL: 
https://github.com/llvm/llvm-project/commit/08e287aaf39f3ab8ccfcd4535fafa1c5d99ffdf7
DIFF: 
https://github.com/llvm/llvm-project/commit/08e287aaf39f3ab8ccfcd4535fafa1c5d99ffdf7.diff

LOG: [PowerPC][FP128] Fix the incorrect signature for math library call

The runtime library has two families of implementations, one for ppc_fp128 and 
one for fp128.
For IBM long double (ppc_fp128), the functions are suffixed with 'l' (e.g. sqrtl). 
For IEEE long double (fp128), they are suffixed with "ieee128" or "f128".
We were missing the mappings for several libcalls for IEEE long double.

Reviewed By: qiucf

Differential Revision: https://reviews.llvm.org/D91675

Added: 


Modified: 
llvm/lib/CodeGen/TargetLoweringBase.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/test/CodeGen/PowerPC/f128-arith.ll
llvm/test/CodeGen/PowerPC/f128-conv.ll
llvm/test/CodeGen/PowerPC/f128-rounding.ll
llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll
llvm/test/CodeGen/PowerPC/fp-strict-f128.ll
llvm/test/CodeGen/PowerPC/recipest.ll

Removed: 




diff  --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp 
b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 58543b48a994..553434cdd5fa 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -140,18 +140,23 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
 setLibcallName(RTLIB::SUB_F128, "__subkf3");
 setLibcallName(RTLIB::MUL_F128, "__mulkf3");
 setLibcallName(RTLIB::DIV_F128, "__divkf3");
+setLibcallName(RTLIB::POWI_F128, "__powikf2");
 setLibcallName(RTLIB::FPEXT_F32_F128, "__extendsfkf2");
 setLibcallName(RTLIB::FPEXT_F64_F128, "__extenddfkf2");
 setLibcallName(RTLIB::FPROUND_F128_F32, "__trunckfsf2");
 setLibcallName(RTLIB::FPROUND_F128_F64, "__trunckfdf2");
 setLibcallName(RTLIB::FPTOSINT_F128_I32, "__fixkfsi");
 setLibcallName(RTLIB::FPTOSINT_F128_I64, "__fixkfdi");
+setLibcallName(RTLIB::FPTOSINT_F128_I128, "__fixkfti");
 setLibcallName(RTLIB::FPTOUINT_F128_I32, "__fixunskfsi");
 setLibcallName(RTLIB::FPTOUINT_F128_I64, "__fixunskfdi");
+setLibcallName(RTLIB::FPTOUINT_F128_I128, "__fixunskfti");
 setLibcallName(RTLIB::SINTTOFP_I32_F128, "__floatsikf");
 setLibcallName(RTLIB::SINTTOFP_I64_F128, "__floatdikf");
+setLibcallName(RTLIB::SINTTOFP_I128_F128, "__floattikf");
 setLibcallName(RTLIB::UINTTOFP_I32_F128, "__floatunsikf");
 setLibcallName(RTLIB::UINTTOFP_I64_F128, "__floatundikf");
+setLibcallName(RTLIB::UINTTOFP_I128_F128, "__floatuntikf");
 setLibcallName(RTLIB::OEQ_F128, "__eqkf2");
 setLibcallName(RTLIB::UNE_F128, "__nekf2");
 setLibcallName(RTLIB::OGE_F128, "__gekf2");

diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp 
b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 10cf7d7f5e02..a98d99af552c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1308,8 +1308,19 @@ PPCTargetLowering::PPCTargetLowering(const 
PPCTargetMachine &TM,
   setLibcallName(RTLIB::POW_F128, "powf128");
   setLibcallName(RTLIB::FMIN_F128, "fminf128");
   setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
-  setLibcallName(RTLIB::POWI_F128, "__powikf2");
   setLibcallName(RTLIB::REM_F128, "fmodf128");
+  setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
+  setLibcallName(RTLIB::CEIL_F128, "ceilf128");
+  setLibcallName(RTLIB::FLOOR_F128, "floorf128");
+  setLibcallName(RTLIB::TRUNC_F128, "truncf128");
+  setLibcallName(RTLIB::ROUND_F128, "roundf128");
+  setLibcallName(RTLIB::LROUND_F128, "lroundf128");
+  setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
+  setLibcallName(RTLIB::RINT_F128, "rintf128");
+  setLibcallName(RTLIB::LRINT_F128, "lrintf128");
+  setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
+  setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
+  setLibcallName(RTLIB::FMA_F128, "fmaf128");
 
   // With 32 condition bits, we don't need to sink (and duplicate) compares
   // aggressively in CodeGenPrep.

diff  --git a/llvm/test/CodeGen/PowerPC/f128-arith.ll 
b/llvm/test/CodeGen/PowerPC/f128-arith.ll
index 61bd03aa6368..587cf32a70e6 100644
--- a/llvm/test/CodeGen/PowerPC/f128-arith.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-arith.ll
@@ -195,7 +195,7 @@ define void @qpSqrt(fp128* nocapture readonly %a, fp128* 
nocapture %res) {
 ; CHECK-P8-NEXT:stdu r1, -48(r1)
 ; CHECK-P8-NEXT:lvx v2, 0, r3
 ; CHECK-P8-NEXT:mr r30, r4
-; CHECK-P8-NEXT:bl sqrtl
+; CHECK-P8-NEXT:bl sqrtf128
 ; CHECK-P8-NEXT:nop
 ; CHECK-P8-NEXT:stvx v2, 0, r30
 ; CHECK-P8-NEXT:addi r1, r1, 48
@@ -840,7 +840,7 @@ define void @qpCeil(fp128* nocapture readonly %a, fp128* 
nocapture %res) {
 ; CHECK-P8-NEXT:stdu r1, -48(r1)
 ; CHECK-P8-NEXT:lvx v2, 0, r3
 ; CHECK-P8-NEXT:mr r30, r4
-; CHECK-P8-NEXT:bl c