r314145 - Reinstall the patch "Use EmitPointerWithAlignment to get alignment information of the pointer used in atomic expr" after fixing PR31620.

2017-09-25 Thread Wei Mi via cfe-commits
Author: wmi
Date: Mon Sep 25 12:57:59 2017
New Revision: 314145

URL: http://llvm.org/viewvc/llvm-project?rev=314145&view=rev
Log:
Reinstall the patch "Use EmitPointerWithAlignment to get alignment information 
of the pointer used in atomic expr" after fixing PR31620.

This is to fix PR34347. EmitAtomicExpr currently only uses alignment information
from the Type, not the Decl, so when the declaration of an atomic variable is
marked to have an alignment equal to its size, EmitAtomicExpr doesn't know about
it and will generate a libcall instead of an atomic op. The patch uses
EmitPointerWithAlignment to get the precise alignment information.

Differential Revision: https://reviews.llvm.org/D37310
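
The distinction matters whenever a declaration is over-aligned relative to its
type, as in the test added below. A minimal sketch, assuming the x86-64 Linux
ABI (the struct mirrors the added test):

struct AM { int f1, f2; };   // sizeof(AM) == 8, alignof(AM) == 4
alignas(8) AM m;             // this *declaration* is 8-byte aligned

static_assert(sizeof(AM) == 8, "");
static_assert(alignof(AM) == 4, "");
// getTypeInfoInChars(AtomicTy) reports the type alignment (4), so the old
// size != alignment check forced a libcall for __atomic_load(&m, ...).
// EmitPointerWithAlignment(&m) sees the declared 8-byte alignment, so the
// 8-byte access can be emitted as an inline atomic op.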

Added:
cfe/trunk/test/CodeGenCXX/atomic-align.cpp
Modified:
cfe/trunk/lib/CodeGen/CGAtomic.cpp

Modified: cfe/trunk/lib/CodeGen/CGAtomic.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGAtomic.cpp?rev=314145&r1=314144&r2=314145&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGAtomic.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGAtomic.cpp Mon Sep 25 12:57:59 2017
@@ -745,19 +745,19 @@ RValue CodeGenFunction::EmitAtomicExpr(A
   QualType MemTy = AtomicTy;
   if (const AtomicType *AT = AtomicTy->getAs<AtomicType>())
 MemTy = AT->getValueType();
-  CharUnits sizeChars, alignChars;
-  std::tie(sizeChars, alignChars) = getContext().getTypeInfoInChars(AtomicTy);
-  uint64_t Size = sizeChars.getQuantity();
-  unsigned MaxInlineWidthInBits = getTarget().getMaxAtomicInlineWidth();
-  bool UseLibcall = (sizeChars != alignChars ||
- getContext().toBits(sizeChars) > MaxInlineWidthInBits);
-
   llvm::Value *IsWeak = nullptr, *OrderFail = nullptr;
 
   Address Val1 = Address::invalid();
   Address Val2 = Address::invalid();
   Address Dest = Address::invalid();
-  Address Ptr(EmitScalarExpr(E->getPtr()), alignChars);
+  Address Ptr = EmitPointerWithAlignment(E->getPtr());
+
+  CharUnits sizeChars, alignChars;
+  std::tie(sizeChars, alignChars) = getContext().getTypeInfoInChars(AtomicTy);
+  uint64_t Size = sizeChars.getQuantity();
+  unsigned MaxInlineWidthInBits = getTarget().getMaxAtomicInlineWidth();
+  bool UseLibcall = ((Ptr.getAlignment() % sizeChars) != 0 ||
+ getContext().toBits(sizeChars) > MaxInlineWidthInBits);
 
   if (E->getOp() == AtomicExpr::AO__c11_atomic_init ||
   E->getOp() == AtomicExpr::AO__opencl_atomic_init) {

Added: cfe/trunk/test/CodeGenCXX/atomic-align.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/atomic-align.cpp?rev=314145&view=auto
==============================================================================
--- cfe/trunk/test/CodeGenCXX/atomic-align.cpp (added)
+++ cfe/trunk/test/CodeGenCXX/atomic-align.cpp Mon Sep 25 12:57:59 2017
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 %s -std=c++11 -emit-llvm -o - -triple=x86_64-linux-gnu | FileCheck %s
+
+struct AM {
+  int f1, f2;
+};
+alignas(8) AM m;
+AM load1() {
+  AM am;
+  // m is declared to align to 8bytes, so generate load atomic instead
+  // of libcall.
+  // CHECK-LABEL: @_Z5load1v
+  // CHECK: load atomic {{.*}} monotonic
+  __atomic_load(&m, &am, 0);
+  return am;
+}
+
+struct BM {
+  int f1;
+  alignas(8) AM f2;
+};
+BM bm;
+AM load2() {
+  AM am;
+  // BM::f2 is declared to align to 8bytes, so generate load atomic instead
+  // of libcall.
+  // CHECK-LABEL: @_Z5load2v
+  // CHECK: load atomic {{.*}} monotonic
+  __atomic_load(&bm.f2, &am, 0);
+  return am;
+}



r315915 - [Bitfield] Add an option to access bitfield in a fine-grained manner.

2017-10-16 Thread Wei Mi via cfe-commits
Author: wmi
Date: Mon Oct 16 09:50:27 2017
New Revision: 315915

URL: http://llvm.org/viewvc/llvm-project?rev=315915&view=rev
Log:
[Bitfield] Add an option to access bitfield in a fine-grained manner.

Currently all the consecutive bitfields are wrapped as a large integer unless
there is an unnamed zero-sized bitfield in between. The patch provides an
alternative manner which makes a bitfield a separate memory location if it has
a legal integer width and is naturally aligned. Such a separate bitfield may
split the original consecutive bitfields into subgroups of consecutive
bitfields, and each subgroup will be wrapped as an integer. This is all
controlled by an option, -ffine-grained-bitfield-accesses. The alternative
access manner can improve the access efficiency of those bitfields with legal
widths and alignments, but may reduce the chance of load/store combining of
other bitfields, so whether to use the option depends on how the bitfields are
defined and actually accessed. For now the option is off by default.

Differential revision: https://reviews.llvm.org/D36562
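
A sketch of how the option groups accesses, assuming it is enabled and a
typical target where the legal integer widths are 8/16/32/64 (field names are
illustrative):

struct S {
  unsigned a : 8;  // legal width (8) at a naturally aligned bit offset (0):
                   // becomes its own separate 8-bit location
  unsigned b : 3;  // 3 is not a legal integer width, so b and c stay
  unsigned c : 5;  // wrapped together in a shared integer
  unsigned d : 16; // legal width (16), naturally aligned at bit offset 16:
                   // becomes a separate 16-bit location, splitting the run
};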

Added:
cfe/trunk/test/CodeGenCXX/finegrain-bitfield-access.cpp
Modified:
cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td
cfe/trunk/include/clang/Driver/Options.td
cfe/trunk/include/clang/Frontend/CodeGenOptions.def
cfe/trunk/lib/CodeGen/CGRecordLayoutBuilder.cpp
cfe/trunk/lib/Driver/ToolChains/Clang.cpp
cfe/trunk/lib/Frontend/CompilerInvocation.cpp

Modified: cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td?rev=315915&r1=315914&r2=315915&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td (original)
+++ cfe/trunk/include/clang/Basic/DiagnosticDriverKinds.td Mon Oct 16 09:50:27 2017
@@ -330,4 +330,8 @@ def warn_drv_msvc_not_found : Warning<
   "unable to find a Visual Studio installation; "
   "try running Clang from a developer command prompt">,
   InGroup<DiagGroup<"msvc-not-found">>;
+
+def warn_drv_fine_grained_bitfield_accesses_ignored : Warning<
+  "option '-ffine-grained-bitfield-accesses' cannot be enabled together with a sanitizer; flag ignored">,
+  InGroup<OptionIgnored>;
 }

Modified: cfe/trunk/include/clang/Driver/Options.td
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/Options.td?rev=315915&r1=315914&r2=315915&view=diff
==============================================================================
--- cfe/trunk/include/clang/Driver/Options.td (original)
+++ cfe/trunk/include/clang/Driver/Options.td Mon Oct 16 09:50:27 2017
@@ -1045,6 +1045,13 @@ def fxray_never_instrument :
   Group<f_Group>, Flags<[CC1Option]>,
   HelpText<"Filename defining the whitelist for imbuing the 'never instrument' XRay attribute.">;
 
+def ffine_grained_bitfield_accesses : Flag<["-"],
+  "ffine-grained-bitfield-accesses">, Group<f_clang_Group>, Flags<[CC1Option]>,
+  HelpText<"Use separate accesses for bitfields with legal widths and alignments.">;
+def fno_fine_grained_bitfield_accesses : Flag<["-"],
+  "fno-fine-grained-bitfield-accesses">, Group<f_clang_Group>, Flags<[CC1Option]>,
+  HelpText<"Use large-integer access for consecutive bitfield runs.">;
+
 def flat__namespace : Flag<["-"], "flat_namespace">;
 def flax_vector_conversions : Flag<["-"], "flax-vector-conversions">, Group<f_Group>;
 def flimited_precision_EQ : Joined<["-"], "flimited-precision=">, Group<f_Group>;

Modified: cfe/trunk/include/clang/Frontend/CodeGenOptions.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Frontend/CodeGenOptions.def?rev=315915&r1=315914&r2=315915&view=diff
==============================================================================
--- cfe/trunk/include/clang/Frontend/CodeGenOptions.def (original)
+++ cfe/trunk/include/clang/Frontend/CodeGenOptions.def Mon Oct 16 09:50:27 2017
@@ -179,6 +179,7 @@ CODEGENOPT(SanitizeCoverageStackDepth, 1
 CODEGENOPT(SanitizeStats , 1, 0) ///< Collect statistics for sanitizers.
 CODEGENOPT(SimplifyLibCalls  , 1, 1) ///< Set when -fbuiltin is enabled.
 CODEGENOPT(SoftFloat , 1, 0) ///< -soft-float.
+CODEGENOPT(FineGrainedBitfieldAccesses, 1, 0) ///< Enable fine-grained bitfield accesses.
 CODEGENOPT(StrictEnums   , 1, 0) ///< Optimize based on strict enum definition.
 CODEGENOPT(StrictVTablePointers, 1, 0) ///< Optimize based on the strict vtable pointers
 CODEGENOPT(TimePasses, 1, 0) ///< Set when -ftime-report is enabled.

Modified: cfe/trunk/lib/CodeGen/CGRecordLayoutBuilder.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGRecordLayoutBuilder.cpp?rev=315915&r1=315914&r2=315915&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGRecordLayoutBuilder.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGRecordLayoutBuilder.cpp Mon Oct 16 09:50:27 2017
@@ -403,6 +403,27 @@ CGRecordLowering::acc

Re: [PATCH] D36562: [Bitfield] Make the bitfield a separate location if it has width of legal integer type and its bit offset is naturally aligned for the type

2017-09-03 Thread Wei Mi via cfe-commits
On Sat, Sep 2, 2017 at 6:04 PM, Hal Finkel  wrote:
>
> On 08/22/2017 10:56 PM, Wei Mi via llvm-commits wrote:
>>
>> On Tue, Aug 22, 2017 at 7:03 PM, Xinliang David Li 
>> wrote:
>>>
>>>
>>> On Tue, Aug 22, 2017 at 6:37 PM, Chandler Carruth via Phabricator
>>>  wrote:

 chandlerc added a comment.

 I'm really not a fan of the degree of complexity and subtlety that this
 introduces into the frontend, all to allow particular backend
 optimizations.

 I feel like this is Clang working around a fundamental deficiency in
 LLVM
 and we should instead find a way to fix this in LLVM itself.

 As has been pointed out before, user code can synthesize large integers
 that small bit sequences are extracted from, and Clang and LLVM should
 handle those just as well as actual bitfields.

 Can we see how far we can push the LLVM side before we add complexity to
 Clang here? I understand that there remain challenges to LLVM's stuff,
 but I
 don't think those challenges make *all* of the LLVM improvements off the
 table, I don't think we've exhausted all ways of improving the LLVM
 changes
 being proposed, and I think we should still land all of those and
 re-evaluate how important these issues are when all of that is in place.
>>>
>>>
>>> The main challenge of doing  this in LLVM is that inter-procedural
>>> analysis
>>> (and possibly cross module) is needed (for store forwarding issues).
>>>
>>> Wei, perhaps you can provide concrete test case to illustrate the issue
>>> so
>>> that reviewers have a good understanding.
>>>
>>> David
>>
>> Here is a runable testcase:
>>  1.cc 
>> class A {
>> public:
>>unsigned long f1:2;
>>unsigned long f2:6;
>>unsigned long f3:8;
>>unsigned long f4:4;
>> };
>> A a;
>> unsigned long b;
>> unsigned long N = 10;
>>
>> __attribute__((noinline))
>> void foo() {
>>a.f3 = 3;
>> }
>>
>> __attribute__((noinline))
>> void goo() {
>>b = a.f3;
>> }
>>
>> int main() {
>>unsigned long i;
>>for (i = 0; i < N; i++) {
>>  foo();
>>  goo();
>>}
>> }
>> 
>> Now trunk takes about twice running time compared with trunk + this
>> patch. That is because trunk shrinks the store of a.f3 in foo (Done by
>> DagCombiner) but not shrink the load of a.f3 in goo, so store
>> forwarding will be blocked.
>
>
> I can confirm that this is true on Haswell and also on an POWER8.
> Interestingly, on a POWER7, the performance is the same either way (on the
> good side). I ran the test case as presented and where I replaced f3 with a
> non-bitfield unsigned char member. Thinking that the POWER7 result might be
> because it's big-Endian, I flipped the order of the fields, and found that
> the version where f3 is not a bitfield is faster than otherwise, but only by
> 12.5%.
>
> Why, in this case, don't we shrink the load? It seems like we should (and it
> seems like a straightforward case).
>
> Thanks again,
> Hal
>

Hal, thanks for trying the test.

Yes, it is straightforward to shrink the load in the test. I can
change the testcase a little to show why it is sometimes difficult to
shrink the load:

class A {
public:
  unsigned long f1:16;
  unsigned long f2:16;
  unsigned long f3:16;
  unsigned long f4:8;
};
A a;
bool b;
unsigned long N = 10;

__attribute__((noinline))
void foo() {
  a.f4 = 3;
}

__attribute__((noinline))
void goo() {
  b = (a.f4 == 0 && a.f3 == (unsigned short)-1);
}

int main() {
  unsigned long i;
  for (i = 0; i < N; i++) {
foo();
goo();
  }
}

For the load of a.f4 in goo, it is difficult to shrink it after
instcombine because the comparison with a.f3 and the comparison with
a.f4 are merged:

define void @_Z3goov() local_unnamed_addr #0 {
  %1 = load i64, i64* bitcast (%class.A* @a to i64*), align 8
  %2 = and i64 %1, 0x00ffffff00000000
  %3 = icmp eq i64 %2, 0x0000ffff00000000
  %4 = zext i1 %3 to i8
  store i8 %4, i8* @b, align 1, !tbaa !2
  ret void
}

Thanks,
Wei.

>>
>> The testcases shows the potential problem of store shrinking. Before
>> we decide to do store shrinking, we need to know all the related loads
>> will be shrunk,  and that requires IPA analysis. Otherwise, when load
>> shrinking was blocked for some difficult case (Like the instcombine
>> case described in
>> https://www.mail-archive.com/cfe-commits@lists.llvm.org/msg65085.html),
>> performance regression will happen.
>>
>> Wei.
>>
>>


 Repository:
rL LLVM

 https://reviews.llvm.org/D36562



>> ___
>> llvm-commits mailing list
>> llvm-comm...@lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
>
> --
> Hal Finkel
> Lead, Compiler Technology and Programming Languages
> Leadership Computing Facility
> Argonne National Laboratory
>

Re: [PATCH] D36562: [Bitfield] Make the bitfield a separate location if it has width of legal integer type and its bit offset is naturally aligned for the type

2017-09-03 Thread Wei Mi via cfe-commits
On Sun, Sep 3, 2017 at 8:55 PM, Hal Finkel  wrote:
>
> On 09/03/2017 10:38 PM, Xinliang David Li wrote:
>
> Store forwarding stall cost is usually much higher compared with a load
> hitting L1 cache. For instance, on Haswell,  the latter is ~4 cycles, while
> the store forwarding stalls cost about 10 cycles more than a successful
> store forwarding, which is roughly 15 cycles. In some scenarios, the store
> forwarding stalls can be as high as 50 cycles. See Agner's documentation.
>
>
> I understand. As I understand it, there are two potential ways to fix this
> problem:
>
>  1. You can make the store wider (to match the size of the wide load, thus
> permitting forwarding).
>  2. You can make the load smaller (to match the size of the small store,
> thus permitting forwarding).
>
> At least in this benchmark, which is a better solution?
>
> Thanks again,
> Hal
>

For this benchmark, the smaller load is better. On my Sandy Bridge
desktop, the wider store takes 3.77s, the smaller load 3.45s. If store
forwarding is blocked, it costs 6.9s.

However, we don't have a good way to narrow the load to match the store
shrinking because the field information has been lost. For the IR
below:

define void @_Z3goov() local_unnamed_addr #0 {
  %1 = load i64, i64* bitcast (%class.A* @a to i64*), align 8
  %2 = and i64 %1, 0x00ffffff00000000
  %3 = icmp eq i64 %2, 0x0000ffff00000000
  %4 = zext i1 %3 to i8
  store i8 %4, i8* @b, align 1, !tbaa !2
  ret void
}

We know the 24-bit range from bit 32 to bit 56 of @a is accessed, but
we don't know whether that range contains 8-bit + 16-bit bitfields,
16-bit + 8-bit bitfields, or 8-bit + 8-bit + 8-bit bitfields. Once the
load shrinking done locally is inconsistent with the store shrinking,
we will have a store forwarding issue and suffer a huge regression.
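
For instance, both of the following hypothetical layouts use exactly
bits 32 to 56 of the 64-bit allocation unit, so the masked IR above
cannot tell them apart:

class A1 { public: unsigned long f1:32, f3:16, f4:8; };  // 16-bit + 8-bit
class A2 { public: unsigned long f1:32, f3:8, f4:16; };  // 8-bit + 16-bit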

Thanks,
Wei.



>
>
> In other words, the optimizer needs to be taught to avoid defeating  the HW
> pipeline feature as much as possible.
>
> David
>
> On Sun, Sep 3, 2017 at 6:32 PM, Hal Finkel  wrote:
>>
>>
>> On 09/03/2017 03:44 PM, Wei Mi wrote:
>>>
>>> On Sat, Sep 2, 2017 at 6:04 PM, Hal Finkel  wrote:

 On 08/22/2017 10:56 PM, Wei Mi via llvm-commits wrote:
>
> On Tue, Aug 22, 2017 at 7:03 PM, Xinliang David Li 
> wrote:
>>
>>
>> On Tue, Aug 22, 2017 at 6:37 PM, Chandler Carruth via Phabricator
>>  wrote:
>>>
>>> chandlerc added a comment.
>>>
>>> I'm really not a fan of the degree of complexity and subtlety that
>>> this
>>> introduces into the frontend, all to allow particular backend
>>> optimizations.
>>>
>>> I feel like this is Clang working around a fundamental deficiency in
>>> LLVM
>>> and we should instead find a way to fix this in LLVM itself.
>>>
>>> As has been pointed out before, user code can synthesize large
>>> integers
>>> that small bit sequences are extracted from, and Clang and LLVM
>>> should
>>> handle those just as well as actual bitfields.
>>>
>>> Can we see how far we can push the LLVM side before we add complexity
>>> to
>>> Clang here? I understand that there remain challenges to LLVM's
>>> stuff,
>>> but I
>>> don't think those challenges make *all* of the LLVM improvements off
>>> the
>>> table, I don't think we've exhausted all ways of improving the LLVM
>>> changes
>>> being proposed, and I think we should still land all of those and
>>> re-evaluate how important these issues are when all of that is in
>>> place.
>>
>>
>> The main challenge of doing  this in LLVM is that inter-procedural
>> analysis
>> (and possibly cross module) is needed (for store forwarding issues).
>>
>> Wei, perhaps you can provide concrete test case to illustrate the
>> issue
>> so
>> that reviewers have a good understanding.
>>
>> David
>
> Here is a runable testcase:
>  1.cc 
> class A {
> public:
> unsigned long f1:2;
> unsigned long f2:6;
> unsigned long f3:8;
> unsigned long f4:4;
> };
> A a;
> unsigned long b;
> unsigned long N = 10;
>
> __attribute__((noinline))
> void foo() {
> a.f3 = 3;
> }
>
> __attribute__((noinline))
> void goo() {
> b = a.f3;
> }
>
> int main() {
> unsigned long i;
> for (i = 0; i < N; i++) {
>   foo();
>   goo();
> }
> }
> 
> Now trunk takes about twice running time compared with trunk + this
> patch. That is because trunk shrinks the store of a.f3 in foo (Done by
> DagCombiner) but not shrink the load of a.f3 in goo, so store
> forwarding will be blocked.


 I can confirm that this is true on Haswell and also on an POWER8.
 Interestingly, on a POWER7, the performance is the same either w

r312801 - Use EmitPointerWithAlignment to get alignment information of the pointer used in atomic expr.

2017-09-08 Thread Wei Mi via cfe-commits
Author: wmi
Date: Fri Sep  8 10:07:32 2017
New Revision: 312801

URL: http://llvm.org/viewvc/llvm-project?rev=312801&view=rev
Log:
Use EmitPointerWithAlignment to get alignment information of the pointer used 
in atomic expr.

This is to fix PR34347. EmitAtomicExpr currently only uses alignment information
from the Type, not the Decl, so when the declaration of an atomic variable is
marked to have an alignment equal to its size, EmitAtomicExpr doesn't know about
it and will generate a libcall instead of an atomic op. The patch uses
EmitPointerWithAlignment to get the precise alignment information.

Differential Revision: https://reviews.llvm.org/D37310

Added:
cfe/trunk/test/CodeGenCXX/atomic-align.cpp
Modified:
cfe/trunk/lib/CodeGen/CGAtomic.cpp

Modified: cfe/trunk/lib/CodeGen/CGAtomic.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGAtomic.cpp?rev=312801&r1=312800&r2=312801&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGAtomic.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGAtomic.cpp Fri Sep  8 10:07:32 2017
@@ -745,19 +745,19 @@ RValue CodeGenFunction::EmitAtomicExpr(A
   QualType MemTy = AtomicTy;
   if (const AtomicType *AT = AtomicTy->getAs<AtomicType>())
 MemTy = AT->getValueType();
-  CharUnits sizeChars, alignChars;
-  std::tie(sizeChars, alignChars) = getContext().getTypeInfoInChars(AtomicTy);
-  uint64_t Size = sizeChars.getQuantity();
-  unsigned MaxInlineWidthInBits = getTarget().getMaxAtomicInlineWidth();
-  bool UseLibcall = (sizeChars != alignChars ||
- getContext().toBits(sizeChars) > MaxInlineWidthInBits);
-
   llvm::Value *IsWeak = nullptr, *OrderFail = nullptr;
 
   Address Val1 = Address::invalid();
   Address Val2 = Address::invalid();
   Address Dest = Address::invalid();
-  Address Ptr(EmitScalarExpr(E->getPtr()), alignChars);
+  Address Ptr = EmitPointerWithAlignment(E->getPtr());
+
+  CharUnits sizeChars, alignChars;
+  std::tie(sizeChars, alignChars) = getContext().getTypeInfoInChars(AtomicTy);
+  uint64_t Size = sizeChars.getQuantity();
+  unsigned MaxInlineWidthInBits = getTarget().getMaxAtomicInlineWidth();
+  bool UseLibcall = (sizeChars != Ptr.getAlignment() ||
+ getContext().toBits(sizeChars) > MaxInlineWidthInBits);
 
   if (E->getOp() == AtomicExpr::AO__c11_atomic_init ||
   E->getOp() == AtomicExpr::AO__opencl_atomic_init) {

Added: cfe/trunk/test/CodeGenCXX/atomic-align.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/atomic-align.cpp?rev=312801&view=auto
==============================================================================
--- cfe/trunk/test/CodeGenCXX/atomic-align.cpp (added)
+++ cfe/trunk/test/CodeGenCXX/atomic-align.cpp Fri Sep  8 10:07:32 2017
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 %s -std=c++11 -emit-llvm -o - -triple=x86_64-linux-gnu | FileCheck %s
+
+struct AM {
+  int f1, f2;
+};
+alignas(8) AM m;
+AM load1() {
+  AM am;
+  // m is declared to align to 8bytes, so generate load atomic instead
+  // of libcall.
+  // CHECK-LABEL: @_Z5load1v
+  // CHECK: load atomic {{.*}} monotonic
+  __atomic_load(&m, &am, 0);
+  return am;
+}
+
+struct BM {
+  int f1;
+  alignas(8) AM f2;
+};
+BM bm;
+AM load2() {
+  AM am;
+  // BM::f2 is declared to align to 8bytes, so generate load atomic instead
+  // of libcall.
+  // CHECK-LABEL: @_Z5load2v
+  // CHECK: load atomic {{.*}} monotonic
+  __atomic_load(&bm.f2, &am, 0);
+  return am;
+}



r312805 - Revert rL312801 since it generated some calls from libatomic and broke some tests.

2017-09-08 Thread Wei Mi via cfe-commits
Author: wmi
Date: Fri Sep  8 11:10:13 2017
New Revision: 312805

URL: http://llvm.org/viewvc/llvm-project?rev=312805&view=rev
Log:
Revert rL312801 since it generated some calls from libatomic and broke some 
tests.

Modified:
cfe/trunk/lib/CodeGen/CGAtomic.cpp
cfe/trunk/test/CodeGenCXX/atomic-align.cpp

Modified: cfe/trunk/lib/CodeGen/CGAtomic.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGAtomic.cpp?rev=312805&r1=312804&r2=312805&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGAtomic.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGAtomic.cpp Fri Sep  8 11:10:13 2017
@@ -745,20 +745,20 @@ RValue CodeGenFunction::EmitAtomicExpr(A
   QualType MemTy = AtomicTy;
   if (const AtomicType *AT = AtomicTy->getAs<AtomicType>())
 MemTy = AT->getValueType();
-  llvm::Value *IsWeak = nullptr, *OrderFail = nullptr;
-
-  Address Val1 = Address::invalid();
-  Address Val2 = Address::invalid();
-  Address Dest = Address::invalid();
-  Address Ptr = EmitPointerWithAlignment(E->getPtr());
-
   CharUnits sizeChars, alignChars;
   std::tie(sizeChars, alignChars) = getContext().getTypeInfoInChars(AtomicTy);
   uint64_t Size = sizeChars.getQuantity();
   unsigned MaxInlineWidthInBits = getTarget().getMaxAtomicInlineWidth();
-  bool UseLibcall = (sizeChars != Ptr.getAlignment() ||
+  bool UseLibcall = (sizeChars != alignChars ||
  getContext().toBits(sizeChars) > MaxInlineWidthInBits);
 
+  llvm::Value *IsWeak = nullptr, *OrderFail = nullptr;
+
+  Address Val1 = Address::invalid();
+  Address Val2 = Address::invalid();
+  Address Dest = Address::invalid();
+  Address Ptr(EmitScalarExpr(E->getPtr()), alignChars);
+
   if (E->getOp() == AtomicExpr::AO__c11_atomic_init ||
   E->getOp() == AtomicExpr::AO__opencl_atomic_init) {
 LValue lvalue = MakeAddrLValue(Ptr, AtomicTy);

Modified: cfe/trunk/test/CodeGenCXX/atomic-align.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/atomic-align.cpp?rev=312805&r1=312804&r2=312805&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCXX/atomic-align.cpp (original)
+++ cfe/trunk/test/CodeGenCXX/atomic-align.cpp Fri Sep  8 11:10:13 2017
@@ -1,30 +0,0 @@
-// RUN: %clang_cc1 %s -std=c++11 -emit-llvm -o - -triple=x86_64-linux-gnu | FileCheck %s
-
-struct AM {
-  int f1, f2;
-};
-alignas(8) AM m;
-AM load1() {
-  AM am;
-  // m is declared to align to 8bytes, so generate load atomic instead
-  // of libcall.
-  // CHECK-LABEL: @_Z5load1v
-  // CHECK: load atomic {{.*}} monotonic
-  __atomic_load(&m, &am, 0);
-  return am;
-}
-
-struct BM {
-  int f1;
-  alignas(8) AM f2;
-};
-BM bm;
-AM load2() {
-  AM am;
-  // BM::f2 is declared to align to 8bytes, so generate load atomic instead
-  // of libcall.
-  // CHECK-LABEL: @_Z5load2v
-  // CHECK: load atomic {{.*}} monotonic
-  __atomic_load(&bm.f2, &am, 0);
-  return am;
-}



r312810 - Delete empty file test/CodeGenCXX/atomic-align.cpp after the revert at rL312805.

2017-09-08 Thread Wei Mi via cfe-commits
Author: wmi
Date: Fri Sep  8 11:31:21 2017
New Revision: 312810

URL: http://llvm.org/viewvc/llvm-project?rev=312810&view=rev
Log:
Delete empty file test/CodeGenCXX/atomic-align.cpp after the revert at rL312805.

Removed:
cfe/trunk/test/CodeGenCXX/atomic-align.cpp

Removed: cfe/trunk/test/CodeGenCXX/atomic-align.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/atomic-align.cpp?rev=312809&view=auto
==============================================================================
(empty)



r312830 - Reinstall the patch "Use EmitPointerWithAlignment to get alignment information of the pointer used in atomic expr".

2017-09-08 Thread Wei Mi via cfe-commits
Author: wmi
Date: Fri Sep  8 14:58:18 2017
New Revision: 312830

URL: http://llvm.org/viewvc/llvm-project?rev=312830&view=rev
Log:
Reinstall the patch "Use EmitPointerWithAlignment to get alignment information 
of the pointer used in atomic expr".

This is to fix PR34347. EmitAtomicExpr currently only uses alignment information
from the Type, not the Decl, so when the declaration of an atomic variable is
marked to have an alignment equal to its size, EmitAtomicExpr doesn't know about
it and will generate a libcall instead of an atomic op. The patch uses
EmitPointerWithAlignment to get the precise alignment information.

Differential Revision: https://reviews.llvm.org/D37310

Added:
cfe/trunk/test/CodeGenCXX/atomic-align.cpp
Modified:
cfe/trunk/lib/CodeGen/CGAtomic.cpp

Modified: cfe/trunk/lib/CodeGen/CGAtomic.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGAtomic.cpp?rev=312830&r1=312829&r2=312830&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGAtomic.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGAtomic.cpp Fri Sep  8 14:58:18 2017
@@ -745,19 +745,19 @@ RValue CodeGenFunction::EmitAtomicExpr(A
   QualType MemTy = AtomicTy;
   if (const AtomicType *AT = AtomicTy->getAs<AtomicType>())
 MemTy = AT->getValueType();
-  CharUnits sizeChars, alignChars;
-  std::tie(sizeChars, alignChars) = getContext().getTypeInfoInChars(AtomicTy);
-  uint64_t Size = sizeChars.getQuantity();
-  unsigned MaxInlineWidthInBits = getTarget().getMaxAtomicInlineWidth();
-  bool UseLibcall = (sizeChars != alignChars ||
- getContext().toBits(sizeChars) > MaxInlineWidthInBits);
-
   llvm::Value *IsWeak = nullptr, *OrderFail = nullptr;
 
   Address Val1 = Address::invalid();
   Address Val2 = Address::invalid();
   Address Dest = Address::invalid();
-  Address Ptr(EmitScalarExpr(E->getPtr()), alignChars);
+  Address Ptr = EmitPointerWithAlignment(E->getPtr());
+
+  CharUnits sizeChars, alignChars;
+  std::tie(sizeChars, alignChars) = getContext().getTypeInfoInChars(AtomicTy);
+  uint64_t Size = sizeChars.getQuantity();
+  unsigned MaxInlineWidthInBits = getTarget().getMaxAtomicInlineWidth();
+  bool UseLibcall = ((Ptr.getAlignment() % sizeChars) != 0 ||
+ getContext().toBits(sizeChars) > MaxInlineWidthInBits);
 
   if (E->getOp() == AtomicExpr::AO__c11_atomic_init ||
   E->getOp() == AtomicExpr::AO__opencl_atomic_init) {

Added: cfe/trunk/test/CodeGenCXX/atomic-align.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/atomic-align.cpp?rev=312830&view=auto
==============================================================================
--- cfe/trunk/test/CodeGenCXX/atomic-align.cpp (added)
+++ cfe/trunk/test/CodeGenCXX/atomic-align.cpp Fri Sep  8 14:58:18 2017
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 %s -std=c++11 -emit-llvm -o - -triple=x86_64-linux-gnu | FileCheck %s
+
+struct AM {
+  int f1, f2;
+};
+alignas(8) AM m;
+AM load1() {
+  AM am;
+  // m is declared to align to 8bytes, so generate load atomic instead
+  // of libcall.
+  // CHECK-LABEL: @_Z5load1v
+  // CHECK: load atomic {{.*}} monotonic
+  __atomic_load(&m, &am, 0);
+  return am;
+}
+
+struct BM {
+  int f1;
+  alignas(8) AM f2;
+};
+BM bm;
+AM load2() {
+  AM am;
+  // BM::f2 is declared to align to 8bytes, so generate load atomic instead
+  // of libcall.
+  // CHECK-LABEL: @_Z5load2v
+  // CHECK: load atomic {{.*}} monotonic
+  __atomic_load(&bm.f2, &am, 0);
+  return am;
+}



r313992 - [Atomic][X8664] set max atomic inline width according to the target

2017-09-22 Thread Wei Mi via cfe-commits
Author: wmi
Date: Fri Sep 22 09:30:00 2017
New Revision: 313992

URL: http://llvm.org/viewvc/llvm-project?rev=313992&view=rev
Log:
[Atomic][X8664] set max atomic inline width according to the target

This is to fix PR31620. MaxAtomicInlineWidth is set to 128 for x86_64. However,
for targets without cx16 support, 128-bit atomic operations will generate
__sync_* libcalls. The patch sets MaxAtomicInlineWidth to 64 if the target
doesn't support cx16.

Differential Revision: https://reviews.llvm.org/D38046
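
A quick way to observe the difference (a sketch using the standard
__atomic_always_lock_free builtin, which folds to a constant at compile time;
AM16 mirrors the struct in the added test; compile once for a generic x86-64
target and once with -target-cpu core2 or -mcx16):

#include <cstdio>

struct alignas(16) AM16 { long f1, f2; };

int main() {
  // Prints 1 when 16-byte atomics can be inlined (cx16 available),
  // 0 when they must go through atomic libcalls instead.
  std::printf("%d\n", (int)__atomic_always_lock_free(sizeof(AM16), nullptr));
}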

Added:
cfe/trunk/test/CodeGenCXX/atomic-inline.cpp
Modified:
cfe/trunk/include/clang/Basic/TargetInfo.h
cfe/trunk/lib/Basic/Targets.cpp
cfe/trunk/lib/Basic/Targets/X86.h
cfe/trunk/test/OpenMP/atomic_capture_codegen.cpp
cfe/trunk/test/OpenMP/atomic_read_codegen.c
cfe/trunk/test/OpenMP/atomic_update_codegen.cpp
cfe/trunk/test/OpenMP/atomic_write_codegen.c

Modified: cfe/trunk/include/clang/Basic/TargetInfo.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/TargetInfo.h?rev=313992&r1=313991&r2=313992&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/TargetInfo.h (original)
+++ cfe/trunk/include/clang/Basic/TargetInfo.h Fri Sep 22 09:30:00 2017
@@ -448,6 +448,9 @@ public:
   /// \brief Return the maximum width lock-free atomic operation which can be
   /// inlined given the supported features of the given target.
   unsigned getMaxAtomicInlineWidth() const { return MaxAtomicInlineWidth; }
+  /// \brief Set the maximum inline or promote width lock-free atomic operation
+  /// for the given target.
+  virtual void setMaxAtomicWidth() {}
   /// \brief Returns true if the given target supports lock-free atomic
   /// operations at the specified width and alignment.
   virtual bool hasBuiltinAtomic(uint64_t AtomicSizeInBits,

Modified: cfe/trunk/lib/Basic/Targets.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets.cpp?rev=313992&r1=313991&r2=313992&view=diff
==============================================================================
--- cfe/trunk/lib/Basic/Targets.cpp (original)
+++ cfe/trunk/lib/Basic/Targets.cpp Fri Sep 22 09:30:00 2017
@@ -620,6 +620,7 @@ TargetInfo::CreateTargetInfo(Diagnostics
 
   Target->setSupportedOpenCLOpts();
   Target->setOpenCLExtensionOpts();
+  Target->setMaxAtomicWidth();
 
   if (!Target->validateTarget(Diags))
 return nullptr;

Modified: cfe/trunk/lib/Basic/Targets/X86.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.h?rev=313992&r1=313991&r2=313992&view=diff
==============================================================================
--- cfe/trunk/lib/Basic/Targets/X86.h (original)
+++ cfe/trunk/lib/Basic/Targets/X86.h Fri Sep 22 09:30:00 2017
@@ -814,7 +814,7 @@ public:
 
 // x86-64 has atomics up to 16 bytes.
 MaxAtomicPromoteWidth = 128;
-MaxAtomicInlineWidth = 128;
+MaxAtomicInlineWidth = 64;
   }
 
   BuiltinVaListKind getBuiltinVaListKind() const override {
@@ -872,6 +872,12 @@ public:
  HasSizeMismatch);
   }
 
+  void setMaxAtomicWidth() override {
+if (hasFeature("cx16"))
+  MaxAtomicInlineWidth = 128;
+return;
+  }
+
   ArrayRef<Builtin::Info> getTargetBuiltins() const override;
 };
 

Added: cfe/trunk/test/CodeGenCXX/atomic-inline.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/atomic-inline.cpp?rev=313992&view=auto
==============================================================================
--- cfe/trunk/test/CodeGenCXX/atomic-inline.cpp (added)
+++ cfe/trunk/test/CodeGenCXX/atomic-inline.cpp Fri Sep 22 09:30:00 2017
@@ -0,0 +1,69 @@
+// RUN: %clang_cc1 %s -std=c++11 -emit-llvm -o - -triple=x86_64-linux-gnu | FileCheck %s
+// RUN: %clang_cc1 %s -std=c++11 -emit-llvm -o - -triple=x86_64-linux-gnu -target-cpu core2 | FileCheck %s --check-prefix=CORE2
+// Check the atomic code generation for cpu targets w/wo cx16 support.
+
+struct alignas(8) AM8 {
+  int f1, f2;
+};
+AM8 m8;
+AM8 load8() {
+  AM8 am;
+  // CHECK-LABEL: @_Z5load8v
+  // CHECK: load atomic i64, {{.*}} monotonic
+  // CORE2-LABEL: @_Z5load8v
+  // CORE2: load atomic i64, {{.*}} monotonic
+  __atomic_load(&m8, &am, 0);
+  return am;
+}
+
+AM8 s8;
+void store8() {
+  // CHECK-LABEL: @_Z6store8v
+  // CHECK: store atomic i64 {{.*}} monotonic
+  // CORE2-LABEL: @_Z6store8v
+  // CORE2: store atomic i64 {{.*}} monotonic
+  __atomic_store(&m8, &s8, 0);
+}
+
+bool cmpxchg8() {
+  AM8 am;
+  // CHECK-LABEL: @_Z8cmpxchg8v
+  // CHECK: cmpxchg i64* {{.*}} monotonic
+  // CORE2-LABEL: @_Z8cmpxchg8v
+  // CORE2: cmpxchg i64* {{.*}} monotonic
+  return __atomic_compare_exchange(&m8, &s8, &am, 0, 0, 0);
+}
+
+struct alignas(16) AM16 {
+  long f1, f2;
+};
+
+AM16 m16;
+AM16 load16() {
+  AM16 am;
+  // CHECK-LABEL: @_Z6load16v
+  // CHECK: call void @__atomic_load
+  // CORE2-LABEL: @_Z6load16v
+  // CORE2: load atomic i128, {{.*}} monotonic
+  __atomic

[clang] 7a6c894 - [SampleFDO] Add use-sample-profile function attribute.

2020-06-02 Thread Wei Mi via cfe-commits

Author: Wei Mi
Date: 2020-06-02T17:23:17-07:00
New Revision: 7a6c89427c9babc8e4a69e8a2b61bbf4a4b80c56

URL: 
https://github.com/llvm/llvm-project/commit/7a6c89427c9babc8e4a69e8a2b61bbf4a4b80c56
DIFF: 
https://github.com/llvm/llvm-project/commit/7a6c89427c9babc8e4a69e8a2b61bbf4a4b80c56.diff

LOG: [SampleFDO] Add use-sample-profile function attribute.

When SampleFDO is enabled, people may expect they can use
-fno-profile-sample-use to opt out of using the sample profile for a certain
file. That could be either for debugging purposes or for performance tuning.
However, when ThinLTO is enabled, if a function in file A compiled with
-fno-profile-sample-use is imported into another file B compiled with
-fprofile-sample-use, the inlined copy of the function in file B may still
get its profile annotated.

The inconsistency may even introduce a profile-unused warning, because if the
target is not compiled with an explicit debug information flag, the function
in file A won't have its debug information enabled (debug information will
be enabled implicitly only when -fprofile-sample-use is used). After it is
imported into file B, which is compiled with -fprofile-sample-use, profile
annotation for the outline copy of the function will fail because the
function has no debug information, and that will trigger a profile-unused
warning.

We add a new attribute, use-sample-profile, to control whether a function
will use its sample profile, both for its outline and inline copies. That
makes the behavior of -fno-profile-sample-use consistent.

Differential Revision: https://reviews.llvm.org/D79959
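
A sketch of the expected codegen, modeled on the added test (the profile file
name and the function are illustrative):

// Built with: clang -O2 -fprofile-sample-use=code.prof -emit-llvm -S foo.c
// Every function definition now carries the new attribute, e.g.:
//   define i32 @foo() #0 { ... }
//   attributes #0 = { ... "use-sample-profile" ... }
// With -fno-profile-sample-use the attribute is absent, so the SampleProfile
// pass leaves the function alone even after it is imported into a module
// that was built with -fprofile-sample-use.
int foo(void) { return 0; }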

Added: 
clang/test/CodeGen/use-sample-profile-attr.c
llvm/test/Transforms/Inline/inline-incompat-attrs.ll
llvm/test/Transforms/Inline/partial-inline-incompat-attrs.ll
llvm/test/Transforms/SampleProfile/Inputs/use-sample-profile-attr.prof
llvm/test/Transforms/SampleProfile/use-sample-profile-attr.ll

Modified: 
clang/lib/CodeGen/CodeGenFunction.cpp
llvm/include/llvm/IR/Attributes.td
llvm/lib/Transforms/IPO/SampleProfile.cpp
llvm/test/LTO/Resolution/X86/load-sample-prof-icp.ll
llvm/test/LTO/Resolution/X86/load-sample-prof-lto.ll
llvm/test/LTO/Resolution/X86/load-sample-prof.ll
llvm/test/Transforms/SampleProfile/Inputs/profile-symbol-list.ll
llvm/test/Transforms/SampleProfile/branch.ll
llvm/test/Transforms/SampleProfile/calls.ll
llvm/test/Transforms/SampleProfile/cold-indirect-call.ll
llvm/test/Transforms/SampleProfile/cov-zero-samples.ll
llvm/test/Transforms/SampleProfile/coverage-warning.ll
llvm/test/Transforms/SampleProfile/discriminator.ll
llvm/test/Transforms/SampleProfile/early-inline.ll
llvm/test/Transforms/SampleProfile/entry_counts.ll
llvm/test/Transforms/SampleProfile/entry_counts_cold.ll
llvm/test/Transforms/SampleProfile/entry_counts_missing_dbginfo.ll
llvm/test/Transforms/SampleProfile/fnptr.ll
llvm/test/Transforms/SampleProfile/function_metadata.ll
llvm/test/Transforms/SampleProfile/gcc-simple.ll
llvm/test/Transforms/SampleProfile/indirect-call-gcc.ll
llvm/test/Transforms/SampleProfile/indirect-call.ll
llvm/test/Transforms/SampleProfile/inline-callee-update.ll
llvm/test/Transforms/SampleProfile/inline-cold-callsite-samplepgo.ll
llvm/test/Transforms/SampleProfile/inline-cold.ll
llvm/test/Transforms/SampleProfile/inline-combine.ll
llvm/test/Transforms/SampleProfile/inline-coverage.ll
llvm/test/Transforms/SampleProfile/inline-mergeprof.ll
llvm/test/Transforms/SampleProfile/inline-stats.ll
llvm/test/Transforms/SampleProfile/inline-topdown.ll
llvm/test/Transforms/SampleProfile/inline.ll
llvm/test/Transforms/SampleProfile/nolocinfo.ll
llvm/test/Transforms/SampleProfile/offset.ll
llvm/test/Transforms/SampleProfile/profile-format-compress.ll
llvm/test/Transforms/SampleProfile/profile-format.ll
llvm/test/Transforms/SampleProfile/profile-sample-accurate.ll
llvm/test/Transforms/SampleProfile/propagate.ll
llvm/test/Transforms/SampleProfile/remap.ll
llvm/test/Transforms/SampleProfile/remarks.ll
llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll
llvm/test/Transforms/SampleProfile/syntax.ll
llvm/test/Transforms/SampleProfile/warm-inline-instance.ll

Removed: 




diff  --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index d6622a435b53..0fa795d696e5 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -791,6 +791,9 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
   if (CGM.getCodeGenOpts().ProfileSampleAccurate)
 Fn->addFnAttr("profile-sample-accurate");
 
+  if (!CGM.getCodeGenOpts().SampleProfileFile.empty())
+Fn->addFnAttr("use-sample-profile");
+
   if (D && D->hasAttr<CFICanonicalJumpTableAttr>())
 Fn->addFnAttr("cfi-canonical-jump-table");
 

diff  --git a/clang/

Re: [PATCH] D36562: [Bitfield] Make the bitfield a separate location if it has width of legal integer type and its bit offset is naturally aligned for the type

2017-08-22 Thread Wei Mi via cfe-commits
On Tue, Aug 22, 2017 at 7:03 PM, Xinliang David Li  wrote:
>
>
> On Tue, Aug 22, 2017 at 6:37 PM, Chandler Carruth via Phabricator
>  wrote:
>>
>> chandlerc added a comment.
>>
>> I'm really not a fan of the degree of complexity and subtlety that this
>> introduces into the frontend, all to allow particular backend optimizations.
>>
>> I feel like this is Clang working around a fundamental deficiency in LLVM
>> and we should instead find a way to fix this in LLVM itself.
>>
>> As has been pointed out before, user code can synthesize large integers
>> that small bit sequences are extracted from, and Clang and LLVM should
>> handle those just as well as actual bitfields.
>>
>> Can we see how far we can push the LLVM side before we add complexity to
>> Clang here? I understand that there remain challenges to LLVM's stuff, but I
>> don't think those challenges make *all* of the LLVM improvements off the
>> table, I don't think we've exhausted all ways of improving the LLVM changes
>> being proposed, and I think we should still land all of those and
>> re-evaluate how important these issues are when all of that is in place.
>
>
> The main challenge of doing  this in LLVM is that inter-procedural analysis
> (and possibly cross module) is needed (for store forwarding issues).
>
> Wei, perhaps you can provide concrete test case to illustrate the issue so
> that reviewers have a good understanding.
>
> David

Here is a runable testcase:
 1.cc 
class A {
public:
  unsigned long f1:2;
  unsigned long f2:6;
  unsigned long f3:8;
  unsigned long f4:4;
};
A a;
unsigned long b;
unsigned long N = 10;

__attribute__((noinline))
void foo() {
  a.f3 = 3;
}

__attribute__((noinline))
void goo() {
  b = a.f3;
}

int main() {
  unsigned long i;
  for (i = 0; i < N; i++) {
foo();
goo();
  }
}

Now trunk takes about twice the running time of trunk + this patch.
That is because trunk shrinks the store of a.f3 in foo (done by the
DAGCombiner) but does not shrink the load of a.f3 in goo, so store
forwarding will be blocked.

The testcase shows the potential problem of store shrinking. Before
we decide to do store shrinking, we need to know that all the related
loads will be shrunk, and that requires IPA analysis. Otherwise, when
load shrinking is blocked for some difficult case (like the instcombine
case described in
https://www.mail-archive.com/cfe-commits@lists.llvm.org/msg65085.html),
a performance regression will happen.

Wei.


>>
>>
>>
>> Repository:
>>   rL LLVM
>>
>> https://reviews.llvm.org/D36562
>>
>>
>>
>

Re: [PATCH] D104099: [NewPM] Remove SpeculateAroundPHIs pass from pipeline

2021-06-14 Thread Wei Mi via cfe-commits
On Mon, Jun 14, 2021 at 4:04 PM Xinliang David Li 
wrote:

>
>
> On Mon, Jun 14, 2021 at 3:59 PM Roman Lebedev via Phabricator <
> revi...@reviews.llvm.org> wrote:
>
>> lebedev.ri added a subscriber: MaskRay.
>> lebedev.ri added a comment.
>>
>> In D104099#2815531 , @wenlei
>> wrote:
>>
>> > In D104099#2814167 ,
>> @davidxl wrote:
>> >
>> >> Adding Wei to help measure performance impact on our internal
>> workloads.  Also add Wenlei to help measure impact with FB's workloads.
>> >
>> > Measured perf using FB internal workload w/ and w/o this pass, result
>> is neutral.
>>
>> Thank you for checking!
>>
>> So far, it seems the reaction to this proposal has been overwhelmingly
>> positive.
>> Does anyone else wish to chime in? Should i land this? @asbirlea @MaskRay
>> ?
>>
>
> Wei is doing more measurement @google. Please wait for the response.
>
> David
>

Start doing the test. Will report back.

Wei.


>
>
>>
>> Repository:
>>   rG LLVM Github Monorepo
>>
>> CHANGES SINCE LAST ACTION
>>   https://reviews.llvm.org/D104099/new/
>>
>> https://reviews.llvm.org/D104099
>>
>>

Re: [PATCH] D104099: [NewPM] Remove SpeculateAroundPHIs pass from pipeline

2021-06-15 Thread Wei Mi via cfe-commits
On Mon, Jun 14, 2021 at 4:52 PM Wei Mi  wrote:

>
>
> On Mon, Jun 14, 2021 at 4:04 PM Xinliang David Li 
> wrote:
>
>>
>>
>> On Mon, Jun 14, 2021 at 3:59 PM Roman Lebedev via Phabricator <
>> revi...@reviews.llvm.org> wrote:
>>
>>> lebedev.ri added a subscriber: MaskRay.
>>> lebedev.ri added a comment.
>>>
>>> In D104099#2815531 , @wenlei
>>> wrote:
>>>
>>> > In D104099#2814167 ,
>>> @davidxl wrote:
>>> >
>>> >> Adding Wei to help measure performance impact on our internal
>>> workloads.  Also add Wenlei to help measure impact with FB's workloads.
>>> >
>>> > Measured perf using FB internal workload w/ and w/o this pass, result
>>> is neutral.
>>>
>>> Thank you for checking!
>>>
>>> So far, it seems the reaction to this proposal has been overwhelmingly
>>> positive.
>>> Does anyone else wish to chime in? Should i land this? @asbirlea
>>> @MaskRay ?
>>>
>>
>> Wei is doing more measurement @google. Please wait for the response.
>>
>> David
>>
>
> Start doing the test. Will report back.
>
> Wei.
>

No performance change found in google internal benchmarks.

 Wei.


>
>
>>
>>
>>>
>>> Repository:
>>>   rG LLVM Github Monorepo
>>>
>>> CHANGES SINCE LAST ACTION
>>>   https://reviews.llvm.org/D104099/new/
>>>
>>> https://reviews.llvm.org/D104099
>>>
>>>

r281587 - Update clang unittests for rL281586.

2016-09-14 Thread Wei Mi via cfe-commits
Author: wmi
Date: Thu Sep 15 01:31:30 2016
New Revision: 281587

URL: http://llvm.org/viewvc/llvm-project?rev=281587&view=rev
Log:
Update clang unittests for rL281586.

The change in rL281586 is in the LLVM component and the tests updated here
are in the clang component, so I have to commit them consecutively.

Modified:
cfe/trunk/test/CodeGen/cleanup-destslot-simple.c
cfe/trunk/test/CodeGen/temporary-lifetime-exceptions.cpp
cfe/trunk/test/CodeGen/temporary-lifetime.cpp
cfe/trunk/test/CodeGenCXX/nrvo.cpp
cfe/trunk/test/CodeGenObjC/exceptions.m
cfe/trunk/test/CodeGenObjCXX/exceptions-legacy.mm

Modified: cfe/trunk/test/CodeGen/cleanup-destslot-simple.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/cleanup-destslot-simple.c?rev=281587&r1=281586&r2=281587&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/cleanup-destslot-simple.c (original)
+++ cfe/trunk/test/CodeGen/cleanup-destslot-simple.c Thu Sep 15 01:31:30 2016
@@ -13,8 +13,8 @@ int test() {
   return *p;
 // CHECK: [[X:%.*]] = alloca i32
 // CHECK: [[P:%.*]] = alloca i32*
-// LIFETIME: call void @llvm.lifetime.start(i64 4, i8* %{{.*}}){{( #[0-9]+)?}}, !dbg
-// LIFETIME: call void @llvm.lifetime.start(i64 8, i8* %{{.*}}){{( #[0-9]+)?}}, !dbg
+// LIFETIME: call void @llvm.lifetime.start(i64 4, i8* nonnull %{{.*}}){{( #[0-9]+)?}}, !dbg
+// LIFETIME: call void @llvm.lifetime.start(i64 8, i8* nonnull %{{.*}}){{( #[0-9]+)?}}, !dbg
 // CHECK-NOT: store i32 %{{.*}}, i32* %cleanup.dest.slot
 // LIFETIME: call void @llvm.lifetime.end(i64 8, {{.*}}){{( #[0-9]+)?}}, !dbg
 // LIFETIME: call void @llvm.lifetime.end(i64 4, {{.*}}){{( #[0-9]+)?}}, !dbg

Modified: cfe/trunk/test/CodeGen/temporary-lifetime-exceptions.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/temporary-lifetime-exceptions.cpp?rev=281587&r1=281586&r2=281587&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/temporary-lifetime-exceptions.cpp (original)
+++ cfe/trunk/test/CodeGen/temporary-lifetime-exceptions.cpp Thu Sep 15 01:31:30 2016
@@ -9,16 +9,16 @@ A Baz(const A&);
 void Test1() {
   // CHECK-LABEL: @_Z5Test1v(
   // CHECK: getelementptr
-  // CHECK-NEXT: call void @llvm.lifetime.start(i64 1, i8* [[TMP:[^ ]+]])
+  // CHECK-NEXT: call void @llvm.lifetime.start(i64 1, i8* nonnull [[TMP:[^ ]+]])
   // CHECK-NEXT: getelementptr
-  // CHECK-NEXT: call void @llvm.lifetime.start(i64 1, i8* [[TMP1:[^ ]+]])
+  // CHECK-NEXT: call void @llvm.lifetime.start(i64 1, i8* nonnull [[TMP1:[^ ]+]])
 
   // Normal exit
-  // CHECK: call void @llvm.lifetime.end(i64 1, i8* [[TMP1]])
-  // CHECK-NEXT: call void @llvm.lifetime.end(i64 1, i8* [[TMP]])
+  // CHECK: call void @llvm.lifetime.end(i64 1, i8* nonnull [[TMP1]])
+  // CHECK-NEXT: call void @llvm.lifetime.end(i64 1, i8* nonnull [[TMP]])
 
   // Exception exit
-  // CHECK: call void @llvm.lifetime.end(i64 1, i8* [[TMP1]])
-  // CHECK-NEXT: call void @llvm.lifetime.end(i64 1, i8* [[TMP]])
+  // CHECK: call void @llvm.lifetime.end(i64 1, i8* nonnull [[TMP1]])
+  // CHECK-NEXT: call void @llvm.lifetime.end(i64 1, i8* nonnull [[TMP]])
   Baz(Baz(A()));
 }

Modified: cfe/trunk/test/CodeGen/temporary-lifetime.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/temporary-lifetime.cpp?rev=281587&r1=281586&r2=281587&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/temporary-lifetime.cpp (original)
+++ cfe/trunk/test/CodeGen/temporary-lifetime.cpp Thu Sep 15 01:31:30 2016
@@ -21,27 +21,27 @@ T Baz();
 
 void Test1() {
   // CHECK-DTOR-LABEL: Test1
-  // CHECK-DTOR: call void @llvm.lifetime.start(i64 1024, i8* %[[ADDR:[0-9]+]])
+  // CHECK-DTOR: call void @llvm.lifetime.start(i64 1024, i8* nonnull %[[ADDR:[0-9]+]])
   // CHECK-DTOR: call void @_ZN1AC1Ev(%struct.A* nonnull %[[VAR:[^ ]+]])
   // CHECK-DTOR: call void @_Z3FooIRK1AEvOT_
   // CHECK-DTOR: call void @_ZN1AD1Ev(%struct.A* nonnull %[[VAR]])
-  // CHECK-DTOR: call void @llvm.lifetime.end(i64 1024, i8* %[[ADDR]])
-  // CHECK-DTOR: call void @llvm.lifetime.start(i64 1024, i8* %[[ADDR:[0-9]+]])
+  // CHECK-DTOR: call void @llvm.lifetime.end(i64 1024, i8* nonnull %[[ADDR]])
+  // CHECK-DTOR: call void @llvm.lifetime.start(i64 1024, i8* nonnull %[[ADDR:[0-9]+]])
   // CHECK-DTOR: call void @_ZN1AC1Ev(%struct.A* nonnull %[[VAR:[^ ]+]])
   // CHECK-DTOR: call void @_Z3FooIRK1AEvOT_
   // CHECK-DTOR: call void @_ZN1AD1Ev(%struct.A* nonnull %[[VAR]])
-  // CHECK-DTOR: call void @llvm.lifetime.end(i64 1024, i8* %[[ADDR]])
+  // CHECK-DTOR: call void @llvm.lifetime.end(i64 1024, i8* nonnull %[[ADDR]])
   // CHECK-DTOR: }
 
   // CHECK-NO-DTOR-LABEL: Test1
-  // CHECK-NO-DTOR: call void @llvm.lifetime.start(i64 1024, i8* %[[ADDR:[0-9]+]])
+  // CHECK-NO-DTOR: call void @llvm.lifetime.start(i64 1024, i8* nonnull %[[ADDR:[0-9]+]])
   // CHECK-NO-DTOR: cal

r323281 - Adjust MaxAtomicInlineWidth for i386/i486 targets.

2018-01-23 Thread Wei Mi via cfe-commits
Author: wmi
Date: Tue Jan 23 15:27:57 2018
New Revision: 323281

URL: http://llvm.org/viewvc/llvm-project?rev=323281&view=rev
Log:
Adjust MaxAtomicInlineWidth for i386/i486 targets.

This is to fix the bug reported in https://bugs.llvm.org/show_bug.cgi?id=34347#c6.
Currently, MaxAtomicInlineWidth is set to 64 for all x86-32 targets. However,
i386 doesn't support any cmpxchg-related instructions, and i486 only supports
cmpxchg. So in this patch MaxAtomicInlineWidth is reset as follows:
For i386, MaxAtomicInlineWidth should be 0 because no cmpxchg is supported.
For i486, MaxAtomicInlineWidth should be 32 because it supports cmpxchg.
For other 32-bit x86 CPUs, MaxAtomicInlineWidth should be 64 because of
cmpxchg8b.

Differential Revision: https://reviews.llvm.org/D42154
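
The new mapping can be observed directly (a sketch using the standard
__atomic_always_lock_free builtin; build with -triple=i386-linux-gnu and
-target-cpu i386, i486, or i586, as in the updated test below):

#include <cstdio>

int main() {
  // Expected constant folding with this patch:
  //   i386:  "0 0"  (no cmpxchg at all)
  //   i486:  "1 0"  (cmpxchg, but no cmpxchg8b)
  //   i586+: "1 1"  (cmpxchg8b available)
  std::printf("%d %d\n", (int)__atomic_always_lock_free(4, nullptr),
                         (int)__atomic_always_lock_free(8, nullptr));
}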

Modified:
cfe/trunk/lib/Basic/Targets/X86.h
cfe/trunk/test/CodeGenCXX/atomic-inline.cpp

Modified: cfe/trunk/lib/Basic/Targets/X86.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/X86.h?rev=323281&r1=323280&r2=323281&view=diff
==============================================================================
--- cfe/trunk/lib/Basic/Targets/X86.h (original)
+++ cfe/trunk/lib/Basic/Targets/X86.h Tue Jan 23 15:27:57 2018
@@ -100,6 +100,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetI
   bool HasRetpoline = false;
   bool HasRetpolineExternalThunk = false;
 
+protected:
   /// \brief Enumeration of all of the X86 CPUs supported by Clang.
   ///
   /// Each enumeration represents a particular CPU supported by Clang. These
@@ -325,9 +326,11 @@ public:
  (1 << TargetInfo::LongDouble));
 
 // x86-32 has atomics up to 8 bytes
-// FIXME: Check that we actually have cmpxchg8b before setting
-// MaxAtomicInlineWidth. (cmpxchg8b is an i586 instruction.)
-MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
+CPUKind Kind = getCPUKind(Opts.CPU);
+if (Kind >= CK_i586 || Kind == CK_Generic)
+  MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
+else if (Kind >= CK_i486)
+  MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 32;
   }
 
   BuiltinVaListKind getBuiltinVaListKind() const override {

Modified: cfe/trunk/test/CodeGenCXX/atomic-inline.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/atomic-inline.cpp?rev=323281&r1=323280&r2=323281&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCXX/atomic-inline.cpp (original)
+++ cfe/trunk/test/CodeGenCXX/atomic-inline.cpp Tue Jan 23 15:27:57 2018
@@ -1,6 +1,52 @@
 // RUN: %clang_cc1 %s -std=c++11 -emit-llvm -o - -triple=x86_64-linux-gnu | FileCheck %s
 // RUN: %clang_cc1 %s -std=c++11 -emit-llvm -o - -triple=x86_64-linux-gnu -target-cpu core2 | FileCheck %s --check-prefix=CORE2
-// Check the atomic code generation for cpu targets w/wo cx16 support.
+// RUN: %clang_cc1 %s -std=c++11 -emit-llvm -o - -triple=i386-linux-gnu -target-cpu i386 | FileCheck %s --check-prefix=I386
+// RUN: %clang_cc1 %s -std=c++11 -emit-llvm -o - -triple=i386-linux-gnu -target-cpu i486 | FileCheck %s --check-prefix=I486
+// Check the atomic code generation for cpu targets w/wo cx, cx8 and cx16 support.
+
+struct alignas(4) AM4 {
+  short f1, f2;
+};
+AM4 m4;
+AM4 load4() {
+  AM4 am;
+  // CHECK-LABEL: @_Z5load4v
+  // CHECK: load atomic i32, {{.*}} monotonic
+  // CORE2-LABEL: @_Z5load4v
+  // CORE2: load atomic i32, {{.*}} monotonic
+  // I386-LABEL: @_Z5load4v
+  // I386: call i32 @__atomic_load_4
+  // I486-LABEL: @_Z5load4v
+  // I486: load atomic i32, {{.*}} monotonic
+  __atomic_load(&m4, &am, 0);
+  return am;
+}
+
+AM4 s4;
+void store4() {
+  // CHECK-LABEL: @_Z6store4v
+  // CHECK: store atomic i32 {{.*}} monotonic
+  // CORE2-LABEL: @_Z6store4v
+  // CORE2: store atomic i32 {{.*}} monotonic
+  // I386-LABEL: @_Z6store4v
+  // I386: call void @__atomic_store_4
+  // I486-LABEL: @_Z6store4v
+  // I486: store atomic i32 {{.*}} monotonic
+  __atomic_store(&m4, &s4, 0);
+}
+
+bool cmpxchg4() {
+  AM4 am;
+  // CHECK-LABEL: @_Z8cmpxchg4v
+  // CHECK: cmpxchg i32* {{.*}} monotonic
+  // CORE2-LABEL: @_Z8cmpxchg4v
+  // CORE2: cmpxchg i32* {{.*}} monotonic
+  // I386-LABEL: @_Z8cmpxchg4v
+  // I386: call zeroext i1 @__atomic_compare_exchange_4
+  // I486-LABEL: @_Z8cmpxchg4v
+  // I486: cmpxchg i32* {{.*}} monotonic
+  return __atomic_compare_exchange(&m4, &s4, &am, 0, 0, 0);
+}
 
 struct alignas(8) AM8 {
   int f1, f2;
@@ -12,6 +58,10 @@ AM8 load8() {
   // CHECK: load atomic i64, {{.*}} monotonic
   // CORE2-LABEL: @_Z5load8v
   // CORE2: load atomic i64, {{.*}} monotonic
+  // I386-LABEL: @_Z5load8v
+  // I386: call i64 @__atomic_load_8
+  // I486-LABEL: @_Z5load8v
+  // I486: call i64 @__atomic_load_8
   __atomic_load(&m8, &am, 0);
   return am;
 }
@@ -22,6 +72,10 @@ void store8() {
   // CHECK: store atomic i64 {{.*}} monotonic
   // CORE2-LABEL: @_Z6store8v
   // CORE2: store atomic i64 {{.*}} monotonic
+  // I386-LABEL: @_Z6store8v
+  // I386: call void @__atomic_s

r265816 - Always have clang pass -pie-level and -pic-level values to the code generator.

2016-04-08 Thread Wei Mi via cfe-commits
Author: wmi
Date: Fri Apr  8 12:42:32 2016
New Revision: 265816

URL: http://llvm.org/viewvc/llvm-project?rev=265816&view=rev
Log:
Always have clang pass -pie-level and -pic-level values to the code generator.

Patch by tmsriram!

Differential Revision: http://reviews.llvm.org/D18843

Modified:
cfe/trunk/lib/Frontend/CompilerInvocation.cpp

Modified: cfe/trunk/lib/Frontend/CompilerInvocation.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/CompilerInvocation.cpp?rev=265816&r1=265815&r2=265816&view=diff
==============================================================================
--- cfe/trunk/lib/Frontend/CompilerInvocation.cpp (original)
+++ cfe/trunk/lib/Frontend/CompilerInvocation.cpp Fri Apr  8 12:42:32 2016
@@ -2106,6 +2106,7 @@ bool CompilerInvocation::CreateFromArgs(
   InputArgList Args =
   Opts->ParseArgs(llvm::makeArrayRef(ArgBegin, ArgEnd), MissingArgIndex,
   MissingArgCount, IncludedFlagsBitmask);
+  LangOptions &LangOpts = *Res.getLangOpts();
 
   // Check for missing argument error.
   if (MissingArgCount) {
@@ -2124,7 +2125,7 @@ bool CompilerInvocation::CreateFromArgs(
   Success &= ParseMigratorArgs(Res.getMigratorOpts(), Args);
   ParseDependencyOutputArgs(Res.getDependencyOutputOpts(), Args);
   Success &= ParseDiagnosticArgs(Res.getDiagnosticOpts(), Args, &Diags);
-  ParseCommentArgs(Res.getLangOpts()->CommentOpts, Args);
+  ParseCommentArgs(LangOpts.CommentOpts, Args);
   ParseFileSystemArgs(Res.getFileSystemOpts(), Args);
   // FIXME: We shouldn't have to pass the DashX option around here
   InputKind DashX = ParseFrontendArgs(Res.getFrontendOpts(), Args, Diags);
@@ -2137,22 +2138,26 @@ bool CompilerInvocation::CreateFromArgs(
 // PassManager in BackendUtil.cpp. They need to be initializd no matter
 // what the input type is.
 if (Args.hasArg(OPT_fobjc_arc))
-  Res.getLangOpts()->ObjCAutoRefCount = 1;
+  LangOpts.ObjCAutoRefCount = 1;
+// PIClevel and PIELevel are needed during code generation and this should be
+// set regardless of the input type.
+LangOpts.PICLevel = getLastArgIntValue(Args, OPT_pic_level, 0, Diags);
+LangOpts.PIELevel = getLastArgIntValue(Args, OPT_pie_level, 0, Diags);
 parseSanitizerKinds("-fsanitize=", Args.getAllArgValues(OPT_fsanitize_EQ),
-Diags, Res.getLangOpts()->Sanitize);
+Diags, LangOpts.Sanitize);
   } else {
 // Other LangOpts are only initialzed when the input is not AST or LLVM IR.
-ParseLangArgs(*Res.getLangOpts(), Args, DashX, Res.getTargetOpts(), Diags);
+ParseLangArgs(LangOpts, Args, DashX, Res.getTargetOpts(), Diags);
 if (Res.getFrontendOpts().ProgramAction == frontend::RewriteObjC)
-  Res.getLangOpts()->ObjCExceptions = 1;
+  LangOpts.ObjCExceptions = 1;
   }
 
   // FIXME: Override value name discarding when asan or msan is used because the
   // backend passes depend on the name of the alloca in order to print out
   // names.
   Res.getCodeGenOpts().DiscardValueNames &=
-  !Res.getLangOpts()->Sanitize.has(SanitizerKind::Address) &&
-  !Res.getLangOpts()->Sanitize.has(SanitizerKind::Memory);
+  !LangOpts.Sanitize.has(SanitizerKind::Address) &&
+  !LangOpts.Sanitize.has(SanitizerKind::Memory);
 
   // FIXME: ParsePreprocessorArgs uses the FileManager to read the contents of
   // PCH file and find the original header name. Remove the need to do that in


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D18843: Always have clang pass -pie-level and -pic-level values to the code generator

2016-04-08 Thread Wei Mi via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL265816: Always have clang pass -pie-level and -pic-level 
values to the code generator. (authored by wmi).

Changed prior to commit:
  http://reviews.llvm.org/D18843?vs=52945&id=53038#toc

Repository:
  rL LLVM

http://reviews.llvm.org/D18843

Files:
  cfe/trunk/lib/Frontend/CompilerInvocation.cpp

Index: cfe/trunk/lib/Frontend/CompilerInvocation.cpp
===
--- cfe/trunk/lib/Frontend/CompilerInvocation.cpp
+++ cfe/trunk/lib/Frontend/CompilerInvocation.cpp
@@ -2106,6 +2106,7 @@
   InputArgList Args =
   Opts->ParseArgs(llvm::makeArrayRef(ArgBegin, ArgEnd), MissingArgIndex,
   MissingArgCount, IncludedFlagsBitmask);
+  LangOptions &LangOpts = *Res.getLangOpts();
 
   // Check for missing argument error.
   if (MissingArgCount) {
@@ -2124,7 +2125,7 @@
   Success &= ParseMigratorArgs(Res.getMigratorOpts(), Args);
   ParseDependencyOutputArgs(Res.getDependencyOutputOpts(), Args);
   Success &= ParseDiagnosticArgs(Res.getDiagnosticOpts(), Args, &Diags);
-  ParseCommentArgs(Res.getLangOpts()->CommentOpts, Args);
+  ParseCommentArgs(LangOpts.CommentOpts, Args);
   ParseFileSystemArgs(Res.getFileSystemOpts(), Args);
   // FIXME: We shouldn't have to pass the DashX option around here
   InputKind DashX = ParseFrontendArgs(Res.getFrontendOpts(), Args, Diags);
@@ -2137,22 +2138,26 @@
 // PassManager in BackendUtil.cpp. They need to be initializd no matter
 // what the input type is.
 if (Args.hasArg(OPT_fobjc_arc))
-  Res.getLangOpts()->ObjCAutoRefCount = 1;
+  LangOpts.ObjCAutoRefCount = 1;
+// PIClevel and PIELevel are needed during code generation and this should be
+// set regardless of the input type.
+LangOpts.PICLevel = getLastArgIntValue(Args, OPT_pic_level, 0, Diags);
+LangOpts.PIELevel = getLastArgIntValue(Args, OPT_pie_level, 0, Diags);
 parseSanitizerKinds("-fsanitize=", Args.getAllArgValues(OPT_fsanitize_EQ),
-Diags, Res.getLangOpts()->Sanitize);
+Diags, LangOpts.Sanitize);
   } else {
 // Other LangOpts are only initialzed when the input is not AST or LLVM IR.
-ParseLangArgs(*Res.getLangOpts(), Args, DashX, Res.getTargetOpts(), Diags);
+ParseLangArgs(LangOpts, Args, DashX, Res.getTargetOpts(), Diags);
 if (Res.getFrontendOpts().ProgramAction == frontend::RewriteObjC)
-  Res.getLangOpts()->ObjCExceptions = 1;
+  LangOpts.ObjCExceptions = 1;
   }
 
  // FIXME: Override value name discarding when asan or msan is used because the
   // backend passes depend on the name of the alloca in order to print out
   // names.
   Res.getCodeGenOpts().DiscardValueNames &=
-  !Res.getLangOpts()->Sanitize.has(SanitizerKind::Address) &&
-  !Res.getLangOpts()->Sanitize.has(SanitizerKind::Memory);
+  !LangOpts.Sanitize.has(SanitizerKind::Address) &&
+  !LangOpts.Sanitize.has(SanitizerKind::Memory);
 
   // FIXME: ParsePreprocessorArgs uses the FileManager to read the contents of
   // PCH file and find the original header name. Remove the need to do that in



[clang] d535a05 - [ThinLTO] During module importing, close one source module before open

2021-03-30 Thread Wei Mi via cfe-commits

Author: Wei Mi
Date: 2021-03-30T14:37:29-07:00
New Revision: d535a05ca1a6b959399762d5e14efde1fcfe6af8

URL: 
https://github.com/llvm/llvm-project/commit/d535a05ca1a6b959399762d5e14efde1fcfe6af8
DIFF: 
https://github.com/llvm/llvm-project/commit/d535a05ca1a6b959399762d5e14efde1fcfe6af8.diff

LOG: [ThinLTO] During module importing, close one source module before opening
another one for distributed mode.

Currently during module importing, ThinLTO opens all the source modules,
collects the functions to be imported and appends them to the destination
module, then leaves all the modules open throughout the LTO backend pipeline.
This patch refactors it so that one source module is closed before another
source module is opened, and all the source modules are closed once the
importing phase is done. This saves a fair amount of memory when there are
many source modules to be imported.

Note that this patch only changes the distributed ThinLTO mode. For the
in-process ThinLTO mode, one source module is shared across different ThinLTO
backend threads, so it is not changed in this patch.
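
The shape of the fix, as a self-contained sketch (the file names, the string
buffer, and the "import" step are placeholders; the real code deals in
BitcodeModule objects): each source is mapped into a scoped buffer that is
destroyed before the next one is opened.

  #include <fstream>
  #include <iostream>
  #include <iterator>
  #include <memory>
  #include <string>
  #include <vector>

  static std::unique_ptr<std::string> loadFile(const std::string &Path) {
    std::ifstream In(Path, std::ios::binary);
    return std::make_unique<std::string>(std::istreambuf_iterator<char>(In),
                                         std::istreambuf_iterator<char>());
  }

  int main() {
    const std::vector<std::string> ImportList = {"a.bc", "b.bc", "c.bc"};
    for (const std::string &Path : ImportList) {
      std::unique_ptr<std::string> Buf = loadFile(Path); // opened on demand
      std::cout << Path << ": " << Buf->size() << " bytes imported\n";
    } // Buf is destroyed here: at most one source buffer is alive at a time
  }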

Differential Revision: https://reviews.llvm.org/D99554

Added: 


Modified: 
clang/lib/CodeGen/BackendUtil.cpp
clang/test/CodeGen/thinlto_backend.ll
llvm/include/llvm/LTO/LTOBackend.h
llvm/lib/LTO/LTO.cpp
llvm/lib/LTO/LTOBackend.cpp

Removed: 




diff  --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 6de482ea74f52..41eafd13d97c3 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -1503,10 +1503,7 @@ static void runThinLTOBackend(
   // we should only invoke this using the individual indexes written out
   // via a WriteIndexesThinBackend.
   FunctionImporter::ImportMapTy ImportList;
-  std::vector<std::unique_ptr<llvm::MemoryBuffer>> OwnedImports;
-  MapVector<llvm::StringRef, llvm::BitcodeModule> ModuleMap;
-  if (!lto::loadReferencedModules(*M, *CombinedIndex, ImportList, ModuleMap,
-                                  OwnedImports))
+  if (!lto::initImportList(*M, *CombinedIndex, ImportList))
 return;
 
   auto AddStream = [&](size_t Task) {
@@ -1583,7 +1580,7 @@ static void runThinLTOBackend(
   if (Error E =
   thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList,
   ModuleToDefinedGVSummaries[M->getModuleIdentifier()],
-  ModuleMap, CGOpts.CmdArgs)) {
+  /* ModuleMap */ nullptr, CGOpts.CmdArgs)) {
 handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
   errs() << "Error running ThinLTO backend: " << EIB.message() << '\n';
 });

diff  --git a/clang/test/CodeGen/thinlto_backend.ll b/clang/test/CodeGen/thinlto_backend.ll
index 715ff1ec229ea..c8b840e400066 100644
--- a/clang/test/CodeGen/thinlto_backend.ll
+++ b/clang/test/CodeGen/thinlto_backend.ll
@@ -47,7 +47,7 @@
 ; Ensure we get expected error for input files without summaries
 ; RUN: opt -o %t2.o %s
; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t3.o -x ir %t1.o -c -fthinlto-index=%t.thinlto.bc 2>&1 | FileCheck %s -check-prefix=CHECK-ERROR2
-; CHECK-ERROR2: Error loading imported file '{{.*}}': Could not find module summary
+; CHECK-ERROR2: Error loading imported file {{.*}}: Could not find module summary
 
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"

diff  --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h
index 32b7984787b01..de89f4bb10dff 100644
--- a/llvm/include/llvm/LTO/LTOBackend.h
+++ b/llvm/include/llvm/LTO/LTOBackend.h
@@ -46,11 +46,16 @@ Error backend(const Config &C, AddStreamFn AddStream,
   ModuleSummaryIndex &CombinedIndex);
 
 /// Runs a ThinLTO backend.
+/// If \p ModuleMap is not nullptr, all the module files to be imported have
+/// already been mapped to memory and the corresponding BitcodeModule objects
+/// are saved in the ModuleMap. If \p ModuleMap is nullptr, module files will
+/// be mapped to memory on demand and at any given time during importing, only
+/// one source module will be kept open at the most.
 Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream,
   Module &M, const ModuleSummaryIndex &CombinedIndex,
   const FunctionImporter::ImportMapTy &ImportList,
   const GVSummaryMapTy &DefinedGlobals,
-                  MapVector<StringRef, BitcodeModule> &ModuleMap,
+                  MapVector<StringRef, BitcodeModule> *ModuleMap,
                   const std::vector<uint8_t> &CmdArgs = std::vector<uint8_t>());
 
 Error finalizeOptimizationRemarks(
@@ -62,15 +67,11 @@ BitcodeModule *findThinLTOModule(MutableArrayRef<BitcodeModule> BMs);
 /// Variant of the above.
Expected<BitcodeModule> findThinLTOModule(MemoryBufferRef MBRef);
 
-/// Distributed ThinLTO: load the referenced modules, keeping their buffers
-/// alive in the provided OwnedImportLifetimeManager. Returns false if the
+/// Distributed ThinLTO: collect the referenced