Re: [PATCH] D24682: [PR30341] Alias must point to a definition
sebpop added a comment. The change looks good to me. Somebody else should approve. https://reviews.llvm.org/D24682 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D24481: make “#pragma STDC FP_CONTRACT” on by default
This revision was automatically updated to reflect the committed changes. Closed by commit rL282259: set the underlying value of “#pragma STDC FP_CONTRACT” on by default (authored by spop). Changed prior to commit: https://reviews.llvm.org/D24481?vs=72186&id=72299#toc Repository: rL LLVM https://reviews.llvm.org/D24481 Files: cfe/trunk/lib/Frontend/CompilerInvocation.cpp cfe/trunk/test/CodeGen/aarch64-neon-fma.c cfe/trunk/test/CodeGen/aarch64-scalar-fma.c cfe/trunk/test/CodeGen/fp-contract-pragma.cpp cfe/trunk/test/CodeGen/fp-contract-pragma___on-by-default.c cfe/trunk/test/Driver/clang_f_opts.c Index: cfe/trunk/lib/Frontend/CompilerInvocation.cpp === --- cfe/trunk/lib/Frontend/CompilerInvocation.cpp +++ cfe/trunk/lib/Frontend/CompilerInvocation.cpp @@ -2445,6 +2445,12 @@ if (Arch == llvm::Triple::spir || Arch == llvm::Triple::spir64) { Res.getDiagnosticOpts().Warnings.push_back("spir-compat"); } + + if ((LangOpts.C11 || LangOpts.C99 || LangOpts.CPlusPlus) && + (CodeGenOptions::FPC_On == Res.getCodeGenOpts().getFPContractMode()) && + !LangOpts.CUDA) +LangOpts.DefaultFPContract = 1; + return Success; } Index: cfe/trunk/test/CodeGen/fp-contract-pragma___on-by-default.c === --- cfe/trunk/test/CodeGen/fp-contract-pragma___on-by-default.c +++ cfe/trunk/test/CodeGen/fp-contract-pragma___on-by-default.c @@ -0,0 +1,21 @@ +// RUN: %clang_cc1 -triple aarch64 -O0 -S -o - %s | FileCheck %s --check-prefix ALL_BUILDS +// RUN: %clang_cc1 -triple aarch64 -O1 -S -o - %s | FileCheck %s --check-prefixes ALL_BUILDS,NON_O0 +// RUN: %clang_cc1 -triple aarch64 -O2 -S -o - %s | FileCheck %s --check-prefixes ALL_BUILDS,NON_O0 +// RUN: %clang_cc1 -triple aarch64 -O3 -S -o - %s | FileCheck %s --check-prefixes ALL_BUILDS,NON_O0 + +// REQUIRES: aarch64-registered-target + +// ALL_BUILDS-LABEL: fmadd_double: +// ALL_BUILDS: fmadd d0, d{{[0-7]}}, d{{[0-7]}}, d{{[0-7]}} +// NON_O0-NEXT: ret +double fmadd_double(double a, double b, double c) { + return a*b+c; +} + +// ALL_BUILDS: fmadd_single: +// ALL_BUILDS: fmadd s0, s{{[0-7]}}, s{{[0-7]}}, s{{[0-7]}} +// NON_O0-NEXT: ret +float fmadd_single(float a, float b, float c) { + return a*b+c; +} + Index: cfe/trunk/test/CodeGen/aarch64-neon-fma.c === --- cfe/trunk/test/CodeGen/aarch64-neon-fma.c +++ cfe/trunk/test/CodeGen/aarch64-neon-fma.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -ffp-contract=off -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s // Test new aarch64 intrinsics and types Index: cfe/trunk/test/CodeGen/aarch64-scalar-fma.c === --- cfe/trunk/test/CodeGen/aarch64-scalar-fma.c +++ cfe/trunk/test/CodeGen/aarch64-scalar-fma.c @@ -0,0 +1,177 @@ +// RUN: %clang_cc1 -triple=aarch64-unknown -Os -ffp-contract=fast -S -o - %s | FileCheck -check-prefix=CHECK-FAST -check-prefix=CHECK-ALL %s +// RUN: %clang_cc1 -triple=aarch64-unknown -Os -ffp-contract=on -S -o - %s | FileCheck -check-prefix=CHECK-ON -check-prefix=CHECK-ALL %s +// RUN: %clang_cc1 -triple=aarch64-unknown -Os -ffp-contract=off -S -o - %s | FileCheck -check-prefix=CHECK-OFF -check-prefix=CHECK-ALL %s +// RUN: %clang_cc1 -triple=aarch64-unknown -Os -S -o - %s | FileCheck -check-prefix=CHECK-ON -check-prefix=CHECK-ALL %s +// REQUIRES: aarch64-registered-target + +float test1(float x, float y, float z) { + return x*y + z; + // CHECK-ALL-LABEL: test1: + // CHECK-FAST: fmadd + // CHECK-ON: fmadd + // CHECK-OFF: fmul + // CHECK-OFF-NEXT: fadd +} + +double test2(double x, double y, double z) { + z -= x*y; + return z; + // CHECK-ALL-LABEL: test2: + // CHECK-FAST: fmsub + // CHECK-ON: fmsub + // CHECK-OFF: fmul + // CHECK-OFF-NEXT: fsub +} + +float test3(float x, float y, float z) { + float tmp = x*y; + return tmp + z; + // CHECK-ALL-LABEL: test3: + // CHECK-FAST: fmadd + // CHECK-ON: fmul + // CHECK-ON-NEXT: fadd + // CHECK-OFF: fmul + // CHECK-OFF-NEXT: fadd +} + +double test4(double x, double y, double z) { + double tmp = x*y; + return tmp - z; + // CHECK-ALL-LABEL: test4: + // CHECK-FAST: fnmsub + // CHECK-ON: fmul + // CHECK-ON-NEXT: fsub + // CHECK-OFF: fmul + // CHECK-OFF-NEXT: fsub +} + +#pragma STDC FP_CONTRACT ON + +float test5(float x, float y, float z) { + return x*y + z; + // CHECK-ALL-LABEL: test5: + // CHECK-FAST: fmadd + // CHECK-ON: fmadd + // CHECK-OFF: fmul + // CHECK-OFF-NEXT: fadd +} + +double test6(double x, double y, double z) { + z -= x*y; + return z; + // CHECK-ALL-LABEL: test6: + // CHECK-FAST: fmsub + // CHECK-ON: fmsub + // CHECK-OFF: fmul + // CHECK-OFF-NEXT: fsub +} + +float te
Re: [PATCH] D24682: [PR30341] Alias must point to a definition
sebpop added inline comments. Comment at: clang/lib/CodeGen/CGCXX.cpp:140-142 @@ +139,5 @@ + // FIXME: An extern template instantiation will create functions with + // linkage "AvailableExternally". In libc++, some classes also define + // members with attribute "AlwaysInline" and expect no reference to + // be generated. It is desirable to reenable this optimisation after + // corresponding LLVM changes. rsmith wrote: > It's not clear what this comment about `AlwaysInline` is referring to, since > the code does not mention that. I think we should just remove all the FIXME note: Aditya has just moved this comment from below... to here. Comment at: clang/lib/CodeGen/CGCXX.cpp:162 @@ -161,3 @@ - !TargetDecl.getDecl()->hasAttr())) { -// FIXME: An extern template instantiation will create functions with -// linkage "AvailableExternally". In libc++, some classes also define ... from here ... Comment at: clang/lib/CodeGen/CGCXX.cpp:170 @@ -159,9 +169,3 @@ if (llvm::GlobalValue::isDiscardableIfUnused(Linkage) && - (TargetLinkage != llvm::GlobalValue::AvailableExternallyLinkage || - !TargetDecl.getDecl()->hasAttr())) { -// FIXME: An extern template instantiation will create functions with -// linkage "AvailableExternally". In libc++, some classes also define -// members with attribute "AlwaysInline" and expect no reference to -// be generated. It is desirable to reenable this optimisation after -// corresponding LLVM changes. + !TargetDecl.getDecl()->hasAttr()) { addReplacement(MangledName, Aliasee); rsmith wrote: > Did you mean to change the behavior here? For non-`available_externally` > functions, we never used to care whether they're `always_inline`. Why do we > care now? I think this change does not modify the current behavior: the check has been moved up and we early return in that case. https://reviews.llvm.org/D24682 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D24991: Inline hot functions in libcxx shared_ptr implementation.
sebpop added a comment. In https://reviews.llvm.org/D24991#565861, @EricWF wrote: > Please provide benchmark tests which demonstrate that these benefits are > concrete and not just "potential". Moving methods out of the dylib is no > easy task so I would like hard evidence that it's worth while. With this patch we have seen the score of a proprietary benchmark going up by 20%, matching the performance we see with LLVM + libstdc++. We will provide a testcase that shows the performance uplift. Repository: rL LLVM https://reviews.llvm.org/D24991 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D24991: Inline hot functions in libcxx shared_ptr implementation.
sebpop added a comment. In https://reviews.llvm.org/D24991#565861, @EricWF wrote: > In https://reviews.llvm.org/D24991#565715, @mclow.lists wrote: > > > How does this play with existing binaries? Applications that expect these > > functions to exist in the dylib? > > > This patch is majorly ABI breaking, although we could probably find a > formulation that wasn't. Eric, Marshall, any suggestions on how to fix the backwards compatibility issue? Thanks! Repository: rL LLVM https://reviews.llvm.org/D24991 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[libcxx] r284179 - remove warnings from google-benchmarks in libcxx
Author: spop Date: Thu Oct 13 19:07:57 2016 New Revision: 284179 URL: http://llvm.org/viewvc/llvm-project?rev=284179&view=rev Log: remove warnings from google-benchmarks in libcxx Differential Revision: https://reviews.llvm.org/D25522 Patch written by Aditya Kumar. Modified: libcxx/trunk/benchmarks/ContainerBenchmarks.hpp libcxx/trunk/benchmarks/GenerateInput.hpp Modified: libcxx/trunk/benchmarks/ContainerBenchmarks.hpp URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/benchmarks/ContainerBenchmarks.hpp?rev=284179&r1=284178&r2=284179&view=diff == --- libcxx/trunk/benchmarks/ContainerBenchmarks.hpp (original) +++ libcxx/trunk/benchmarks/ContainerBenchmarks.hpp Thu Oct 13 19:07:57 2016 @@ -11,10 +11,11 @@ namespace ContainerBenchmarks { template void BM_ConstructIterIter(benchmark::State& st, Container, GenInputs gen) { auto in = gen(st.range(0)); +const auto begin = in.begin(); const auto end = in.end(); benchmark::DoNotOptimize(&in); while (st.KeepRunning()) { -Container c(in.begin(), in.end()); +Container c(begin, end); benchmark::DoNotOptimize(c.data()); } } Modified: libcxx/trunk/benchmarks/GenerateInput.hpp URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/benchmarks/GenerateInput.hpp?rev=284179&r1=284178&r2=284179&view=diff == --- libcxx/trunk/benchmarks/GenerateInput.hpp (original) +++ libcxx/trunk/benchmarks/GenerateInput.hpp Thu Oct 13 19:07:57 2016 @@ -112,7 +112,7 @@ inline std::vector getDupli inline std::vector getRandomStringInputs(size_t N) { std::vector inputs; -for (int i=0; i < N; ++i) { +for (size_t i=0; i < N; ++i) { inputs.push_back(getRandomString(1024)); } return inputs; ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D25624: Added 'inline' attribute to basic_string's destructor
sebpop accepted this revision. sebpop added a reviewer: sebpop. sebpop added a comment. This revision is now accepted and ready to land. This got approved in the past review. Let's commit it now that the clang bug was fixed. Thanks Aditya for keeping track of this. https://reviews.llvm.org/D25624 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D25624: Added 'inline' attribute to basic_string's destructor
sebpop added a comment. If I remember correctly, we pushed the fix after 3.9 was released. Could you please explain the problem of requiring trunk LLVM to build trunk libcxx? https://reviews.llvm.org/D25624 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[libcxx] r290761 - improve performance of string::find
Author: spop Date: Fri Dec 30 12:01:36 2016 New Revision: 290761 URL: http://llvm.org/viewvc/llvm-project?rev=290761&view=rev Log: improve performance of string::find string::find used to call the generic algorithm ::find. The patch special case string::find such that it ultimately gets converted to calls to memchr and memcmp. The patch improves the performance of the string::find routine by about 20x. Without the patch, the performance on an x86_64-linux 3400 MHz machine is: Benchmark Time CPU Iterations - BM_StringFindNoMatch/10 4 ns 4 ns 166421326 BM_StringFindNoMatch/6437 ns 37 ns 18754392 BM_StringFindNoMatch/512 268 ns268 ns2586060 BM_StringFindNoMatch/4k 2143 ns 2144 ns 328342 BM_StringFindNoMatch/32k16910 ns 16917 ns 40623 BM_StringFindNoMatch/128k 67577 ns 67602 ns 10138 BM_StringFindAllMatch/1 3 ns 3 ns 265163471 BM_StringFindAllMatch/8 6 ns 6 ns 112582467 BM_StringFindAllMatch/64 36 ns 36 ns 19566457 BM_StringFindAllMatch/512 209 ns209 ns3318893 BM_StringFindAllMatch/4k 1618 ns 1618 ns 432963 BM_StringFindAllMatch/32k 12909 ns 12914 ns 54317 BM_StringFindAllMatch/128k 48342 ns 48361 ns 13922 BM_StringFindMatch1/1 33777 ns 33790 ns 20698 BM_StringFindMatch1/8 33940 ns 33953 ns 20619 BM_StringFindMatch1/64 34038 ns 34051 ns 20571 BM_StringFindMatch1/512 34217 ns 34230 ns 20480 BM_StringFindMatch1/4k 35510 ns 35524 ns 19752 BM_StringFindMatch1/32k 46438 ns 46456 ns 15030 BM_StringFindMatch2/1 33839 ns 33852 ns 20648 BM_StringFindMatch2/8 33950 ns 33963 ns 20594 BM_StringFindMatch2/64 33846 ns 33859 ns 20668 BM_StringFindMatch2/512 34023 ns 34036 ns 20279 BM_StringFindMatch2/4k 35422 ns 35436 ns 19716 BM_StringFindMatch2/32k 46570 ns 46588 ns 15027 With the patch applied Benchmark Time CPU Iterations - BM_StringFindNoMatch/10 5 ns 5 ns 133724346 BM_StringFindNoMatch/64 6 ns 6 ns 119312184 BM_StringFindNoMatch/512 13 ns 13 ns 51539628 BM_StringFindNoMatch/4k77 ns 77 ns8935934 BM_StringFindNoMatch/32k 551 ns551 ns1222808 BM_StringFindNoMatch/128k2684 ns 2685 ns 259957 BM_StringFindAllMatch/1 7 ns 7 ns 98017959 BM_StringFindAllMatch/8 7 ns 7 ns 91466911 BM_StringFindAllMatch/648 ns 8 ns 85707392 BM_StringFindAllMatch/512 20 ns 20 ns 34490895 BM_StringFindAllMatch/4k 93 ns 93 ns7360375 BM_StringFindAllMatch/32k 827 ns828 ns 829944 BM_StringFindAllMatch/128k 3593 ns 3594 ns 195815 BM_StringFindMatch1/11332 ns 1332 ns 516354 BM_StringFindMatch1/81336 ns 1336 ns 495876 BM_StringFindMatch1/64 1338 ns 1339 ns 516656 BM_StringFindMatch1/512 1357 ns 1357 ns 510717 BM_StringFindMatch1/4k 1485 ns 1486 ns 461228 BM_StringFindMatch1/32k 2235 ns 2236 ns 318253 BM_StringFindMatch2/11335 ns 1335 ns 517105 BM_StringFindMatch2/81336 ns 1337 ns 518004 BM_StringFindMatch2/64 1344 ns 1345 ns 511751 BM_StringFindMatch2/512 1361 ns 1361 ns 508150 BM_StringFindMatch2/4k 1611 ns 1611 ns 463388 BM_StringFindMatch2/32k 2187 ns 2187 ns 317532 Patch written by Aditya Kumar and Sebastian Pop. Differential Revision: https://reviews.llvm.org/D27068 Added: libcxx/trunk/benchmarks/string.bench.cpp Modified: libcxx/trunk/include/__string Added: libcxx/trunk/benchmarks/string.bench.cpp URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/benchmarks/string.bench.cpp?rev=290761&view=auto == --- libcxx/trunk/benchmarks/string.bench.cpp (added) +++ libcxx/trunk/benchmarks/string.bench.cpp Fri Dec 30 12:01:36 2016 @@ -0,0 +1,49 @@ +#include +#include +#include + +#include "benchmark/benchmark_api.h" +#include "GenerateInput.hpp" + +constexpr std::size_t MAX_STRING_LEN = 8 << 14; + +// Benchmark when there is no match. +static void BM_StringFindNoMatch(benchmark::State &state) { + std::string s1(state.range(0), '-'); + std::string s2(8, '*'); + while (state.KeepRunnin
Re: [PATCH] D23855: Make exception-throwing from a noexcept build `abort()`.
sebpop added a comment. I like the patch. Thanks for removing #ifdefs from the code: it improves readability in general. Would it be possible to move the __throw_* functions in a same .h file to avoid having them all over? Comment at: include/array:212 @@ -214,3 +211,3 @@ #else -assert(!"array::at out_of_range"); +_VSTD::abort(); #endif Maybe you can use __throw_out_of_range? Fewer #ifdefs in the code is better. Comment at: include/array:226 @@ -228,3 +225,3 @@ #else -assert(!"array::at out_of_range"); +_VSTD::abort(); #endif __throw_out_of_range Comment at: include/bitset:218 @@ +217,3 @@ +{ +#ifndef _LIBCPP_NO_EXCEPTIONS +throw overflow_error(__msg); To improve readability, could you please avoid the double negation by turning around the clauses: #ifdef _LIBCPP_NO_EXCEPTIONS _VSTD::abort(); #else throw overflow_error(__msg); #endif And the same below in all the __throw_* functions. Thanks! Comment at: include/deque:909 @@ -908,1 +908,3 @@ +#else + _VSTD::abort(); #endif Maybe you can add another function for this one: __throw_length_error Comment at: include/deque:920 @@ -917,1 +919,3 @@ +#else + _VSTD::abort(); #endif __throw_out_of_range Comment at: include/experimental/any:106 @@ -106,3 +105,3 @@ #else -assert(!"bad_any_cast"); +_VSTD::abort(); #endif Maybe add a new function __throw_bad_any_cast? Comment at: include/experimental/dynarray:145 @@ -148,3 +144,3 @@ #else -assert(!"dynarray::allocation"); +_VSTD::abort(); #endif New function: __throw_bad_array_length? Comment at: include/experimental/dynarray:286 @@ -289,3 +285,3 @@ #else -assert(!"dynarray::at out_of_range"); +_VSTD::abort(); #endif __throw_out_of_range Comment at: include/experimental/dynarray:302 @@ -305,3 +301,3 @@ #else -assert(!"dynarray::at out_of_range"); +_VSTD::abort(); #endif __throw_out_of_range https://reviews.llvm.org/D23855 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D23855: Make exception-throwing from a noexcept build `abort()`.
sebpop accepted this revision. sebpop added a comment. This revision is now accepted and ready to land. Very nice cleanup. Maybe you can move some more #ifdefs into __throw_* functions, although as is LGTM. Thanks! https://reviews.llvm.org/D23855 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D24097: Add testcases for PR30188
sebpop created this revision. sebpop added reviewers: jmolloy, danielcdh, davidxl, mkuper, dberlin. sebpop added subscribers: llvm-commits, cfe-commits. Add testcases to clang test/ to make sure that the combined middle-end optimizations do not regress on optimizing the number of loads in the loops. https://reviews.llvm.org/D24097 Files: clang/test/CodeGen/pr30188.c clang/test/CodeGenCXX/pr30188.cpp Index: clang/test/CodeGenCXX/pr30188.cpp === --- /dev/null +++ clang/test/CodeGenCXX/pr30188.cpp @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -O2 -emit-llvm -o - %s | FileCheck %s + +// Check that optimizers are able to optimize the number of loads in the loop. +// CHECK: load +// CHECK-NOT: load + +int test(float exp, float *array) { + int hi = 255; + int lo = 0; + int offset = 0; + + while(hi > lo) { +unsigned delta = (hi - lo) / 2u; +delta = 1u > delta ? 1u : delta; +offset = lo + delta; + +if (array[offset] > exp) + hi = hi - delta; +else + lo = lo + delta; + } + + return offset; +} Index: clang/test/CodeGen/pr30188.c === --- /dev/null +++ clang/test/CodeGen/pr30188.c @@ -0,0 +1,32 @@ +// RUN: %clang_cc1 -O2 -emit-llvm -o - %s | FileCheck %s + +// Check that optimizers are able to optimize the number of loads in the loop. +// CHECK: load +// CHECK: load +// CHECK: load +// CHECK-NOT: load + +struct b { + int x_; +}; + +struct a { + int l_; + struct b *data_; +}; + +int BinarySearch(struct a *input, struct b t) { + if (input->l_ > 0) { +int low = 0; +int high = input->l_; +while (high != low + 1) { + int mid = (high + low) / 2; + if (input->data_[mid].x_ > t.x_) +high = mid; + else +low = mid; +} +return low; + } + return -1; +} Index: clang/test/CodeGenCXX/pr30188.cpp === --- /dev/null +++ clang/test/CodeGenCXX/pr30188.cpp @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -O2 -emit-llvm -o - %s | FileCheck %s + +// Check that optimizers are able to optimize the number of loads in the loop. +// CHECK: load +// CHECK-NOT: load + +int test(float exp, float *array) { + int hi = 255; + int lo = 0; + int offset = 0; + + while(hi > lo) { +unsigned delta = (hi - lo) / 2u; +delta = 1u > delta ? 1u : delta; +offset = lo + delta; + +if (array[offset] > exp) + hi = hi - delta; +else + lo = lo + delta; + } + + return offset; +} Index: clang/test/CodeGen/pr30188.c === --- /dev/null +++ clang/test/CodeGen/pr30188.c @@ -0,0 +1,32 @@ +// RUN: %clang_cc1 -O2 -emit-llvm -o - %s | FileCheck %s + +// Check that optimizers are able to optimize the number of loads in the loop. +// CHECK: load +// CHECK: load +// CHECK: load +// CHECK-NOT: load + +struct b { + int x_; +}; + +struct a { + int l_; + struct b *data_; +}; + +int BinarySearch(struct a *input, struct b t) { + if (input->l_ > 0) { +int low = 0; +int high = input->l_; +while (high != low + 1) { + int mid = (high + low) / 2; + if (input->data_[mid].x_ > t.x_) +high = mid; + else +low = mid; +} +return low; + } + return -1; +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D24097: Add testcases for PR30188
sebpop abandoned this revision. sebpop added a comment. Agreed, this is a test for llvm/opt. I will submit another patch. https://reviews.llvm.org/D24097 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D24682: [PR30341] Alias must point to a definition
sebpop added inline comments. Comment at: clang/lib/CodeGen/CGCXX.cpp:137 @@ -136,1 +136,3 @@ + // r254170: Disallow aliases to available_externally. + if (TargetLinkage == llvm::GlobalValue::AvailableExternallyLinkage) Please remove the reference to r254170. You may want to put a reference to a bug, or better, just describe in full why this is disabled. Comment at: clang/lib/CodeGen/CGCXX.cpp:166 @@ -162,3 +165,3 @@ // FIXME: An extern template instantiation will create functions with // linkage "AvailableExternally". In libc++, some classes also define // members with attribute "AlwaysInline" and expect no reference to You may want to adjust this comment, now that the condition does not check for "AvailableExternally". You can move part of the FIXME in the comment above. https://reviews.llvm.org/D24682 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D24991: Inline hot functions in libcxx shared_ptr implementation.
sebpop added a comment. In https://reviews.llvm.org/D24991#571056, @hiraditya wrote: > Marshall suggests using macro as we discussed offline. For some reason the > reply does not appear here: > http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20161010/173780.html Ping. Repository: rL LLVM https://reviews.llvm.org/D24991 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D24991: Inline hot functions in libcxx shared_ptr implementation.
sebpop removed rL LLVM as the repository for this revision. sebpop updated this revision to Diff 75908. sebpop added a comment. The patch also implements the idea that Marshall proposed in: http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20161010/173780.html > I have an idea; it involves a macro that is sometimes "inline" and > sometimes not, and changes when you're building the library vs. when you're > just including the headers. Tested on x86_64-linux. The symbols for the functions that are now inlined still appear in the libc++.so. Ok to commit? https://reviews.llvm.org/D24991 Files: libcxx/include/__atomic_support libcxx/include/memory libcxx/src/include/atomic_support.h libcxx/src/memory.cpp libcxx/src/mutex.cpp Index: libcxx/src/mutex.cpp === --- libcxx/src/mutex.cpp +++ libcxx/src/mutex.cpp @@ -12,7 +12,7 @@ #include "limits" #include "system_error" #include "cassert" -#include "include/atomic_support.h" +#include "__atomic_support" _LIBCPP_BEGIN_NAMESPACE_STD #ifndef _LIBCPP_HAS_NO_THREADS Index: libcxx/src/memory.cpp === --- libcxx/src/memory.cpp +++ libcxx/src/memory.cpp @@ -13,32 +13,10 @@ #include "mutex" #include "thread" #endif -#include "include/atomic_support.h" +#include "__atomic_support" _LIBCPP_BEGIN_NAMESPACE_STD -namespace -{ - -// NOTE: Relaxed and acq/rel atomics (for increment and decrement respectively) -// should be sufficient for thread safety. -// See https://llvm.org/bugs/show_bug.cgi?id=22803 -template -inline T -increment(T& t) _NOEXCEPT -{ -return __libcpp_atomic_add(&t, 1, _AO_Relaxed); -} - -template -inline T -decrement(T& t) _NOEXCEPT -{ -return __libcpp_atomic_add(&t, -1, _AO_Acq_Rel); -} - -} // namespace - const allocator_arg_t allocator_arg = allocator_arg_t(); bad_weak_ptr::~bad_weak_ptr() _NOEXCEPT {} @@ -53,27 +31,27 @@ { } +__shared_weak_count::~__shared_weak_count() +{ +} + void __shared_count::__add_shared() _NOEXCEPT { -increment(__shared_owners_); +__atomic_inc_dec::increment(__shared_owners_); } bool __shared_count::__release_shared() _NOEXCEPT { -if (decrement(__shared_owners_) == -1) +if (__atomic_inc_dec::decrement(__shared_owners_) == -1) { __on_zero_shared(); return true; } return false; } -__shared_weak_count::~__shared_weak_count() -{ -} - void __shared_weak_count::__add_shared() _NOEXCEPT { @@ -83,7 +61,7 @@ void __shared_weak_count::__add_weak() _NOEXCEPT { -increment(__shared_weak_owners_); +__atomic_inc_dec::increment(__shared_weak_owners_); } void @@ -124,7 +102,7 @@ //__libcpp_atomic_store(&__shared_weak_owners_, -1, _AO_Release); __on_zero_shared_weak(); } -else if (decrement(__shared_weak_owners_) == -1) +else if (__atomic_inc_dec::decrement(__shared_weak_owners_) == -1) __on_zero_shared_weak(); } Index: libcxx/src/include/atomic_support.h === --- libcxx/src/include/atomic_support.h +++ /dev/null @@ -1,158 +0,0 @@ -//===--=== -// -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. -// -//===--=== - -#ifndef ATOMIC_SUPPORT_H -#define ATOMIC_SUPPORT_H - -#include "__config" -#include "memory" // for __libcpp_relaxed_load - -#if defined(__clang__) && __has_builtin(__atomic_load_n) \ - && __has_builtin(__atomic_store_n)\ - && __has_builtin(__atomic_add_fetch) \ - && __has_builtin(__atomic_compare_exchange_n) \ - && defined(__ATOMIC_RELAXED) \ - && defined(__ATOMIC_CONSUME) \ - && defined(__ATOMIC_ACQUIRE) \ - && defined(__ATOMIC_RELEASE) \ - && defined(__ATOMIC_ACQ_REL) \ - && defined(__ATOMIC_SEQ_CST) -# define _LIBCPP_HAS_ATOMIC_BUILTINS -#elif !defined(__clang__) && defined(_GNUC_VER) && _GNUC_VER >= 407 -# define _LIBCPP_HAS_ATOMIC_BUILTINS -#endif - -#if !defined(_LIBCPP_HAS_ATOMIC_BUILTINS) && !defined(_LIBCPP_HAS_NO_THREADS) -# if defined(_MSC_VER) && !defined(__clang__) -_LIBCPP_WARNING("Building libc++ without __atomic builtins is unsupported") -# else -# warning Building libc++ without __atomic builtins is unsupported -# endif -#endif - -_LIBCPP_BEGIN_NAMESPACE_STD - -namespace { - -#if defined(_LIBCPP_HAS_ATOMIC_BUILTINS) && !defined(_LIBCPP_HAS_NO_THREADS) - -enum _
[PATCH] D24991: Inline hot functions in libcxx shared_ptr implementation.
sebpop marked 2 inline comments as done. sebpop added inline comments. Comment at: libcxx/include/memory:3802 +{ + return __libcpp_atomic_add(&t, 1, _AO_Relaxed); +} EricWF wrote: > Why add `increment` and `decrement` at all? Just manually inline > `__libcpp_atomic_add` at the callsites. I like the idea to manually inline the inc and dec functions. What should we do with the NOTE: above? // NOTE: Relaxed and acq/rel atomics (for increment and decrement respectively) // should be sufficient for thread safety. // See https://llvm.org/bugs/show_bug.cgi?id=22803 should we just go ahead and remove the note, or you want to have it where inc/dec are called? (about a dozen places.) https://reviews.llvm.org/D24991 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D25624: Added 'inline' attribute to basic_string's destructor
sebpop added inline comments. Comment at: libcxx/include/string:1841 template +inline _LIBCPP_HEADER_INLINE_VISIBILITY basic_string<_CharT, _Traits, _Allocator>::~basic_string() and let's also use the define just here: #ifndef _LIBCPP_BUILDING_STRING inline _LIBCPP_INLINE_VISIBILITY #endif Comment at: libcxx/src/string.cpp:10 +#define _LIBCPP_HEADER_INLINE_VISIBILITY + To be consistent with memory.cpp: #define _LIBCPP_BUILDING_MEMORY let's rename the define as: #define _LIBCPP_BUILDING_STRING https://reviews.llvm.org/D25624 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D25624: Added 'inline' attribute to basic_string's destructor
sebpop added a comment. Other than removing that comment the patch looks good. Thanks! Comment at: libcxx/src/string.cpp:10 +// For keeping the definition of ~basic_string in this translation unit. +#define _LIBCPP_BUILDING_STRING Let's not add this comment here, as this define may be used in the future for other purposes. Also a simple grep would give you the places where this is used. https://reviews.llvm.org/D25624 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D25624: Added 'inline' attribute to basic_string's destructor
sebpop added a comment. In https://reviews.llvm.org/D25624#582731, @mehdi_amini wrote: > I don't understand: > > 1. The motivation for this change This is a change for performance: we have seen some benchmarks where inlining the string dtor brings performance up by 5%: from what I remember, there is a string ctor shortly followed by a dtor that could be optimized away if both the ctor and dtor are inlined. We already committed the patch to do the ctor inlining, and that gave us some performance. This patch is what remains to get the rest of the performance by inlining the string dtor. > 2. Why is this correct? There seems to be a problem the patch uncovers, and we will investigate. Thanks for pointing out this issue. https://reviews.llvm.org/D25624 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D24991: Inline hot functions in libcxx shared_ptr implementation.
sebpop added a comment. Ping: Eric, Marshall, could you please approve or comment on this patch? Thanks! https://reviews.llvm.org/D24991 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D25624: Added 'inline' attribute to basic_string's destructor
sebpop added a comment. In https://reviews.llvm.org/D25624#583163, @mehdi_amini wrote: > I'd like to understand why only the destructor? We have committed a patch to inline the constructor of std::string. Do you think we should inline some other functions? In https://reviews.llvm.org/D25624#583167, @mehdi_amini wrote: > I talked with Eric on IRC, he mentioned some benchmarks were ran. I'd like to > understand what was the baseline? We ran a proprietary benchmark compiled with clang + libc++ and compared against clang + libstdc++. With this patch, libc++ is on par with libstdc++. > Here we add *both* the inline keyword and the always_inline attribute. I'd > like to know if there is a benchmarks that shows that always_inline is > beneficial on top of the inline keyword. > If we need to add always_inline anywhere: this is likely an inliner > heuristic failure and we should at minima track it as an example to improve > it. This is a good observation: I am not sure we ran the benchmark without the always_inline attribute. We will try again wihout that attribute and report whether it matters performance wise. Thanks Mehdi for catching this! https://reviews.llvm.org/D25624 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D24991: Inline hot functions in libcxx shared_ptr implementation.
sebpop added a comment. In https://reviews.llvm.org/D24991#583877, @kubabrecka wrote: > Just a question: TSan intercepts on the dylib functions, namely > `__release_shared`, to track the atomic accesses. Can you make sure this > doesn't break? There's a few testcases for this in compiler-rt. I just ran ninja check-all with and without this patch and there are no regressions in compiler-rt on an x86_64-linux machine. https://reviews.llvm.org/D24991 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D24991: Inline hot functions in libcxx shared_ptr implementation.
sebpop added a comment. @mclow.lists, @EricWF, ok to commit the patch? Thanks, Sebastian https://reviews.llvm.org/D24991 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D22782: Added 'inline' attribute to __init to inline the basic_string's constructor
sebpop added a comment. In https://reviews.llvm.org/D22782#495436, @mclow.lists wrote: > Do we have a test for the problem that this is solving? I think we can write a testcase that shows that copy constructors are not optimized away unless the string constructor is inlined. This patch fixes the performance of a proprietary benchmark when compiled with libc++, closing the performance gap with the same benchmark compiled with the gnu libstdc++. Overall with this patch we have half a billion fewer instructions out of about 10 billion. https://reviews.llvm.org/D22782 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D22782: Added 'inline' attribute to __init to inline the basic_string's constructor
This revision was automatically updated to reflect the committed changes. Closed by commit rL278356: Add 'inline' attribute to __init to inline the basic_string's constructor (authored by spop). Changed prior to commit: https://reviews.llvm.org/D22782?vs=67597&id=67699#toc Repository: rL LLVM https://reviews.llvm.org/D22782 Files: libcxx/trunk/include/string Index: libcxx/trunk/include/string === --- libcxx/trunk/include/string +++ libcxx/trunk/include/string @@ -1442,6 +1442,7 @@ } template +inline _LIBCPP_INLINE_VISIBILITY void basic_string<_CharT, _Traits, _Allocator>::__init(const value_type* __s, size_type __sz, size_type __reserve) { @@ -1466,6 +1467,7 @@ } template +inline _LIBCPP_INLINE_VISIBILITY void basic_string<_CharT, _Traits, _Allocator>::__init(const value_type* __s, size_type __sz) { @@ -1603,6 +1605,7 @@ #endif // _LIBCPP_HAS_NO_RVALUE_REFERENCES template +inline _LIBCPP_INLINE_VISIBILITY void basic_string<_CharT, _Traits, _Allocator>::__init(size_type __n, value_type __c) { @@ -1699,6 +1702,7 @@ template template +inline _LIBCPP_INLINE_VISIBILITY typename enable_if < __is_exactly_input_iterator<_InputIterator>::value, @@ -1726,6 +1730,7 @@ template template +inline _LIBCPP_INLINE_VISIBILITY typename enable_if < __is_forward_iterator<_ForwardIterator>::value, Index: libcxx/trunk/include/string === --- libcxx/trunk/include/string +++ libcxx/trunk/include/string @@ -1442,6 +1442,7 @@ } template +inline _LIBCPP_INLINE_VISIBILITY void basic_string<_CharT, _Traits, _Allocator>::__init(const value_type* __s, size_type __sz, size_type __reserve) { @@ -1466,6 +1467,7 @@ } template +inline _LIBCPP_INLINE_VISIBILITY void basic_string<_CharT, _Traits, _Allocator>::__init(const value_type* __s, size_type __sz) { @@ -1603,6 +1605,7 @@ #endif // _LIBCPP_HAS_NO_RVALUE_REFERENCES template +inline _LIBCPP_INLINE_VISIBILITY void basic_string<_CharT, _Traits, _Allocator>::__init(size_type __n, value_type __c) { @@ -1699,6 +1702,7 @@ template template +inline _LIBCPP_INLINE_VISIBILITY typename enable_if < __is_exactly_input_iterator<_InputIterator>::value, @@ -1726,6 +1730,7 @@ template template +inline _LIBCPP_INLINE_VISIBILITY typename enable_if < __is_forward_iterator<_ForwardIterator>::value, ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D22782: Added 'inline' attribute to __init to inline the basic_string's constructor
sebpop added a comment. In https://reviews.llvm.org/D22782#512337, @EricWF wrote: > I would love to see a benchmark with this, but I've done enough investigating > on my own that I *know* this patch is beneficial. This patch was motivated by perf analysis we did on a proprietary benchmark in which we have seen a reduction of about 1 billion instructions (out of 10B) on x86_64-linux and aarch64-linux. Repository: rL LLVM https://reviews.llvm.org/D22782 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[libcxx] r278356 - Add 'inline' attribute to __init to inline the basic_string's constructor
Author: spop Date: Thu Aug 11 11:51:48 2016 New Revision: 278356 URL: http://llvm.org/viewvc/llvm-project?rev=278356&view=rev Log: Add 'inline' attribute to __init to inline the basic_string's constructor basic_string's constructor calls init which was not getting inlined. This prevented optimization of const string as init would appear as a call in between a string's def and use. Patch by Laxman Sole and Aditya Kumar. Differential Revision: https://reviews.llvm.org/D22782 Modified: libcxx/trunk/include/string Modified: libcxx/trunk/include/string URL: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/string?rev=278356&r1=278355&r2=278356&view=diff == --- libcxx/trunk/include/string (original) +++ libcxx/trunk/include/string Thu Aug 11 11:51:48 2016 @@ -1442,6 +1442,7 @@ basic_string<_CharT, _Traits, _Allocator } template +inline _LIBCPP_INLINE_VISIBILITY void basic_string<_CharT, _Traits, _Allocator>::__init(const value_type* __s, size_type __sz, size_type __reserve) { @@ -1466,6 +1467,7 @@ basic_string<_CharT, _Traits, _Allocator } template +inline _LIBCPP_INLINE_VISIBILITY void basic_string<_CharT, _Traits, _Allocator>::__init(const value_type* __s, size_type __sz) { @@ -1603,6 +1605,7 @@ basic_string<_CharT, _Traits, _Allocator #endif // _LIBCPP_HAS_NO_RVALUE_REFERENCES template +inline _LIBCPP_INLINE_VISIBILITY void basic_string<_CharT, _Traits, _Allocator>::__init(size_type __n, value_type __c) { @@ -1699,6 +1702,7 @@ basic_string<_CharT, _Traits, _Allocator template template +inline _LIBCPP_INLINE_VISIBILITY typename enable_if < __is_exactly_input_iterator<_InputIterator>::value, @@ -1726,6 +1730,7 @@ basic_string<_CharT, _Traits, _Allocator template template +inline _LIBCPP_INLINE_VISIBILITY typename enable_if < __is_forward_iterator<_ForwardIterator>::value, ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D21907: [AArch64] fix profiling with GNU LINUX on Aarch64
sebpop added a comment. LGTM. http://reviews.llvm.org/D21907 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang] Add deprecation warning for `-Ofast` driver option (PR #98736)
@@ -442,6 +442,10 @@ def warn_drv_deprecated_arg : Warning< def warn_drv_deprecated_arg_no_relaxed_template_template_args : Warning< "argument '-fno-relaxed-template-template-args' is deprecated">, InGroup; +def warn_drv_deprecated_arg_ofast : Warning< + "argument '-Ofast' is deprecated; use '-O3 -ffast math' for the same behavior," sebpop wrote: Missing dash between ffast and math. Here and several times below. https://github.com/llvm/llvm-project/pull/98736 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [flang] [flang] add -floop-interchange and enable it with opt levels (PR #140182)
sebpop wrote: > Do you have any compilation time and performance data? @madhur13490 did several changes to loop interchange to optimize the overall compilation time with the pass. I believe Madhur has only looked at c/c++ benchmarks and not at how loop interchange would impact flang. I think that if compilation time is good for c/c++, it should also be good for fortran. On the perf side, I was looking if we can already catch swim from cpu2000, and that fails with not enough data to infer number of iterations. I will be working on adding assume (N < 1335) based on analyzing array decls and infer loop bounds. https://github.com/llvm/llvm-project/pull/140182 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [flang] [flang] add -floop-interchange and enable it with opt levels (PR #140182)
@@ -421,7 +421,8 @@ static void CheckSubscripts( static void CheckSubscripts( semantics::SemanticsContext &context, CoarrayRef &ref) { - const Symbol &coarraySymbol{ref.GetBase().GetLastSymbol()}; + const auto &base = ref.GetBase(); + const Symbol &coarraySymbol{base.GetLastSymbol()}; sebpop wrote: This has been submitted separately https://github.com/llvm/llvm-project/pull/138793 Without this change I cannot build flang on arm64-linux ubuntu 24.04 machine. https://github.com/llvm/llvm-project/pull/140182 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [flang] [flang] add -floop-interchange and enable it with opt levels (PR #140182)
https://github.com/sebpop updated https://github.com/llvm/llvm-project/pull/140182 >From b0a6935e8439bc5b4f742f55eb3bb090790a8f95 Mon Sep 17 00:00:00 2001 From: Sebastian Pop Date: Wed, 7 May 2025 01:14:49 + Subject: [PATCH 1/4] [flang] fix Werror=dangling-reference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit when compiling with g++ 13.3.0 flang build fails with: llvm-project/flang/lib/Semantics/expression.cpp:424:17: error: possibly dangling reference to a temporary [-Werror=dangling-reference] 424 | const Symbol &coarraySymbol{ref.GetBase().GetLastSymbol()}; | ^ llvm-project/flang/lib/Semantics/expression.cpp:424:58: note: the temporary was destroyed at the end of the full expression ‘Fortran::evaluate::CoarrayRef::GetBase() const().Fortran::evaluate::NamedEntity::GetLastSymbol()’ 424 | const Symbol &coarraySymbol{ref.GetBase().GetLastSymbol()}; | ~~~^~ Keep the base in a temporary variable to make sure it is not deleted. --- flang/lib/Semantics/expression.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index e139bda7e4950..35eb7b61429fb 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -421,7 +421,8 @@ static void CheckSubscripts( static void CheckSubscripts( semantics::SemanticsContext &context, CoarrayRef &ref) { - const Symbol &coarraySymbol{ref.GetBase().GetLastSymbol()}; + const auto &base = ref.GetBase(); + const Symbol &coarraySymbol{base.GetLastSymbol()}; Shape lb, ub; if (FoldSubscripts(context, coarraySymbol, ref.subscript(), lb, ub)) { ValidateSubscripts(context, coarraySymbol, ref.subscript(), lb, ub); >From c6d051a2b4239e1fe78e1d4483b500b129956867 Mon Sep 17 00:00:00 2001 From: Sebastian Pop Date: Mon, 12 May 2025 21:56:03 + Subject: [PATCH 2/4] [flang] add -floop-interchange to flang driver This patch allows flang to recognize the flags -floop-interchange and -fno-loop-interchange. -floop-interchange adds the loop interchange pass to the pass pipeline. --- clang/include/clang/Driver/Options.td | 4 ++-- clang/lib/Driver/ToolChains/Flang.cpp | 3 +++ flang/include/flang/Frontend/CodeGenOptions.def | 1 + flang/lib/Frontend/CompilerInvocation.cpp | 3 +++ flang/lib/Frontend/FrontendActions.cpp | 1 + flang/test/Driver/loop-interchange.f90 | 7 +++ 6 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 flang/test/Driver/loop-interchange.f90 diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 11677626dbf1f..287a00863bb35 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4141,9 +4141,9 @@ def ftrap_function_EQ : Joined<["-"], "ftrap-function=">, Group, HelpText<"Issue call to specified function rather than a trap instruction">, MarshallingInfoString>; def floop_interchange : Flag<["-"], "floop-interchange">, Group, - HelpText<"Enable the loop interchange pass">, Visibility<[ClangOption, CC1Option]>; + HelpText<"Enable the loop interchange pass">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>; def fno_loop_interchange: Flag<["-"], "fno-loop-interchange">, Group, - HelpText<"Disable the loop interchange pass">, Visibility<[ClangOption, CC1Option]>; + HelpText<"Disable the loop interchange pass">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>; def funroll_loops : Flag<["-"], "funroll-loops">, Group, HelpText<"Turn on loop unroller">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>; def fno_unroll_loops : Flag<["-"], "fno-unroll-loops">, Group, diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index b1ca747e68b89..c6c7a0b75a987 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -152,6 +152,9 @@ void Flang::addCodegenOptions(const ArgList &Args, !stackArrays->getOption().matches(options::OPT_fno_stack_arrays)) CmdArgs.push_back("-fstack-arrays"); + Args.AddLastArg(CmdArgs, options::OPT_floop_interchange, + options::OPT_fno_loop_interchange); + handleVectorizeLoopsArgs(Args, CmdArgs); handleVectorizeSLPArgs(Args, CmdArgs); diff --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def index d9dbd274e83e5..7ced60f512219 100644 --- a/flang/include/flang/Frontend/CodeGenOptions.def +++ b/flang/include/flang/Frontend/CodeGenOptions.def @@ -35,6 +35,7 @@ CODEGENOPT(PrepareForThinLTO , 1, 0) ///< Set when -flto=thin is enabled on the CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass) CODEGENOPT(VectorizeLoop, 1, 0) ///< Enable loop vectorization. CODEGENOP
[clang] [flang] [flang] add -floop-interchange and enable it with opt levels (PR #140182)
https://github.com/sebpop updated https://github.com/llvm/llvm-project/pull/140182 >From 46efee7d48a11794fc103cf67b21796d8e5f3408 Mon Sep 17 00:00:00 2001 From: Sebastian Pop Date: Mon, 12 May 2025 21:56:03 + Subject: [PATCH 1/3] [flang] add -floop-interchange to flang driver This patch allows flang to recognize the flags -floop-interchange and -fno-loop-interchange. -floop-interchange adds the loop interchange pass to the pass pipeline. --- clang/include/clang/Driver/Options.td | 4 ++-- clang/lib/Driver/ToolChains/Flang.cpp | 3 +++ flang/docs/ReleaseNotes.md | 2 ++ flang/include/flang/Frontend/CodeGenOptions.def | 1 + flang/lib/Frontend/CompilerInvocation.cpp | 3 +++ flang/lib/Frontend/FrontendActions.cpp | 1 + flang/test/Driver/loop-interchange.f90 | 7 +++ 7 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 flang/test/Driver/loop-interchange.f90 diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index bd8df8f6a749a..c8c675bc17e7d 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4186,9 +4186,9 @@ def ftrap_function_EQ : Joined<["-"], "ftrap-function=">, Group, HelpText<"Issue call to specified function rather than a trap instruction">, MarshallingInfoString>; def floop_interchange : Flag<["-"], "floop-interchange">, Group, - HelpText<"Enable the loop interchange pass">, Visibility<[ClangOption, CC1Option]>; + HelpText<"Enable the loop interchange pass">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>; def fno_loop_interchange: Flag<["-"], "fno-loop-interchange">, Group, - HelpText<"Disable the loop interchange pass">, Visibility<[ClangOption, CC1Option]>; + HelpText<"Disable the loop interchange pass">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>; def funroll_loops : Flag<["-"], "funroll-loops">, Group, HelpText<"Turn on loop unroller">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>; def fno_unroll_loops : Flag<["-"], "fno-unroll-loops">, Group, diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index b1ca747e68b89..c6c7a0b75a987 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -152,6 +152,9 @@ void Flang::addCodegenOptions(const ArgList &Args, !stackArrays->getOption().matches(options::OPT_fno_stack_arrays)) CmdArgs.push_back("-fstack-arrays"); + Args.AddLastArg(CmdArgs, options::OPT_floop_interchange, + options::OPT_fno_loop_interchange); + handleVectorizeLoopsArgs(Args, CmdArgs); handleVectorizeSLPArgs(Args, CmdArgs); diff --git a/flang/docs/ReleaseNotes.md b/flang/docs/ReleaseNotes.md index b356f64553d7e..c76635d121d58 100644 --- a/flang/docs/ReleaseNotes.md +++ b/flang/docs/ReleaseNotes.md @@ -32,6 +32,8 @@ page](https://llvm.org/releases/). ## New Compiler Flags +* -floop-interchange is now recognized by flang. + ## Windows Support ## Fortran Language Changes in Flang diff --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def index d9dbd274e83e5..7ced60f512219 100644 --- a/flang/include/flang/Frontend/CodeGenOptions.def +++ b/flang/include/flang/Frontend/CodeGenOptions.def @@ -35,6 +35,7 @@ CODEGENOPT(PrepareForThinLTO , 1, 0) ///< Set when -flto=thin is enabled on the CODEGENOPT(StackArrays, 1, 0) ///< -fstack-arrays (enable the stack-arrays pass) CODEGENOPT(VectorizeLoop, 1, 0) ///< Enable loop vectorization. CODEGENOPT(VectorizeSLP, 1, 0) ///< Enable SLP vectorization. +CODEGENOPT(InterchangeLoops, 1, 0) ///< Enable loop interchange. CODEGENOPT(LoopVersioning, 1, 0) ///< Enable loop versioning. CODEGENOPT(UnrollLoops, 1, 0) ///< Enable loop unrolling CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 238079a09ef3a..67fb0924def71 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -269,6 +269,9 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts, clang::driver::options::OPT_fno_stack_arrays, false)) opts.StackArrays = 1; + if (args.getLastArg(clang::driver::options::OPT_floop_interchange)) +opts.InterchangeLoops = 1; + if (args.getLastArg(clang::driver::options::OPT_vectorize_loops)) opts.VectorizeLoop = 1; diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index e5a15c555fa5e..38dfaadf1dff9 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -922,6 +922,7 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) { if (ci.isTimingEnabled()) si.getTimePasses().setOutStream(ci.getTimingStreamLLVM
[clang] [flang] [flang] add -floop-interchange and enable it with opt levels (PR #140182)
https://github.com/sebpop closed https://github.com/llvm/llvm-project/pull/140182 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits