[PATCH] D82020: PowerPC-specific builtin constrained FP enablement

2020-06-17 Thread Andrew J Wock via Phabricator via cfe-commits
ajwock created this revision.
ajwock added reviewers: kpn, cameron.mcinally, spatel, hfinkel, nemanjai, 
kbarton.
Herald added subscribers: cfe-commits, steven.zhang, shchenz.
Herald added a project: clang.
ajwock added a reviewer: steven.zhang.

This change enables PowerPC compiler builtins to generate constrained floating 
point operations when clang is instructed to do so (for example, with 
-ffp-exception-behavior=strict).

A couple of possibly unexpected backend divergences between constrained 
floating point and regular behavior are highlighted under the test tag 
FIXME-CHECK.  This may be something for those on the PPC backend to look at.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D82020

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-ppc-fpconstrained.c

Index: clang/test/CodeGen/builtins-ppc-fpconstrained.c
===
--- /dev/null
+++ clang/test/CodeGen/builtins-ppc-fpconstrained.c
@@ -0,0 +1,166 @@
+// RUN: %clang_cc1 -triple powerpc64le-gnu-linux -target-feature +vsx \
+// RUN: -disable-O0-optnone -Wall -Wno-unused -Werror -emit-llvm %s -o - | \
+// RUN: FileCheck --check-prefix=CHECK-UNCONSTRAINED -vv %s
+// RUN: %clang_cc1 -triple powerpc64le-gnu-linux -target-feature +vsx \
+// RUN: -disable-O0-optnone -ffp-exception-behavior=strict -Wall \
+// RUN: -Wno-unused -Werror -emit-llvm %s -o - | FileCheck \
+// RUN: --check-prefix=CHECK-CONSTRAINED -vv %s
+// RUN: %clang_cc1 -triple powerpc64le-gnu-linux -target-feature +vsx \
+// RUN: -disable-O0-optnone -fallow-half-arguments-and-returns -S -o - %s | \
+// RUN: FileCheck --check-prefix=CHECK-ASM --check-prefix=NOT-FIXME-CHECK  %s
+// RUN: %clang_cc1 -triple powerpc64le-gnu-linux -target-feature +vsx \
+// RUN: -disable-O0-optnone -fallow-half-arguments-and-returns -S \
+// RUN: -ffp-exception-behavior=strict  -o - %s | FileCheck \
+// RUN: --check-prefix=CHECK-ASM --check-prefix=FIXME-CHECK  %s
+
+typedef __attribute__((vector_size(4 * sizeof(float)))) float vec_float;
+typedef __attribute__((vector_size(2 * sizeof(double)))) double vec_double;
+
+volatile vec_double vd;
+volatile vec_float vf;
+
+void test_float(void) {
+  vf = __builtin_vsx_xvsqrtsp(vf);
+  // CHECK-LABEL: try-xvsqrtsp
+  // CHECK-UNCONSTRAINED: @llvm.sqrt.v4f32(<4 x float> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.sqrt.v4f32(<4 x float> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CHECK-ASM: xvsqrtsp
+
+  vd = __builtin_vsx_xvsqrtdp(vd);
+  // CHECK-LABEL: try-xvsqrtdp
+  // CHECK-UNCONSTRAINED: @llvm.sqrt.v2f64(<2 x double> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.sqrt.v2f64(<2 x double> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CHECK-ASM: xvsqrtdp
+
+  vf = __builtin_vsx_xvrspim(vf);
+  // CHECK-LABEL: try-xvrspim
+  // CHECK-UNCONSTRAINED: @llvm.floor.v4f32(<4 x float> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.floor.v4f32(<4 x float> %{{.*}}, metadata !"fpexcept.strict")
+  // CHECK-ASM: xvrspim
+
+  vd = __builtin_vsx_xvrdpim(vd);
+  // CHECK-LABEL: try-xvrdpim
+  // CHECK-UNCONSTRAINED: @llvm.floor.v2f64(<2 x double> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.floor.v2f64(<2 x double> %{{.*}}, metadata !"fpexcept.strict")
+  // CHECK-ASM: xvrdpim
+
+  vf = __builtin_vsx_xvrspi(vf);
+  // CHECK-LABEL: try-xvrspi
+  // CHECK-UNCONSTRAINED: @llvm.round.v4f32(<4 x float> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.round.v4f32(<4 x float> %{{.*}}, metadata !"fpexcept.strict")
+  // CHECK-ASM: xvrspi
+
+  vd = __builtin_vsx_xvrdpi(vd);
+  // CHECK-LABEL: try-xvrdpi
+  // CHECK-UNCONSTRAINED: @llvm.round.v2f64(<2 x double> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.round.v2f64(<2 x double> %{{.*}}, metadata !"fpexcept.strict")
+  // CHECK-ASM: xvrdpi
+
+  vf = __builtin_vsx_xvrspic(vf);
+  // CHECK-LABEL: try-xvrspic
+  // CHECK-UNCONSTRAINED: @llvm.nearbyint.v4f32(<4 x float> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.nearbyint.v4f32(<4 x float> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // NO-FIXME-CHECK: xvrspic
+  // FIXME-CHECK: bl nearbyintf
+  // FIXME-CHECK: bl nearbyintf
+  // FIXME-CHECK: bl nearbyintf
+  // FIXME-CHECK: bl nearbyintf
+
+  vd = __builtin_vsx_xvrdpic(vd);
+  // CHECK-LABEL: try-xvrdpic
+  // CHECK-UNCONSTRAINED: @llvm.nearbyint.v2f64(<2 x double> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.nearbyint.v2f64(<2 x double> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // NO-FIXME-CHECK: xvrdpic
+  // FIXME-CHECK: bl nearbyint
+  // FIXME-CHECK: bl nearbyint
+
+  vf = __builtin_vsx_xvrspip(vf);
+  // CHECK-LABEL: try-xvrspip
+  // CHECK-UNCONSTRAINED: @llvm.ceil.v4f32(<4 x float> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.ceil.v4f32(<4 x float> %{{.*}}, metadata !"fpexcept.strict")
+  // CHECK-ASM: xvrspi

[PATCH] D82020: PowerPC-specific builtin constrained FP enablement

2020-06-19 Thread Andrew J Wock via Phabricator via cfe-commits
ajwock updated this revision to Diff 272186.
ajwock added a comment.

Took steven.zhang's suggestion, added REQUIRES line to diff.  Hopefully 
addressed harbormaster concerns.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82020/new/

https://reviews.llvm.org/D82020

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-ppc-fpconstrained.c

Index: clang/test/CodeGen/builtins-ppc-fpconstrained.c
===
--- /dev/null
+++ clang/test/CodeGen/builtins-ppc-fpconstrained.c
@@ -0,0 +1,167 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_cc1 -triple powerpc64le-gnu-linux -target-feature +vsx \
+// RUN: -disable-O0-optnone -Wall -Wno-unused -Werror -emit-llvm %s -o - | \
+// RUN: FileCheck --check-prefix=CHECK-UNCONSTRAINED -vv %s
+// RUN: %clang_cc1 -triple powerpc64le-gnu-linux -target-feature +vsx \
+// RUN: -disable-O0-optnone -ffp-exception-behavior=strict -Wall \
+// RUN: -Wno-unused -Werror -emit-llvm %s -o - | FileCheck \
+// RUN: --check-prefix=CHECK-CONSTRAINED -vv %s
+// RUN: %clang_cc1 -triple powerpc64le-gnu-linux -target-feature +vsx \
+// RUN: -disable-O0-optnone -fallow-half-arguments-and-returns -S -o - %s | \
+// RUN: FileCheck --check-prefix=CHECK-ASM --check-prefix=NOT-FIXME-CHECK  %s
+// RUN: %clang_cc1 -triple powerpc64le-gnu-linux -target-feature +vsx \
+// RUN: -disable-O0-optnone -fallow-half-arguments-and-returns -S \
+// RUN: -ffp-exception-behavior=strict  -o - %s | FileCheck \
+// RUN: --check-prefix=CHECK-ASM --check-prefix=FIXME-CHECK  %s
+
+typedef __attribute__((vector_size(4 * sizeof(float)))) float vec_float;
+typedef __attribute__((vector_size(2 * sizeof(double)))) double vec_double;
+
+volatile vec_double vd;
+volatile vec_float vf;
+
+void test_float(void) {
+  vf = __builtin_vsx_xvsqrtsp(vf);
+  // CHECK-LABEL: try-xvsqrtsp
+  // CHECK-UNCONSTRAINED: @llvm.sqrt.v4f32(<4 x float> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.sqrt.v4f32(<4 x float> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CHECK-ASM: xvsqrtsp
+
+  vd = __builtin_vsx_xvsqrtdp(vd);
+  // CHECK-LABEL: try-xvsqrtdp
+  // CHECK-UNCONSTRAINED: @llvm.sqrt.v2f64(<2 x double> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.sqrt.v2f64(<2 x double> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CHECK-ASM: xvsqrtdp
+
+  vf = __builtin_vsx_xvrspim(vf);
+  // CHECK-LABEL: try-xvrspim
+  // CHECK-UNCONSTRAINED: @llvm.floor.v4f32(<4 x float> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.floor.v4f32(<4 x float> %{{.*}}, metadata !"fpexcept.strict")
+  // CHECK-ASM: xvrspim
+
+  vd = __builtin_vsx_xvrdpim(vd);
+  // CHECK-LABEL: try-xvrdpim
+  // CHECK-UNCONSTRAINED: @llvm.floor.v2f64(<2 x double> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.floor.v2f64(<2 x double> %{{.*}}, metadata !"fpexcept.strict")
+  // CHECK-ASM: xvrdpim
+
+  vf = __builtin_vsx_xvrspi(vf);
+  // CHECK-LABEL: try-xvrspi
+  // CHECK-UNCONSTRAINED: @llvm.round.v4f32(<4 x float> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.round.v4f32(<4 x float> %{{.*}}, metadata !"fpexcept.strict")
+  // CHECK-ASM: xvrspi
+
+  vd = __builtin_vsx_xvrdpi(vd);
+  // CHECK-LABEL: try-xvrdpi
+  // CHECK-UNCONSTRAINED: @llvm.round.v2f64(<2 x double> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.round.v2f64(<2 x double> %{{.*}}, metadata !"fpexcept.strict")
+  // CHECK-ASM: xvrdpi
+
+  vf = __builtin_vsx_xvrspic(vf);
+  // CHECK-LABEL: try-xvrspic
+  // CHECK-UNCONSTRAINED: @llvm.nearbyint.v4f32(<4 x float> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.nearbyint.v4f32(<4 x float> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // NO-FIXME-CHECK: xvrspic
+  // FIXME-CHECK: bl nearbyintf
+  // FIXME-CHECK: bl nearbyintf
+  // FIXME-CHECK: bl nearbyintf
+  // FIXME-CHECK: bl nearbyintf
+
+  vd = __builtin_vsx_xvrdpic(vd);
+  // CHECK-LABEL: try-xvrdpic
+  // CHECK-UNCONSTRAINED: @llvm.nearbyint.v2f64(<2 x double> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.nearbyint.v2f64(<2 x double> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // NO-FIXME-CHECK: xvrdpic
+  // FIXME-CHECK: bl nearbyint
+  // FIXME-CHECK: bl nearbyint
+
+  vf = __builtin_vsx_xvrspip(vf);
+  // CHECK-LABEL: try-xvrspip
+  // CHECK-UNCONSTRAINED: @llvm.ceil.v4f32(<4 x float> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.ceil.v4f32(<4 x float> %{{.*}}, metadata !"fpexcept.strict")
+  // CHECK-ASM: xvrspip
+
+  vd = __builtin_vsx_xvrdpip(vd);
+  // CHECK-LABEL: try-xvrdpip
+  // CHECK-UNCONSTRAINED: @llvm.ceil.v2f64(<2 x double> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.ceil.v2f64(<2 x double> %{{.*}}, metadata !"fpexcept.strict")
+  // CHECK-ASM: xvrdpip
+
+  vf = __builtin_vsx_xvrspiz(vf);
+  // CHECK-LABEL: try-xvrspiz
+  // CHECK

[PATCH] D82020: PowerPC-specific builtin constrained FP enablement

2020-06-21 Thread Andrew J Wock via Phabricator via cfe-commits
ajwock updated this revision to Diff 272330.
ajwock added a comment.

It seems one of the issues that my tests revealed was already remedied in very 
recent changes, causing my test to fail. I changed the test to reflect that 
while also taking steven's recommendations.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D82020/new/

https://reviews.llvm.org/D82020

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-ppc-fpconstrained.c

Index: clang/test/CodeGen/builtins-ppc-fpconstrained.c
===
--- /dev/null
+++ clang/test/CodeGen/builtins-ppc-fpconstrained.c
@@ -0,0 +1,159 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_cc1 -triple powerpc64le-gnu-linux -target-feature +vsx \
+// RUN: -emit-llvm %s -o - | FileCheck --check-prefix=CHECK-UNCONSTRAINED %s
+// RUN: %clang_cc1 -triple powerpc64le-gnu-linux -target-feature +vsx \
+// RUN:  -ffp-exception-behavior=strict -emit-llvm %s -o - | FileCheck \
+// RUN: --check-prefix=CHECK-CONSTRAINED -vv %s
+// RUN: %clang_cc1 -triple powerpc64le-gnu-linux -target-feature +vsx \
+// RUN: -fallow-half-arguments-and-returns -S -o - %s | \
+// RUN: FileCheck --check-prefix=CHECK-ASM --check-prefix=NOT-FIXME-CHECK  %s
+// RUN: %clang_cc1 -triple powerpc64le-gnu-linux -target-feature +vsx \
+// RUN: -fallow-half-arguments-and-returns -S -ffp-exception-behavior=strict \
+// RUN: -o - %s | FileCheck --check-prefix=CHECK-ASM \
+// RUN: --check-prefix=FIXME-CHECK  %s
+
+typedef __attribute__((vector_size(4 * sizeof(float)))) float vec_float;
+typedef __attribute__((vector_size(2 * sizeof(double)))) double vec_double;
+
+volatile vec_double vd;
+volatile vec_float vf;
+
+void test_float(void) {
+  vf = __builtin_vsx_xvsqrtsp(vf);
+  // CHECK-LABEL: try-xvsqrtsp
+  // CHECK-UNCONSTRAINED: @llvm.sqrt.v4f32(<4 x float> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.sqrt.v4f32(<4 x float> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CHECK-ASM: xvsqrtsp
+
+  vd = __builtin_vsx_xvsqrtdp(vd);
+  // CHECK-LABEL: try-xvsqrtdp
+  // CHECK-UNCONSTRAINED: @llvm.sqrt.v2f64(<2 x double> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.sqrt.v2f64(<2 x double> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CHECK-ASM: xvsqrtdp
+
+  vf = __builtin_vsx_xvrspim(vf);
+  // CHECK-LABEL: try-xvrspim
+  // CHECK-UNCONSTRAINED: @llvm.floor.v4f32(<4 x float> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.floor.v4f32(<4 x float> %{{.*}}, metadata !"fpexcept.strict")
+  // CHECK-ASM: xvrspim
+
+  vd = __builtin_vsx_xvrdpim(vd);
+  // CHECK-LABEL: try-xvrdpim
+  // CHECK-UNCONSTRAINED: @llvm.floor.v2f64(<2 x double> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.floor.v2f64(<2 x double> %{{.*}}, metadata !"fpexcept.strict")
+  // CHECK-ASM: xvrdpim
+
+  vf = __builtin_vsx_xvrspi(vf);
+  // CHECK-LABEL: try-xvrspi
+  // CHECK-UNCONSTRAINED: @llvm.round.v4f32(<4 x float> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.round.v4f32(<4 x float> %{{.*}}, metadata !"fpexcept.strict")
+  // CHECK-ASM: xvrspi
+
+  vd = __builtin_vsx_xvrdpi(vd);
+  // CHECK-LABEL: try-xvrdpi
+  // CHECK-UNCONSTRAINED: @llvm.round.v2f64(<2 x double> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.round.v2f64(<2 x double> %{{.*}}, metadata !"fpexcept.strict")
+  // CHECK-ASM: xvrdpi
+
+  vf = __builtin_vsx_xvrspic(vf);
+  // CHECK-LABEL: try-xvrspic
+  // CHECK-UNCONSTRAINED: @llvm.nearbyint.v4f32(<4 x float> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.nearbyint.v4f32(<4 x float> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CHECK-ASM: xvrspic
+
+  vd = __builtin_vsx_xvrdpic(vd);
+  // CHECK-LABEL: try-xvrdpic
+  // CHECK-UNCONSTRAINED: @llvm.nearbyint.v2f64(<2 x double> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.nearbyint.v2f64(<2 x double> %{{.*}}, metadata !"round.tonearest", metadata !"fpexcept.strict")
+  // CHECK-ASM: xvrdpic
+
+  vf = __builtin_vsx_xvrspip(vf);
+  // CHECK-LABEL: try-xvrspip
+  // CHECK-UNCONSTRAINED: @llvm.ceil.v4f32(<4 x float> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.ceil.v4f32(<4 x float> %{{.*}}, metadata !"fpexcept.strict")
+  // CHECK-ASM: xvrspip
+
+  vd = __builtin_vsx_xvrdpip(vd);
+  // CHECK-LABEL: try-xvrdpip
+  // CHECK-UNCONSTRAINED: @llvm.ceil.v2f64(<2 x double> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.ceil.v2f64(<2 x double> %{{.*}}, metadata !"fpexcept.strict")
+  // CHECK-ASM: xvrdpip
+
+  vf = __builtin_vsx_xvrspiz(vf);
+  // CHECK-LABEL: try-xvrspiz
+  // CHECK-UNCONSTRAINED: @llvm.trunc.v4f32(<4 x float> %{{.*}})
+  // CHECK-CONSTRAINED: @llvm.experimental.constrained.trunc.v4f32(<4 x float> %{{.*}}, metadata !"fpexcept.strict")
+  // CHECK-ASM: xvrspiz
+
+  vd = __builtin_vsx_xvrdpiz(vd);
+  // CHECK-LABEL: try-xvrdpiz
+  

[PATCH] D76949: Replace subtract-from-zero float in version with fneg in PowerPC special fma compiler builtins

2020-03-27 Thread Andrew J Wock via Phabricator via cfe-commits
ajwock created this revision.
ajwock added reviewers: kpn, cameron.mcinally, spatel, hfinkel.
Herald added subscribers: cfe-commits, steven.zhang, shchenz, kbarton, nemanjai.
Herald added a project: clang.
ajwock added a reviewer: nemanjai.
Herald added a subscriber: wuzish.

This patch adds a test for the PowerPC fma compiler builtins, some variations 
of which negate inputs and outputs.  The code to generate IR for these builtins 
was untested before this patch.

Originally, the code used the outdated method of subtracting floating point 
values from -0.0 as floating point negation.  This patch replaces those 
subtractions with the dedicated fneg instruction.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D76949

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-ppc-fma.c


Index: clang/test/CodeGen/builtins-ppc-fma.c
===
--- clang/test/CodeGen/builtins-ppc-fma.c
+++ clang/test/CodeGen/builtins-ppc-fma.c
@@ -0,0 +1,43 @@
+// RUN: %clang_cc1 -triple powerpc64le-gnu-linux \
+// RUN: -target-feature +altivec -Wall -Wno-unused -Werror -emit-llvm %s -o - | FileCheck  \
+// RUN: %s
+
+typedef __attribute__((vector_size(4 * sizeof(float)))) float vec_float;
+typedef __attribute__((vector_size(2 * sizeof(double)))) double vec_double;
+
+volatile vec_double vd;
+volatile vec_float vf;
+
+void test_fma(void) { // Verifies the IR emitted for each VSX fma builtin.
+  vf = __builtin_vsx_xvmaddasp(vf, vf, vf); // fma(x, y, z)
+  // CHECK: @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
+
+  vd = __builtin_vsx_xvmaddadp(vd, vd, vd); // fma(x, y, z)
+  // CHECK: @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
+
+  vf = __builtin_vsx_xvnmaddasp(vf, vf, vf); // -fma(x, y, z)
+  // CHECK: [[RESULT:%[^ ]+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
+  // CHECK: fneg <4 x float> [[RESULT]]
+
+  vd = __builtin_vsx_xvnmaddadp(vd, vd, vd); // -fma(x, y, z)
+  // CHECK: [[RESULT:%[^ ]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
+  // CHECK: fneg <2 x double> [[RESULT]]
+
+  vf = __builtin_vsx_xvmsubasp(vf, vf, vf); // fma(x, y, -z)
+  // CHECK: [[RESULT:%[^ ]+]] = fneg <4 x float> %{{.*}}
+  // CHECK: @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[RESULT]])
+
+  vd = __builtin_vsx_xvmsubadp(vd, vd, vd); // fma(x, y, -z)
+  // CHECK: [[RESULT:%[^ ]+]] = fneg <2 x double> %{{.*}}
+  // CHECK: @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[RESULT]])
+
+  vf = __builtin_vsx_xvnmsubasp(vf, vf, vf); // -fma(x, y, -z)
+  // CHECK: [[RESULT:%[^ ]+]] = fneg <4 x float> %{{.*}}
+  // CHECK: [[RESULT2:%[^ ]+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[RESULT]])
+  // CHECK: fneg <4 x float> [[RESULT2]]
+
+  vd = __builtin_vsx_xvnmsubadp(vd, vd, vd); // -fma(x, y, -z)
+  // CHECK: [[RESULT:%[^ ]+]] = fneg <2 x double> %{{.*}}
+  // CHECK: [[RESULT2:%[^ ]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[RESULT]])
+  // CHECK: fneg <2 x double> [[RESULT2]]
+}
Index: clang/lib/CodeGen/CGBuiltin.cpp
===
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -13083,25 +13083,24 @@
 Value *X = EmitScalarExpr(E->getArg(0));
 Value *Y = EmitScalarExpr(E->getArg(1));
 Value *Z = EmitScalarExpr(E->getArg(2));
-Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
 switch (BuiltinID) {
   case PPC::BI__builtin_vsx_xvmaddadp:
   case PPC::BI__builtin_vsx_xvmaddasp:
 return Builder.CreateCall(F, {X, Y, Z});
+
   case PPC::BI__builtin_vsx_xvnmaddadp:
   case PPC::BI__builtin_vsx_xvnmaddasp:
-return Builder.CreateFSub(Zero,
-  Builder.CreateCall(F, {X, Y, Z}), "sub");
+return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
+
   case PPC::BI__builtin_vsx_xvmsubadp:
   case PPC::BI__builtin_vsx_xvmsubasp:
-return Builder.CreateCall(F,
-  {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
+return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
+
   case PPC::BI__builtin_vsx_xvnmsubadp:
   case PPC::BI__builtin_vsx_xvnmsubasp:
-Value *FsubRes =
-  Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
-return Builder.CreateFSub(Zero, FsubRes, "sub");
+return Builder.CreateFNeg(
+Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}), "neg");
 }
 llvm_unreachable("Unknown FMA operation");
 return nullptr; // Suppress no-return warning


Index: clang/test/CodeGen/builtins-ppc-fma.c
===
--- clang/test/CodeGen/builtins-ppc-fma.c
+++ clang/test/CodeGen/builtins-ppc-fm

[PATCH] D76949: Replace subtract-from-zero float in version with fneg in PowerPC special fma compiler builtins

2020-04-02 Thread Andrew J Wock via Phabricator via cfe-commits
ajwock updated this revision to Diff 254609.
ajwock added a comment.

Rebased for harbormaster.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D76949/new/

https://reviews.llvm.org/D76949

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-ppc-fma.c


Index: clang/test/CodeGen/builtins-ppc-fma.c
===
--- /dev/null
+++ clang/test/CodeGen/builtins-ppc-fma.c
@@ -0,0 +1,43 @@
+// RUN: %clang_cc1 -triple powerpc64le-gnu-linux \
+// RUN: -target-feature +altivec -Wall -Wno-unused -Werror -emit-llvm %s -o - | FileCheck  \
+// RUN: %s
+
+typedef __attribute__((vector_size(4 * sizeof(float)))) float vec_float;
+typedef __attribute__((vector_size(2 * sizeof(double)))) double vec_double;
+
+volatile vec_double vd;
+volatile vec_float vf;
+
+void test_fma(void) { // Verifies the IR emitted for each VSX fma builtin.
+  vf = __builtin_vsx_xvmaddasp(vf, vf, vf); // fma(x, y, z)
+  // CHECK: @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
+
+  vd = __builtin_vsx_xvmaddadp(vd, vd, vd); // fma(x, y, z)
+  // CHECK: @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
+
+  vf = __builtin_vsx_xvnmaddasp(vf, vf, vf); // -fma(x, y, z)
+  // CHECK: [[RESULT:%[^ ]+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
+  // CHECK: fneg <4 x float> [[RESULT]]
+
+  vd = __builtin_vsx_xvnmaddadp(vd, vd, vd); // -fma(x, y, z)
+  // CHECK: [[RESULT:%[^ ]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
+  // CHECK: fneg <2 x double> [[RESULT]]
+
+  vf = __builtin_vsx_xvmsubasp(vf, vf, vf); // fma(x, y, -z)
+  // CHECK: [[RESULT:%[^ ]+]] = fneg <4 x float> %{{.*}}
+  // CHECK: @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[RESULT]])
+
+  vd = __builtin_vsx_xvmsubadp(vd, vd, vd); // fma(x, y, -z)
+  // CHECK: [[RESULT:%[^ ]+]] = fneg <2 x double> %{{.*}}
+  // CHECK: @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[RESULT]])
+
+  vf = __builtin_vsx_xvnmsubasp(vf, vf, vf); // -fma(x, y, -z)
+  // CHECK: [[RESULT:%[^ ]+]] = fneg <4 x float> %{{.*}}
+  // CHECK: [[RESULT2:%[^ ]+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[RESULT]])
+  // CHECK: fneg <4 x float> [[RESULT2]]
+
+  vd = __builtin_vsx_xvnmsubadp(vd, vd, vd); // -fma(x, y, -z)
+  // CHECK: [[RESULT:%[^ ]+]] = fneg <2 x double> %{{.*}}
+  // CHECK: [[RESULT2:%[^ ]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[RESULT]])
+  // CHECK: fneg <2 x double> [[RESULT2]]
+}
Index: clang/lib/CodeGen/CGBuiltin.cpp
===
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -13214,25 +13214,24 @@
 Value *X = EmitScalarExpr(E->getArg(0));
 Value *Y = EmitScalarExpr(E->getArg(1));
 Value *Z = EmitScalarExpr(E->getArg(2));
-Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
 switch (BuiltinID) {
   case PPC::BI__builtin_vsx_xvmaddadp:
   case PPC::BI__builtin_vsx_xvmaddasp:
 return Builder.CreateCall(F, {X, Y, Z});
+
   case PPC::BI__builtin_vsx_xvnmaddadp:
   case PPC::BI__builtin_vsx_xvnmaddasp:
-return Builder.CreateFSub(Zero,
-  Builder.CreateCall(F, {X, Y, Z}), "sub");
+return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg");
+
   case PPC::BI__builtin_vsx_xvmsubadp:
   case PPC::BI__builtin_vsx_xvmsubasp:
-return Builder.CreateCall(F,
-  {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
+return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")});
+
   case PPC::BI__builtin_vsx_xvnmsubadp:
   case PPC::BI__builtin_vsx_xvnmsubasp:
-Value *FsubRes =
-  Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
-return Builder.CreateFSub(Zero, FsubRes, "sub");
+return Builder.CreateFNeg(
+Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}), "neg");
 }
 llvm_unreachable("Unknown FMA operation");
 return nullptr; // Suppress no-return warning


Index: clang/test/CodeGen/builtins-ppc-fma.c
===
--- /dev/null
+++ clang/test/CodeGen/builtins-ppc-fma.c
@@ -0,0 +1,43 @@
+// RUN: %clang_cc1 -triple powerpc64le-gnu-linux \
+// RUN: -target-feature +altivec -Wall -Wno-unused -Werror -emit-llvm %s -o - | FileCheck  \
+// RUN: %s
+
+typedef __attribute__((vector_size(4 * sizeof(float)))) float vec_float;
+typedef __attribute__((vector_size(2 * sizeof(double)))) double vec_double;
+
+volatile vec_double vd;
+volatile vec_float vf;
+
+void test_fma(void) {
+  vf = __builtin_vsx_xvmaddasp(vf, vf, vf);
+  // CHECK: @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
+
+