kiausch updated this revision to Diff 314566.
kiausch edited the summary of this revision.
kiausch added a comment.

implemented review suggestions:

- updated clang/docs/ClangCommandLineReference.rst
- removed unnecessary HasEFPU2() wrapper function
- initialize HasEFPU2 member
- merged tests into spe.ll to remove identical parts


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D92935/new/

https://reviews.llvm.org/D92935

Files:
  clang/docs/ClangCommandLineReference.rst
  clang/include/clang/Driver/Options.td
  llvm/lib/Target/PowerPC/PPC.td
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCSubtarget.cpp
  llvm/lib/Target/PowerPC/PPCSubtarget.h
  llvm/test/CodeGen/PowerPC/spe.ll

Index: llvm/test/CodeGen/PowerPC/spe.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/spe.ll
+++ llvm/test/CodeGen/PowerPC/spe.ll
@@ -1,6 +1,18 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-unknown-linux-gnu \
-; RUN:          -mattr=+spe |  FileCheck %s
+; RUN: split-file %s %t
+; RUN: llc -verify-machineinstrs < %t/single.ll -mtriple=powerpc-unknown-linux-gnu \
+; RUN:          -mattr=+spe |  FileCheck %t/single.ll
+; RUN: llc -verify-machineinstrs < %t/double.ll -mtriple=powerpc-unknown-linux-gnu \
+; RUN:          -mattr=+spe |  FileCheck %t/double.ll -check-prefix=SPE
+; RUN: llc -verify-machineinstrs < %t/hwdouble.ll -mtriple=powerpc-unknown-linux-gnu \
+; RUN:          -mattr=+spe |  FileCheck %t/hwdouble.ll -check-prefix=SPE
+; RUN: llc -verify-machineinstrs < %t/single.ll -mtriple=powerpc-unknown-linux-gnu \
+; RUN:          -mattr=+efpu2 |  FileCheck %t/single.ll
+; RUN: llc -verify-machineinstrs < %t/double.ll -mtriple=powerpc-unknown-linux-gnu \
+; RUN:          -mattr=+efpu2 |  FileCheck %t/double.ll -check-prefix=EFPU2
+
+;--- single.ll
+; single tests (identical for -mattr=+spe and -mattr=+efpu2)
 
 declare float @llvm.fabs.float(float)
 define float @test_float_abs(float %a) #0 {
@@ -75,30 +87,19 @@
   ret float %v
 }
 
-define float @test_dtos(double %a) {
-; CHECK-LABEL: test_dtos:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    evmergelo 3, 3, 4
-; CHECK-NEXT:    efscfd 3, 3
-; CHECK-NEXT:    blr
-  entry:
-  %v = fptrunc double %a to float
-  ret float %v
-}
-
 define i32 @test_fcmpgt(float %a, float %b) {
 ; CHECK-LABEL: test_fcmpgt:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    stwu 1, -16(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    efscmpgt 0, 3, 4
-; CHECK-NEXT:    ble 0, .LBB8_2
+; CHECK-NEXT:    ble 0, .LBB7_2
 ; CHECK-NEXT:  # %bb.1: # %tr
 ; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:    b .LBB8_3
-; CHECK-NEXT:  .LBB8_2: # %fa
+; CHECK-NEXT:    b .LBB7_3
+; CHECK-NEXT:  .LBB7_2: # %fa
 ; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:  .LBB8_3: # %ret
+; CHECK-NEXT:  .LBB7_3: # %ret
 ; CHECK-NEXT:    stw 3, 12(1)
 ; CHECK-NEXT:    lwz 3, 12(1)
 ; CHECK-NEXT:    addi 1, 1, 16
@@ -124,19 +125,19 @@
 ; CHECK-NEXT:    stwu 1, -16(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    efscmpeq 0, 4, 4
-; CHECK-NEXT:    bc 4, 1, .LBB9_4
+; CHECK-NEXT:    bc 4, 1, .LBB8_4
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    efscmpeq 0, 3, 3
-; CHECK-NEXT:    bc 4, 1, .LBB9_4
+; CHECK-NEXT:    bc 4, 1, .LBB8_4
 ; CHECK-NEXT:  # %bb.2: # %entry
 ; CHECK-NEXT:    efscmpgt 0, 3, 4
-; CHECK-NEXT:    bc 12, 1, .LBB9_4
+; CHECK-NEXT:    bc 12, 1, .LBB8_4
 ; CHECK-NEXT:  # %bb.3: # %fa
 ; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:    b .LBB9_5
-; CHECK-NEXT:  .LBB9_4: # %tr
+; CHECK-NEXT:    b .LBB8_5
+; CHECK-NEXT:  .LBB8_4: # %tr
 ; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:  .LBB9_5: # %ret
+; CHECK-NEXT:  .LBB8_5: # %ret
 ; CHECK-NEXT:    stw 3, 12(1)
 ; CHECK-NEXT:    lwz 3, 12(1)
 ; CHECK-NEXT:    addi 1, 1, 16
@@ -162,19 +163,19 @@
 ; CHECK-NEXT:    stwu 1, -16(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    efscmpeq 0, 3, 3
-; CHECK-NEXT:    bc 4, 1, .LBB10_4
+; CHECK-NEXT:    bc 4, 1, .LBB9_4
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    efscmpeq 0, 4, 4
-; CHECK-NEXT:    bc 4, 1, .LBB10_4
+; CHECK-NEXT:    bc 4, 1, .LBB9_4
 ; CHECK-NEXT:  # %bb.2: # %entry
 ; CHECK-NEXT:    efscmpgt 0, 3, 4
-; CHECK-NEXT:    bc 12, 1, .LBB10_4
+; CHECK-NEXT:    bc 12, 1, .LBB9_4
 ; CHECK-NEXT:  # %bb.3: # %tr
 ; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:    b .LBB10_5
-; CHECK-NEXT:  .LBB10_4: # %fa
+; CHECK-NEXT:    b .LBB9_5
+; CHECK-NEXT:  .LBB9_4: # %fa
 ; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:  .LBB10_5: # %ret
+; CHECK-NEXT:  .LBB9_5: # %ret
 ; CHECK-NEXT:    stw 3, 12(1)
 ; CHECK-NEXT:    lwz 3, 12(1)
 ; CHECK-NEXT:    addi 1, 1, 16
@@ -200,13 +201,13 @@
 ; CHECK-NEXT:    stwu 1, -16(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    efscmpgt 0, 3, 4
-; CHECK-NEXT:    bgt 0, .LBB11_2
+; CHECK-NEXT:    bgt 0, .LBB10_2
 ; CHECK-NEXT:  # %bb.1: # %tr
 ; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:    b .LBB11_3
-; CHECK-NEXT:  .LBB11_2: # %fa
+; CHECK-NEXT:    b .LBB10_3
+; CHECK-NEXT:  .LBB10_2: # %fa
 ; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:  .LBB11_3: # %ret
+; CHECK-NEXT:  .LBB10_3: # %ret
 ; CHECK-NEXT:    stw 3, 12(1)
 ; CHECK-NEXT:    lwz 3, 12(1)
 ; CHECK-NEXT:    addi 1, 1, 16
@@ -233,13 +234,13 @@
 ; CHECK-NEXT:    stwu 1, -16(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    efscmpeq 0, 3, 4
-; CHECK-NEXT:    ble 0, .LBB12_2
+; CHECK-NEXT:    ble 0, .LBB11_2
 ; CHECK-NEXT:  # %bb.1: # %tr
 ; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:    b .LBB12_3
-; CHECK-NEXT:  .LBB12_2: # %fa
+; CHECK-NEXT:    b .LBB11_3
+; CHECK-NEXT:  .LBB11_2: # %fa
 ; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:  .LBB12_3: # %ret
+; CHECK-NEXT:  .LBB11_3: # %ret
 ; CHECK-NEXT:    stw 3, 12(1)
 ; CHECK-NEXT:    lwz 3, 12(1)
 ; CHECK-NEXT:    addi 1, 1, 16
@@ -267,11 +268,11 @@
 ; CHECK-NEXT:    efscmpeq 1, 4, 4
 ; CHECK-NEXT:    li 5, 1
 ; CHECK-NEXT:    crand 20, 5, 1
-; CHECK-NEXT:    bc 12, 20, .LBB13_2
+; CHECK-NEXT:    bc 12, 20, .LBB12_2
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    ori 3, 5, 0
 ; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB13_2: # %entry
+; CHECK-NEXT:  .LBB12_2: # %entry
 ; CHECK-NEXT:    li 3, 0
 ; CHECK-NEXT:    blr
   entry:
@@ -286,11 +287,11 @@
 ; CHECK-NEXT:    efscmpeq 1, 3, 3
 ; CHECK-NEXT:    li 5, 1
 ; CHECK-NEXT:    crnand 20, 5, 1
-; CHECK-NEXT:    bc 12, 20, .LBB14_2
+; CHECK-NEXT:    bc 12, 20, .LBB13_2
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    ori 3, 5, 0
 ; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB14_2: # %entry
+; CHECK-NEXT:  .LBB13_2: # %entry
 ; CHECK-NEXT:    li 3, 0
 ; CHECK-NEXT:    blr
   entry:
@@ -307,11 +308,11 @@
 ; CHECK-NEXT:    efscmpeq 0, 3, 4
 ; CHECK-NEXT:    li 5, 1
 ; CHECK-NEXT:    crnor 20, 1, 20
-; CHECK-NEXT:    bc 12, 20, .LBB15_2
+; CHECK-NEXT:    bc 12, 20, .LBB14_2
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    ori 3, 5, 0
 ; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB15_2: # %entry
+; CHECK-NEXT:  .LBB14_2: # %entry
 ; CHECK-NEXT:    li 3, 0
 ; CHECK-NEXT:    blr
   entry:
@@ -328,11 +329,11 @@
 ; CHECK-NEXT:    efscmpeq 0, 3, 4
 ; CHECK-NEXT:    li 5, 1
 ; CHECK-NEXT:    crorc 20, 1, 20
-; CHECK-NEXT:    bc 12, 20, .LBB16_2
+; CHECK-NEXT:    bc 12, 20, .LBB15_2
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    ori 3, 5, 0
 ; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB16_2: # %entry
+; CHECK-NEXT:  .LBB15_2: # %entry
 ; CHECK-NEXT:    li 3, 0
 ; CHECK-NEXT:    blr
   entry:
@@ -346,13 +347,13 @@
 ; CHECK-NEXT:    stwu 1, -16(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    efscmpeq 0, 3, 4
-; CHECK-NEXT:    bgt 0, .LBB17_2
+; CHECK-NEXT:    bgt 0, .LBB16_2
 ; CHECK-NEXT:  # %bb.1: # %tr
 ; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:    b .LBB17_3
-; CHECK-NEXT:  .LBB17_2: # %fa
+; CHECK-NEXT:    b .LBB16_3
+; CHECK-NEXT:  .LBB16_2: # %fa
 ; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:  .LBB17_3: # %ret
+; CHECK-NEXT:  .LBB16_3: # %ret
 ; CHECK-NEXT:    stw 3, 12(1)
 ; CHECK-NEXT:    lwz 3, 12(1)
 ; CHECK-NEXT:    addi 1, 1, 16
@@ -378,13 +379,13 @@
 ; CHECK-NEXT:    stwu 1, -16(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    efscmplt 0, 3, 4
-; CHECK-NEXT:    ble 0, .LBB18_2
+; CHECK-NEXT:    ble 0, .LBB17_2
 ; CHECK-NEXT:  # %bb.1: # %tr
 ; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:    b .LBB18_3
-; CHECK-NEXT:  .LBB18_2: # %fa
+; CHECK-NEXT:    b .LBB17_3
+; CHECK-NEXT:  .LBB17_2: # %fa
 ; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:  .LBB18_3: # %ret
+; CHECK-NEXT:  .LBB17_3: # %ret
 ; CHECK-NEXT:    stw 3, 12(1)
 ; CHECK-NEXT:    lwz 3, 12(1)
 ; CHECK-NEXT:    addi 1, 1, 16
@@ -413,11 +414,11 @@
 ; CHECK-NEXT:    efscmplt 0, 3, 4
 ; CHECK-NEXT:    li 5, 1
 ; CHECK-NEXT:    crnor 20, 1, 20
-; CHECK-NEXT:    bc 12, 20, .LBB19_2
+; CHECK-NEXT:    bc 12, 20, .LBB18_2
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    ori 3, 5, 0
 ; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB19_2: # %entry
+; CHECK-NEXT:  .LBB18_2: # %entry
 ; CHECK-NEXT:    li 3, 0
 ; CHECK-NEXT:    blr
   entry:
@@ -431,19 +432,19 @@
 ; CHECK-NEXT:    stwu 1, -16(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    efscmpeq 0, 3, 3
-; CHECK-NEXT:    bc 4, 1, .LBB20_4
+; CHECK-NEXT:    bc 4, 1, .LBB19_4
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    efscmpeq 0, 4, 4
-; CHECK-NEXT:    bc 4, 1, .LBB20_4
+; CHECK-NEXT:    bc 4, 1, .LBB19_4
 ; CHECK-NEXT:  # %bb.2: # %entry
 ; CHECK-NEXT:    efscmplt 0, 3, 4
-; CHECK-NEXT:    bc 12, 1, .LBB20_4
+; CHECK-NEXT:    bc 12, 1, .LBB19_4
 ; CHECK-NEXT:  # %bb.3: # %tr
 ; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:    b .LBB20_5
-; CHECK-NEXT:  .LBB20_4: # %fa
+; CHECK-NEXT:    b .LBB19_5
+; CHECK-NEXT:  .LBB19_4: # %fa
 ; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:  .LBB20_5: # %ret
+; CHECK-NEXT:  .LBB19_5: # %ret
 ; CHECK-NEXT:    stw 3, 12(1)
 ; CHECK-NEXT:    lwz 3, 12(1)
 ; CHECK-NEXT:    addi 1, 1, 16
@@ -469,13 +470,13 @@
 ; CHECK-NEXT:    stwu 1, -16(1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    efscmplt 0, 3, 4
-; CHECK-NEXT:    bgt 0, .LBB21_2
+; CHECK-NEXT:    bgt 0, .LBB20_2
 ; CHECK-NEXT:  # %bb.1: # %tr
 ; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:    b .LBB21_3
-; CHECK-NEXT:  .LBB21_2: # %fa
+; CHECK-NEXT:    b .LBB20_3
+; CHECK-NEXT:  .LBB20_2: # %fa
 ; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:  .LBB21_3: # %ret
+; CHECK-NEXT:  .LBB20_3: # %ret
 ; CHECK-NEXT:    stw 3, 12(1)
 ; CHECK-NEXT:    lwz 3, 12(1)
 ; CHECK-NEXT:    addi 1, 1, 16
@@ -554,15 +555,48 @@
 ; Check that it's not loading a double
 }
 
+;--- double.ll
 ; Double tests
+; results depend on -mattr=+spe or -mattr=+efpu2
+
+define float @test_dtos(double %a) {
+; SPE-LABEL: test_dtos:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    evmergelo 3, 3, 4
+; SPE-NEXT:    efscfd 3, 3
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_dtos:
+; EFPU2:       # %bb.0: # %entry
+; EFPU2-NEXT:    mflr 0
+; EFPU2-NEXT:    stw 0, 4(1)
+; EFPU2-NEXT:    stwu 1, -16(1)
+; EFPU2-NEXT:    .cfi_def_cfa_offset 16
+; EFPU2-NEXT:    .cfi_offset lr, 4
+; EFPU2-NEXT:    bl __truncdfsf2
+; EFPU2-NEXT:    lwz 0, 20(1)
+; EFPU2-NEXT:    addi 1, 1, 16
+; EFPU2-NEXT:    mtlr 0
+; EFPU2-NEXT:    blr
+  entry:
+  %v = fptrunc double %a to float
+  ret float %v
+}
 
 define void @test_double_abs(double * %aa) #0 {
-; CHECK-LABEL: test_double_abs:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    evldd 4, 0(3)
-; CHECK-NEXT:    efdabs 4, 4
-; CHECK-NEXT:    evstdd 4, 0(3)
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_double_abs:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    evldd 4, 0(3)
+; SPE-NEXT:    efdabs 4, 4
+; SPE-NEXT:    evstdd 4, 0(3)
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_double_abs:
+; EFPU2:       # %bb.0: # %entry
+; EFPU2-NEXT:    lwz 4, 0(3)
+; EFPU2-NEXT:    clrlwi	4, 4, 1
+; EFPU2-NEXT:    stw 4, 0(3)
+; EFPU2-NEXT:    blr
   entry:
     %0 = load double, double * %aa
     %1 = tail call double @llvm.fabs.f64(double %0) #2
@@ -574,12 +608,19 @@
 declare double @llvm.fabs.f64(double) #1
 
 define void @test_dnabs(double * %aa) #0 {
-; CHECK-LABEL: test_dnabs:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    evldd 4, 0(3)
-; CHECK-NEXT:    efdnabs 4, 4
-; CHECK-NEXT:    evstdd 4, 0(3)
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_dnabs:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    evldd 4, 0(3)
+; SPE-NEXT:    efdnabs 4, 4
+; SPE-NEXT:    evstdd 4, 0(3)
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_dnabs:
+; EFPU2:       # %bb.0: # %entry
+; EFPU2-NEXT:    lwz 4, 0(3)
+; EFPU2-NEXT:    oris 4, 4, 32768
+; EFPU2-NEXT:    stw 4, 0(3)
+; EFPU2-NEXT:    blr
   entry:
     %0 = load double, double * %aa
     %1 = tail call double @llvm.fabs.f64(double %0) #2
@@ -589,88 +630,107 @@
 }
 
 define double @test_ddiv(double %a, double %b) {
-; CHECK-LABEL: test_ddiv:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    evmergelo 5, 5, 6
-; CHECK-NEXT:    evmergelo 3, 3, 4
-; CHECK-NEXT:    efddiv 4, 3, 5
-; CHECK-NEXT:    evmergehi 3, 4, 4
-; CHECK-NEXT:    # kill: def $r4 killed $r4 killed $s4
-; CHECK-NEXT:    # kill: def $r3 killed $r3 killed $s3
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_ddiv:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    evmergelo 5, 5, 6
+; SPE-NEXT:    evmergelo 3, 3, 4
+; SPE-NEXT:    efddiv 4, 3, 5
+; SPE-NEXT:    evmergehi 3, 4, 4
+; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
+; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_ddiv:
+; EFPU2:    bl __divdf3
 entry:
   %v = fdiv double %a, %b
   ret double %v
-
 }
 
 define double @test_dmul(double %a, double %b) {
-; CHECK-LABEL: test_dmul:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    evmergelo 5, 5, 6
-; CHECK-NEXT:    evmergelo 3, 3, 4
-; CHECK-NEXT:    efdmul 4, 3, 5
-; CHECK-NEXT:    evmergehi 3, 4, 4
-; CHECK-NEXT:    # kill: def $r4 killed $r4 killed $s4
-; CHECK-NEXT:    # kill: def $r3 killed $r3 killed $s3
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_dmul:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    evmergelo 5, 5, 6
+; SPE-NEXT:    evmergelo 3, 3, 4
+; SPE-NEXT:    efdmul 4, 3, 5
+; SPE-NEXT:    evmergehi 3, 4, 4
+; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
+; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_dmul:
+; EFPU2:    bl __muldf3
   entry:
   %v = fmul double %a, %b
   ret double %v
 }
 
 define double @test_dadd(double %a, double %b) {
-; CHECK-LABEL: test_dadd:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    evmergelo 5, 5, 6
-; CHECK-NEXT:    evmergelo 3, 3, 4
-; CHECK-NEXT:    efdadd 4, 3, 5
-; CHECK-NEXT:    evmergehi 3, 4, 4
-; CHECK-NEXT:    # kill: def $r4 killed $r4 killed $s4
-; CHECK-NEXT:    # kill: def $r3 killed $r3 killed $s3
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_dadd:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    evmergelo 5, 5, 6
+; SPE-NEXT:    evmergelo 3, 3, 4
+; SPE-NEXT:    efdadd 4, 3, 5
+; SPE-NEXT:    evmergehi 3, 4, 4
+; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
+; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_dadd:
+; EFPU2:    bl __adddf3
   entry:
   %v = fadd double %a, %b
   ret double %v
 }
 
 define double @test_dsub(double %a, double %b) {
-; CHECK-LABEL: test_dsub:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    evmergelo 5, 5, 6
-; CHECK-NEXT:    evmergelo 3, 3, 4
-; CHECK-NEXT:    efdsub 4, 3, 5
-; CHECK-NEXT:    evmergehi 3, 4, 4
-; CHECK-NEXT:    # kill: def $r4 killed $r4 killed $s4
-; CHECK-NEXT:    # kill: def $r3 killed $r3 killed $s3
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_dsub:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    evmergelo 5, 5, 6
+; SPE-NEXT:    evmergelo 3, 3, 4
+; SPE-NEXT:    efdsub 4, 3, 5
+; SPE-NEXT:    evmergehi 3, 4, 4
+; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
+; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_dsub:
+; EFPU2:    bl __subdf3
   entry:
   %v = fsub double %a, %b
   ret double %v
 }
 
 define double @test_dneg(double %a) {
-; CHECK-LABEL: test_dneg:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    evmergelo 3, 3, 4
-; CHECK-NEXT:    efdneg 4, 3
-; CHECK-NEXT:    evmergehi 3, 4, 4
-; CHECK-NEXT:    # kill: def $r4 killed $r4 killed $s4
-; CHECK-NEXT:    # kill: def $r3 killed $r3 killed $s3
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_dneg:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    evmergelo 3, 3, 4
+; SPE-NEXT:    efdneg 4, 3
+; SPE-NEXT:    evmergehi 3, 4, 4
+; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
+; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_dneg:
+; EFPU2:       # %bb.0: # %entry
+; EFPU2-NEXT:    xoris 3, 3, 32768
+; EFPU2-NEXT:    blr
   entry:
   %v = fsub double -0.0, %a
   ret double %v
 }
 
 define double @test_stod(float %a) {
-; CHECK-LABEL: test_stod:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    efdcfs 4, 3
-; CHECK-NEXT:    evmergehi 3, 4, 4
-; CHECK-NEXT:    # kill: def $r4 killed $r4 killed $s4
-; CHECK-NEXT:    # kill: def $r3 killed $r3 killed $s3
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_stod:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    efdcfs 4, 3
+; SPE-NEXT:    evmergehi 3, 4, 4
+; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
+; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_stod:
+; EFPU2:    	bl __extendsfdf2
   entry:
   %v = fpext float %a to double
   ret double %v
@@ -678,66 +738,75 @@
 
 ; (un)ordered tests are expanded to une and oeq so verify
 define i1 @test_dcmpuno(double %a, double %b) {
-; CHECK-LABEL: test_dcmpuno:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    evmergelo 5, 5, 6
-; CHECK-NEXT:    evmergelo 3, 3, 4
-; CHECK-NEXT:    li 7, 1
-; CHECK-NEXT:    efdcmpeq 0, 3, 3
-; CHECK-NEXT:    efdcmpeq 1, 5, 5
-; CHECK-NEXT:    crand 20, 5, 1
-; CHECK-NEXT:    bc 12, 20, .LBB35_2
-; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    ori 3, 7, 0
-; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB35_2: # %entry
-; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_dcmpuno:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    evmergelo 5, 5, 6
+; SPE-NEXT:    evmergelo 3, 3, 4
+; SPE-NEXT:    li 7, 1
+; SPE-NEXT:    efdcmpeq 0, 3, 3
+; SPE-NEXT:    efdcmpeq 1, 5, 5
+; SPE-NEXT:    crand 20, 5, 1
+; SPE-NEXT:    bc 12, 20, .LBB9_2
+; SPE-NEXT:  # %bb.1: # %entry
+; SPE-NEXT:    ori 3, 7, 0
+; SPE-NEXT:    blr
+; SPE-NEXT:  .LBB9_2: # %entry
+; SPE-NEXT:    li 3, 0
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_dcmpuno:
+; EFPU2:    bl __unorddf2
   entry:
   %r = fcmp uno double %a, %b
   ret i1 %r
 }
 
 define i1 @test_dcmpord(double %a, double %b) {
-; CHECK-LABEL: test_dcmpord:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    evmergelo 3, 3, 4
-; CHECK-NEXT:    evmergelo 4, 5, 6
-; CHECK-NEXT:    li 7, 1
-; CHECK-NEXT:    efdcmpeq 0, 4, 4
-; CHECK-NEXT:    efdcmpeq 1, 3, 3
-; CHECK-NEXT:    crnand 20, 5, 1
-; CHECK-NEXT:    bc 12, 20, .LBB36_2
-; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    ori 3, 7, 0
-; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB36_2: # %entry
-; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_dcmpord:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    evmergelo 3, 3, 4
+; SPE-NEXT:    evmergelo 4, 5, 6
+; SPE-NEXT:    li 7, 1
+; SPE-NEXT:    efdcmpeq 0, 4, 4
+; SPE-NEXT:    efdcmpeq 1, 3, 3
+; SPE-NEXT:    crnand 20, 5, 1
+; SPE-NEXT:    bc 12, 20, .LBB10_2
+; SPE-NEXT:  # %bb.1: # %entry
+; SPE-NEXT:    ori 3, 7, 0
+; SPE-NEXT:    blr
+; SPE-NEXT:  .LBB10_2: # %entry
+; SPE-NEXT:    li 3, 0
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_dcmpord:
+; EFPU2:    bl __unorddf2
   entry:
   %r = fcmp ord double %a, %b
   ret i1 %r
 }
 
 define i32 @test_dcmpgt(double %a, double %b) {
-; CHECK-LABEL: test_dcmpgt:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    stwu 1, -16(1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    evmergelo 5, 5, 6
-; CHECK-NEXT:    evmergelo 3, 3, 4
-; CHECK-NEXT:    efdcmpgt 0, 3, 5
-; CHECK-NEXT:    ble 0, .LBB37_2
-; CHECK-NEXT:  # %bb.1: # %tr
-; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:    b .LBB37_3
-; CHECK-NEXT:  .LBB37_2: # %fa
-; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:  .LBB37_3: # %ret
-; CHECK-NEXT:    stw 3, 12(1)
-; CHECK-NEXT:    lwz 3, 12(1)
-; CHECK-NEXT:    addi 1, 1, 16
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_dcmpgt:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    stwu 1, -16(1)
+; SPE-NEXT:    .cfi_def_cfa_offset 16
+; SPE-NEXT:    evmergelo 5, 5, 6
+; SPE-NEXT:    evmergelo 3, 3, 4
+; SPE-NEXT:    efdcmpgt 0, 3, 5
+; SPE-NEXT:    ble 0, .LBB11_2
+; SPE-NEXT:  # %bb.1: # %tr
+; SPE-NEXT:    li 3, 1
+; SPE-NEXT:    b .LBB11_3
+; SPE-NEXT:  .LBB11_2: # %fa
+; SPE-NEXT:    li 3, 0
+; SPE-NEXT:  .LBB11_3: # %ret
+; SPE-NEXT:    stw 3, 12(1)
+; SPE-NEXT:    lwz 3, 12(1)
+; SPE-NEXT:    addi 1, 1, 16
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_dcmpgt:
+; EFPU2:    bl __gtdf2
   entry:
   %r = alloca i32, align 4
   %c = fcmp ogt double %a, %b
@@ -754,30 +823,33 @@
 }
 
 define i32 @test_dcmpugt(double %a, double %b) {
-; CHECK-LABEL: test_dcmpugt:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    stwu 1, -16(1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    evmergelo 3, 3, 4
-; CHECK-NEXT:    evmergelo 4, 5, 6
-; CHECK-NEXT:    efdcmpeq 0, 4, 4
-; CHECK-NEXT:    bc 4, 1, .LBB38_4
-; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    efdcmpeq 0, 3, 3
-; CHECK-NEXT:    bc 4, 1, .LBB38_4
-; CHECK-NEXT:  # %bb.2: # %entry
-; CHECK-NEXT:    efdcmpgt 0, 3, 4
-; CHECK-NEXT:    bc 12, 1, .LBB38_4
-; CHECK-NEXT:  # %bb.3: # %fa
-; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:    b .LBB38_5
-; CHECK-NEXT:  .LBB38_4: # %tr
-; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:  .LBB38_5: # %ret
-; CHECK-NEXT:    stw 3, 12(1)
-; CHECK-NEXT:    lwz 3, 12(1)
-; CHECK-NEXT:    addi 1, 1, 16
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_dcmpugt:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    stwu 1, -16(1)
+; SPE-NEXT:    .cfi_def_cfa_offset 16
+; SPE-NEXT:    evmergelo 3, 3, 4
+; SPE-NEXT:    evmergelo 4, 5, 6
+; SPE-NEXT:    efdcmpeq 0, 4, 4
+; SPE-NEXT:    bc 4, 1, .LBB12_4
+; SPE-NEXT:  # %bb.1: # %entry
+; SPE-NEXT:    efdcmpeq 0, 3, 3
+; SPE-NEXT:    bc 4, 1, .LBB12_4
+; SPE-NEXT:  # %bb.2: # %entry
+; SPE-NEXT:    efdcmpgt 0, 3, 4
+; SPE-NEXT:    bc 12, 1, .LBB12_4
+; SPE-NEXT:  # %bb.3: # %fa
+; SPE-NEXT:    li 3, 0
+; SPE-NEXT:    b .LBB12_5
+; SPE-NEXT:  .LBB12_4: # %tr
+; SPE-NEXT:    li 3, 1
+; SPE-NEXT:  .LBB12_5: # %ret
+; SPE-NEXT:    stw 3, 12(1)
+; SPE-NEXT:    lwz 3, 12(1)
+; SPE-NEXT:    addi 1, 1, 16
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_dcmpugt:
+; EFPU2:    bl __ledf2
   entry:
   %r = alloca i32, align 4
   %c = fcmp ugt double %a, %b
@@ -794,24 +866,27 @@
 }
 
 define i32 @test_dcmple(double %a, double %b) {
-; CHECK-LABEL: test_dcmple:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    stwu 1, -16(1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    evmergelo 5, 5, 6
-; CHECK-NEXT:    evmergelo 3, 3, 4
-; CHECK-NEXT:    efdcmpgt 0, 3, 5
-; CHECK-NEXT:    bgt 0, .LBB39_2
-; CHECK-NEXT:  # %bb.1: # %tr
-; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:    b .LBB39_3
-; CHECK-NEXT:  .LBB39_2: # %fa
-; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:  .LBB39_3: # %ret
-; CHECK-NEXT:    stw 3, 12(1)
-; CHECK-NEXT:    lwz 3, 12(1)
-; CHECK-NEXT:    addi 1, 1, 16
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_dcmple:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    stwu 1, -16(1)
+; SPE-NEXT:    .cfi_def_cfa_offset 16
+; SPE-NEXT:    evmergelo 5, 5, 6
+; SPE-NEXT:    evmergelo 3, 3, 4
+; SPE-NEXT:    efdcmpgt 0, 3, 5
+; SPE-NEXT:    bgt 0, .LBB13_2
+; SPE-NEXT:  # %bb.1: # %tr
+; SPE-NEXT:    li 3, 1
+; SPE-NEXT:    b .LBB13_3
+; SPE-NEXT:  .LBB13_2: # %fa
+; SPE-NEXT:    li 3, 0
+; SPE-NEXT:  .LBB13_3: # %ret
+; SPE-NEXT:    stw 3, 12(1)
+; SPE-NEXT:    lwz 3, 12(1)
+; SPE-NEXT:    addi 1, 1, 16
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_dcmple:
+; EFPU2:    bl __gtdf2
   entry:
   %r = alloca i32, align 4
   %c = fcmp ule double %a, %b
@@ -828,24 +903,27 @@
 }
 
 define i32 @test_dcmpule(double %a, double %b) {
-; CHECK-LABEL: test_dcmpule:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    stwu 1, -16(1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    evmergelo 5, 5, 6
-; CHECK-NEXT:    evmergelo 3, 3, 4
-; CHECK-NEXT:    efdcmpgt 0, 3, 5
-; CHECK-NEXT:    bgt 0, .LBB40_2
-; CHECK-NEXT:  # %bb.1: # %tr
-; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:    b .LBB40_3
-; CHECK-NEXT:  .LBB40_2: # %fa
-; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:  .LBB40_3: # %ret
-; CHECK-NEXT:    stw 3, 12(1)
-; CHECK-NEXT:    lwz 3, 12(1)
-; CHECK-NEXT:    addi 1, 1, 16
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_dcmpule:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    stwu 1, -16(1)
+; SPE-NEXT:    .cfi_def_cfa_offset 16
+; SPE-NEXT:    evmergelo 5, 5, 6
+; SPE-NEXT:    evmergelo 3, 3, 4
+; SPE-NEXT:    efdcmpgt 0, 3, 5
+; SPE-NEXT:    bgt 0, .LBB14_2
+; SPE-NEXT:  # %bb.1: # %tr
+; SPE-NEXT:    li 3, 1
+; SPE-NEXT:    b .LBB14_3
+; SPE-NEXT:  .LBB14_2: # %fa
+; SPE-NEXT:    li 3, 0
+; SPE-NEXT:  .LBB14_3: # %ret
+; SPE-NEXT:    stw 3, 12(1)
+; SPE-NEXT:    lwz 3, 12(1)
+; SPE-NEXT:    addi 1, 1, 16
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_dcmpule:
+; EFPU2:    bl __gtdf2
   entry:
   %r = alloca i32, align 4
   %c = fcmp ule double %a, %b
@@ -863,24 +941,27 @@
 
 ; The type of comparison found in C's if (x == y)
 define i32 @test_dcmpeq(double %a, double %b) {
-; CHECK-LABEL: test_dcmpeq:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    stwu 1, -16(1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    evmergelo 5, 5, 6
-; CHECK-NEXT:    evmergelo 3, 3, 4
-; CHECK-NEXT:    efdcmpeq 0, 3, 5
-; CHECK-NEXT:    ble 0, .LBB41_2
-; CHECK-NEXT:  # %bb.1: # %tr
-; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:    b .LBB41_3
-; CHECK-NEXT:  .LBB41_2: # %fa
-; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:  .LBB41_3: # %ret
-; CHECK-NEXT:    stw 3, 12(1)
-; CHECK-NEXT:    lwz 3, 12(1)
-; CHECK-NEXT:    addi 1, 1, 16
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_dcmpeq:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    stwu 1, -16(1)
+; SPE-NEXT:    .cfi_def_cfa_offset 16
+; SPE-NEXT:    evmergelo 5, 5, 6
+; SPE-NEXT:    evmergelo 3, 3, 4
+; SPE-NEXT:    efdcmpeq 0, 3, 5
+; SPE-NEXT:    ble 0, .LBB15_2
+; SPE-NEXT:  # %bb.1: # %tr
+; SPE-NEXT:    li 3, 1
+; SPE-NEXT:    b .LBB15_3
+; SPE-NEXT:  .LBB15_2: # %fa
+; SPE-NEXT:    li 3, 0
+; SPE-NEXT:  .LBB15_3: # %ret
+; SPE-NEXT:    stw 3, 12(1)
+; SPE-NEXT:    lwz 3, 12(1)
+; SPE-NEXT:    addi 1, 1, 16
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_dcmpeq:
+; EFPU2:    bl __nedf2
   entry:
   %r = alloca i32, align 4
   %c = fcmp oeq double %a, %b
@@ -897,30 +978,34 @@
 }
 
 define i32 @test_dcmpueq(double %a, double %b) {
-; CHECK-LABEL: test_dcmpueq:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    stwu 1, -16(1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    evmergelo 3, 3, 4
-; CHECK-NEXT:    evmergelo 4, 5, 6
-; CHECK-NEXT:    efdcmpeq 0, 4, 4
-; CHECK-NEXT:    bc 4, 1, .LBB42_4
-; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    efdcmpeq 0, 3, 3
-; CHECK-NEXT:    bc 4, 1, .LBB42_4
-; CHECK-NEXT:  # %bb.2: # %entry
-; CHECK-NEXT:    efdcmpeq 0, 3, 4
-; CHECK-NEXT:    bc 12, 1, .LBB42_4
-; CHECK-NEXT:  # %bb.3: # %fa
-; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:    b .LBB42_5
-; CHECK-NEXT:  .LBB42_4: # %tr
-; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:  .LBB42_5: # %ret
-; CHECK-NEXT:    stw 3, 12(1)
-; CHECK-NEXT:    lwz 3, 12(1)
-; CHECK-NEXT:    addi 1, 1, 16
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_dcmpueq:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    stwu 1, -16(1)
+; SPE-NEXT:    .cfi_def_cfa_offset 16
+; SPE-NEXT:    evmergelo 3, 3, 4
+; SPE-NEXT:    evmergelo 4, 5, 6
+; SPE-NEXT:    efdcmpeq 0, 4, 4
+; SPE-NEXT:    bc 4, 1, .LBB16_4
+; SPE-NEXT:  # %bb.1: # %entry
+; SPE-NEXT:    efdcmpeq 0, 3, 3
+; SPE-NEXT:    bc 4, 1, .LBB16_4
+; SPE-NEXT:  # %bb.2: # %entry
+; SPE-NEXT:    efdcmpeq 0, 3, 4
+; SPE-NEXT:    bc 12, 1, .LBB16_4
+; SPE-NEXT:  # %bb.3: # %fa
+; SPE-NEXT:    li 3, 0
+; SPE-NEXT:    b .LBB16_5
+; SPE-NEXT:  .LBB16_4: # %tr
+; SPE-NEXT:    li 3, 1
+; SPE-NEXT:  .LBB16_5: # %ret
+; SPE-NEXT:    stw 3, 12(1)
+; SPE-NEXT:    lwz 3, 12(1)
+; SPE-NEXT:    addi 1, 1, 16
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_dcmpueq:
+; EFPU2:    bl __eqdf2
+; EFPU2:    bl __unorddf2
   entry:
   %r = alloca i32, align 4
   %c = fcmp ueq double %a, %b
@@ -937,47 +1022,54 @@
 }
 
 define i1 @test_dcmpne(double %a, double %b) {
-; CHECK-LABEL: test_dcmpne:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    evmergelo 3, 3, 4
-; CHECK-NEXT:    evmergelo 4, 5, 6
-; CHECK-NEXT:    li 7, 1
-; CHECK-NEXT:    efdcmpeq 0, 4, 4
-; CHECK-NEXT:    efdcmpeq 1, 3, 3
-; CHECK-NEXT:    efdcmpeq 5, 3, 4
-; CHECK-NEXT:    crand 24, 5, 1
-; CHECK-NEXT:    crorc 20, 21, 24
-; CHECK-NEXT:    bc 12, 20, .LBB43_2
-; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    ori 3, 7, 0
-; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB43_2: # %entry
-; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_dcmpne:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    evmergelo 3, 3, 4
+; SPE-NEXT:    evmergelo 4, 5, 6
+; SPE-NEXT:    li 7, 1
+; SPE-NEXT:    efdcmpeq 0, 4, 4
+; SPE-NEXT:    efdcmpeq 1, 3, 3
+; SPE-NEXT:    efdcmpeq 5, 3, 4
+; SPE-NEXT:    crand 24, 5, 1
+; SPE-NEXT:    crorc 20, 21, 24
+; SPE-NEXT:    bc 12, 20, .LBB17_2
+; SPE-NEXT:  # %bb.1: # %entry
+; SPE-NEXT:    ori 3, 7, 0
+; SPE-NEXT:    blr
+; SPE-NEXT:  .LBB17_2: # %entry
+; SPE-NEXT:    li 3, 0
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_dcmpne:
+; EFPU2:    bl __unorddf2
+; EFPU2:    bl __eqdf2
   entry:
   %r = fcmp one double %a, %b
   ret i1 %r
 }
 
 define i32 @test_dcmpune(double %a, double %b) {
-; CHECK-LABEL: test_dcmpune:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    stwu 1, -16(1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    evmergelo 5, 5, 6
-; CHECK-NEXT:    evmergelo 3, 3, 4
-; CHECK-NEXT:    efdcmpeq 0, 3, 5
-; CHECK-NEXT:    bgt 0, .LBB44_2
-; CHECK-NEXT:  # %bb.1: # %tr
-; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:    b .LBB44_3
-; CHECK-NEXT:  .LBB44_2: # %fa
-; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:  .LBB44_3: # %ret
-; CHECK-NEXT:    stw 3, 12(1)
-; CHECK-NEXT:    lwz 3, 12(1)
-; CHECK-NEXT:    addi 1, 1, 16
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_dcmpune:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    stwu 1, -16(1)
+; SPE-NEXT:    .cfi_def_cfa_offset 16
+; SPE-NEXT:    evmergelo 5, 5, 6
+; SPE-NEXT:    evmergelo 3, 3, 4
+; SPE-NEXT:    efdcmpeq 0, 3, 5
+; SPE-NEXT:    bgt 0, .LBB18_2
+; SPE-NEXT:  # %bb.1: # %tr
+; SPE-NEXT:    li 3, 1
+; SPE-NEXT:    b .LBB18_3
+; SPE-NEXT:  .LBB18_2: # %fa
+; SPE-NEXT:    li 3, 0
+; SPE-NEXT:  .LBB18_3: # %ret
+; SPE-NEXT:    stw 3, 12(1)
+; SPE-NEXT:    lwz 3, 12(1)
+; SPE-NEXT:    addi 1, 1, 16
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_dcmpune:
+; EFPU2:    bl __eqdf2
   entry:
   %r = alloca i32, align 4
   %c = fcmp une double %a, %b
@@ -994,24 +1086,27 @@
 }
 
 define i32 @test_dcmplt(double %a, double %b) {
-; CHECK-LABEL: test_dcmplt:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    stwu 1, -16(1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    evmergelo 5, 5, 6
-; CHECK-NEXT:    evmergelo 3, 3, 4
-; CHECK-NEXT:    efdcmplt 0, 3, 5
-; CHECK-NEXT:    ble 0, .LBB45_2
-; CHECK-NEXT:  # %bb.1: # %tr
-; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:    b .LBB45_3
-; CHECK-NEXT:  .LBB45_2: # %fa
-; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:  .LBB45_3: # %ret
-; CHECK-NEXT:    stw 3, 12(1)
-; CHECK-NEXT:    lwz 3, 12(1)
-; CHECK-NEXT:    addi 1, 1, 16
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_dcmplt:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    stwu 1, -16(1)
+; SPE-NEXT:    .cfi_def_cfa_offset 16
+; SPE-NEXT:    evmergelo 5, 5, 6
+; SPE-NEXT:    evmergelo 3, 3, 4
+; SPE-NEXT:    efdcmplt 0, 3, 5
+; SPE-NEXT:    ble 0, .LBB19_2
+; SPE-NEXT:  # %bb.1: # %tr
+; SPE-NEXT:    li 3, 1
+; SPE-NEXT:    b .LBB19_3
+; SPE-NEXT:  .LBB19_2: # %fa
+; SPE-NEXT:    li 3, 0
+; SPE-NEXT:  .LBB19_3: # %ret
+; SPE-NEXT:    stw 3, 12(1)
+; SPE-NEXT:    lwz 3, 12(1)
+; SPE-NEXT:    addi 1, 1, 16
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_dcmplt:
+; EFPU2:    bl __ltdf2
   entry:
   %r = alloca i32, align 4
   %c = fcmp olt double %a, %b
@@ -1028,30 +1123,33 @@
 }
 
 define i32 @test_dcmpult(double %a, double %b) {
-; CHECK-LABEL: test_dcmpult:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    stwu 1, -16(1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    evmergelo 3, 3, 4
-; CHECK-NEXT:    evmergelo 4, 5, 6
-; CHECK-NEXT:    efdcmpeq 0, 4, 4
-; CHECK-NEXT:    bc 4, 1, .LBB46_4
-; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    efdcmpeq 0, 3, 3
-; CHECK-NEXT:    bc 4, 1, .LBB46_4
-; CHECK-NEXT:  # %bb.2: # %entry
-; CHECK-NEXT:    efdcmplt 0, 3, 4
-; CHECK-NEXT:    bc 12, 1, .LBB46_4
-; CHECK-NEXT:  # %bb.3: # %fa
-; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:    b .LBB46_5
-; CHECK-NEXT:  .LBB46_4: # %tr
-; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:  .LBB46_5: # %ret
-; CHECK-NEXT:    stw 3, 12(1)
-; CHECK-NEXT:    lwz 3, 12(1)
-; CHECK-NEXT:    addi 1, 1, 16
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_dcmpult:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    stwu 1, -16(1)
+; SPE-NEXT:    .cfi_def_cfa_offset 16
+; SPE-NEXT:    evmergelo 3, 3, 4
+; SPE-NEXT:    evmergelo 4, 5, 6
+; SPE-NEXT:    efdcmpeq 0, 4, 4
+; SPE-NEXT:    bc 4, 1, .LBB20_4
+; SPE-NEXT:  # %bb.1: # %entry
+; SPE-NEXT:    efdcmpeq 0, 3, 3
+; SPE-NEXT:    bc 4, 1, .LBB20_4
+; SPE-NEXT:  # %bb.2: # %entry
+; SPE-NEXT:    efdcmplt 0, 3, 4
+; SPE-NEXT:    bc 12, 1, .LBB20_4
+; SPE-NEXT:  # %bb.3: # %fa
+; SPE-NEXT:    li 3, 0
+; SPE-NEXT:    b .LBB20_5
+; SPE-NEXT:  .LBB20_4: # %tr
+; SPE-NEXT:    li 3, 1
+; SPE-NEXT:  .LBB20_5: # %ret
+; SPE-NEXT:    stw 3, 12(1)
+; SPE-NEXT:    lwz 3, 12(1)
+; SPE-NEXT:    addi 1, 1, 16
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_dcmpult:
+; EFPU2:    bl __gedf2
   entry:
   %r = alloca i32, align 4
   %c = fcmp ult double %a, %b
@@ -1068,47 +1166,53 @@
 }
 
 define i1 @test_dcmpge(double %a, double %b) {
-; CHECK-LABEL: test_dcmpge:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    evmergelo 3, 3, 4
-; CHECK-NEXT:    evmergelo 4, 5, 6
-; CHECK-NEXT:    li 7, 1
-; CHECK-NEXT:    efdcmpeq 0, 4, 4
-; CHECK-NEXT:    efdcmpeq 1, 3, 3
-; CHECK-NEXT:    efdcmplt 5, 3, 4
-; CHECK-NEXT:    crand 24, 5, 1
-; CHECK-NEXT:    crorc 20, 21, 24
-; CHECK-NEXT:    bc 12, 20, .LBB47_2
-; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    ori 3, 7, 0
-; CHECK-NEXT:    blr
-; CHECK-NEXT:  .LBB47_2: # %entry
-; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_dcmpge:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    evmergelo 3, 3, 4
+; SPE-NEXT:    evmergelo 4, 5, 6
+; SPE-NEXT:    li 7, 1
+; SPE-NEXT:    efdcmpeq 0, 4, 4
+; SPE-NEXT:    efdcmpeq 1, 3, 3
+; SPE-NEXT:    efdcmplt 5, 3, 4
+; SPE-NEXT:    crand 24, 5, 1
+; SPE-NEXT:    crorc 20, 21, 24
+; SPE-NEXT:    bc 12, 20, .LBB21_2
+; SPE-NEXT:  # %bb.1: # %entry
+; SPE-NEXT:    ori 3, 7, 0
+; SPE-NEXT:    blr
+; SPE-NEXT:  .LBB21_2: # %entry
+; SPE-NEXT:    li 3, 0
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_dcmpge:
+; EFPU2:    bl __gedf2
   entry:
   %r = fcmp oge double %a, %b
   ret i1 %r
 }
 
 define i32 @test_dcmpuge(double %a, double %b) {
-; CHECK-LABEL: test_dcmpuge:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    stwu 1, -16(1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    evmergelo 5, 5, 6
-; CHECK-NEXT:    evmergelo 3, 3, 4
-; CHECK-NEXT:    efdcmplt 0, 3, 5
-; CHECK-NEXT:    bgt 0, .LBB48_2
-; CHECK-NEXT:  # %bb.1: # %tr
-; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:    b .LBB48_3
-; CHECK-NEXT:  .LBB48_2: # %fa
-; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:  .LBB48_3: # %ret
-; CHECK-NEXT:    stw 3, 12(1)
-; CHECK-NEXT:    lwz 3, 12(1)
-; CHECK-NEXT:    addi 1, 1, 16
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_dcmpuge:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    stwu 1, -16(1)
+; SPE-NEXT:    .cfi_def_cfa_offset 16
+; SPE-NEXT:    evmergelo 5, 5, 6
+; SPE-NEXT:    evmergelo 3, 3, 4
+; SPE-NEXT:    efdcmplt 0, 3, 5
+; SPE-NEXT:    bgt 0, .LBB22_2
+; SPE-NEXT:  # %bb.1: # %tr
+; SPE-NEXT:    li 3, 1
+; SPE-NEXT:    b .LBB22_3
+; SPE-NEXT:  .LBB22_2: # %fa
+; SPE-NEXT:    li 3, 0
+; SPE-NEXT:  .LBB22_3: # %ret
+; SPE-NEXT:    stw 3, 12(1)
+; SPE-NEXT:    lwz 3, 12(1)
+; SPE-NEXT:    addi 1, 1, 16
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_dcmpuge:
+; EFPU2:    bl __ltdf2
   entry:
   %r = alloca i32, align 4
   %c = fcmp uge double %a, %b
@@ -1125,97 +1229,102 @@
 }
 
 define double @test_dselect(double %a, double %b, i1 %c) {
-; CHECK-LABEL: test_dselect:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    andi. 7, 7, 1
-; CHECK-NEXT:    evmergelo 5, 5, 6
-; CHECK-NEXT:    evmergelo 4, 3, 4
-; CHECK-NEXT:    bc 12, 1, .LBB49_2
-; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    evor 4, 5, 5
-; CHECK-NEXT:  .LBB49_2: # %entry
-; CHECK-NEXT:    evmergehi 3, 4, 4
-; CHECK-NEXT:    # kill: def $r4 killed $r4 killed $s4
-; CHECK-NEXT:    # kill: def $r3 killed $r3 killed $s3
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_dselect:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    andi. 7, 7, 1
+; SPE-NEXT:    evmergelo 5, 5, 6
+; SPE-NEXT:    evmergelo 4, 3, 4
+; SPE-NEXT:    bc 12, 1, .LBB23_2
+; SPE-NEXT:  # %bb.1: # %entry
+; SPE-NEXT:    evor 4, 5, 5
+; SPE-NEXT:  .LBB23_2: # %entry
+; SPE-NEXT:    evmergehi 3, 4, 4
+; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
+; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_dselect:
+; EFPU2:       # %bb.0: # %entry
+; EFPU2-NEXT:    andi. 7, 7, 1
+; EFPU2-NEXT:    bclr 12, 1, 0
+; EFPU2-NEXT:  # %bb.1:                                # %entry
+; EFPU2-NEXT:    ori 3, 5, 0
+; EFPU2-NEXT:    ori 4, 6, 0
+; EFPU2-NEXT:    blr
 entry:
   %r = select i1 %c, double %a, double %b
   ret double %r
 }
 
 define i32 @test_dtoui(double %a) {
-; CHECK-LABEL: test_dtoui:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    evmergelo 3, 3, 4
-; CHECK-NEXT:    efdctuiz 3, 3
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_dtoui:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    evmergelo 3, 3, 4
+; SPE-NEXT:    efdctuiz 3, 3
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_dtoui:
+; EFPU2:    bl __fixunsdfsi
 entry:
   %v = fptoui double %a to i32
   ret i32 %v
 }
 
 define i32 @test_dtosi(double %a) {
-; CHECK-LABEL: test_dtosi:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    evmergelo 3, 3, 4
-; CHECK-NEXT:    efdctsiz 3, 3
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_dtosi:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    evmergelo 3, 3, 4
+; SPE-NEXT:    efdctsiz 3, 3
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_dtosi:
+; EFPU2:    bl __fixdfsi
 entry:
   %v = fptosi double %a to i32
   ret i32 %v
 }
 
 define double @test_dfromui(i32 %a) {
-; CHECK-LABEL: test_dfromui:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    efdcfui 4, 3
-; CHECK-NEXT:    evmergehi 3, 4, 4
-; CHECK-NEXT:    # kill: def $r4 killed $r4 killed $s4
-; CHECK-NEXT:    # kill: def $r3 killed $r3 killed $s3
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_dfromui:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    efdcfui 4, 3
+; SPE-NEXT:    evmergehi 3, 4, 4
+; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
+; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_dfromui:
+; EFPU2:    bl __floatunsidf
 entry:
   %v = uitofp i32 %a to double
   ret double %v
 }
 
 define double @test_dfromsi(i32 %a) {
-; CHECK-LABEL: test_dfromsi:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    efdcfsi 4, 3
-; CHECK-NEXT:    evmergehi 3, 4, 4
-; CHECK-NEXT:    # kill: def $r4 killed $r4 killed $s4
-; CHECK-NEXT:    # kill: def $r3 killed $r3 killed $s3
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_dfromsi:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    efdcfsi 4, 3
+; SPE-NEXT:    evmergehi 3, 4, 4
+; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
+; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_dfromsi:
+; EFPU2:    bl __floatsidf
 entry:
   %v = sitofp i32 %a to double
   ret double %v
 }
 
-define i32 @test_dasmconst(double %x) {
-; CHECK-LABEL: test_dasmconst:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    stwu 1, -16(1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    evmergelo 3, 3, 4
-; CHECK-NEXT:    evstdd 3, 8(1)
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    efdctsi 3, 3
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    addi 1, 1, 16
-; CHECK-NEXT:    blr
-entry:
-  %x.addr = alloca double, align 8
-  store double %x, double* %x.addr, align 8
-  %0 = load double, double* %x.addr, align 8
-  %1 = call i32 asm sideeffect "efdctsi $0, $1", "=d,d"(double %0)
-  ret i32 %1
-}
-
 declare double @test_spill_spe_regs(double, double);
 define dso_local void @test_func2() #0 {
-; CHECK-LABEL: test_func2:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_func2:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_func2:
+; EFPU2:       # %bb.0: # %entry
+; EFPU2-NEXT:    blr
 entry:
   ret void
 }
@@ -1223,120 +1332,174 @@
 declare void @test_memset(i8* nocapture writeonly, i8, i32, i1)
 @global_var1 = global i32 0, align 4
 define double @test_spill(double %a, i32 %a1, i64 %a2, i8 * %a3, i32 *%a4, i32* %a5) nounwind {
-; CHECK-LABEL: test_spill:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    stw 0, 4(1)
-; CHECK-NEXT:    stwu 1, -352(1)
-; CHECK-NEXT:    li 5, 256
-; CHECK-NEXT:    evstddx 30, 1, 5 # 8-byte Folded Spill
-; CHECK-NEXT:    li 5, 264
-; CHECK-NEXT:    evstddx 31, 1, 5 # 8-byte Folded Spill
-; CHECK-NEXT:    li 5, .LCPI56_0@l
-; CHECK-NEXT:    lis 6, .LCPI56_0@ha
-; CHECK-NEXT:    evlddx 5, 6, 5
-; CHECK-NEXT:    stw 14, 280(1) # 4-byte Folded Spill
-; CHECK-NEXT:    stw 15, 284(1) # 4-byte Folded Spill
-; CHECK-NEXT:    stw 16, 288(1) # 4-byte Folded Spill
-; CHECK-NEXT:    stw 17, 292(1) # 4-byte Folded Spill
-; CHECK-NEXT:    stw 18, 296(1) # 4-byte Folded Spill
-; CHECK-NEXT:    stw 19, 300(1) # 4-byte Folded Spill
-; CHECK-NEXT:    stw 20, 304(1) # 4-byte Folded Spill
-; CHECK-NEXT:    stw 21, 308(1) # 4-byte Folded Spill
-; CHECK-NEXT:    stw 22, 312(1) # 4-byte Folded Spill
-; CHECK-NEXT:    stw 23, 316(1) # 4-byte Folded Spill
-; CHECK-NEXT:    stw 24, 320(1) # 4-byte Folded Spill
-; CHECK-NEXT:    stw 25, 324(1) # 4-byte Folded Spill
-; CHECK-NEXT:    stw 26, 328(1) # 4-byte Folded Spill
-; CHECK-NEXT:    stw 27, 332(1) # 4-byte Folded Spill
-; CHECK-NEXT:    stw 28, 336(1) # 4-byte Folded Spill
-; CHECK-NEXT:    stw 29, 340(1) # 4-byte Folded Spill
-; CHECK-NEXT:    stw 30, 344(1) # 4-byte Folded Spill
-; CHECK-NEXT:    stw 31, 348(1) # 4-byte Folded Spill
-; CHECK-NEXT:    evstdd 14, 128(1) # 8-byte Folded Spill
-; CHECK-NEXT:    evstdd 15, 136(1) # 8-byte Folded Spill
-; CHECK-NEXT:    evstdd 16, 144(1) # 8-byte Folded Spill
-; CHECK-NEXT:    evstdd 17, 152(1) # 8-byte Folded Spill
-; CHECK-NEXT:    evstdd 18, 160(1) # 8-byte Folded Spill
-; CHECK-NEXT:    evstdd 19, 168(1) # 8-byte Folded Spill
-; CHECK-NEXT:    evstdd 20, 176(1) # 8-byte Folded Spill
-; CHECK-NEXT:    evstdd 21, 184(1) # 8-byte Folded Spill
-; CHECK-NEXT:    evstdd 22, 192(1) # 8-byte Folded Spill
-; CHECK-NEXT:    evstdd 23, 200(1) # 8-byte Folded Spill
-; CHECK-NEXT:    evstdd 24, 208(1) # 8-byte Folded Spill
-; CHECK-NEXT:    evstdd 25, 216(1) # 8-byte Folded Spill
-; CHECK-NEXT:    evstdd 26, 224(1) # 8-byte Folded Spill
-; CHECK-NEXT:    evstdd 27, 232(1) # 8-byte Folded Spill
-; CHECK-NEXT:    evstdd 28, 240(1) # 8-byte Folded Spill
-; CHECK-NEXT:    evstdd 29, 248(1) # 8-byte Folded Spill
-; CHECK-NEXT:    evmergelo 3, 3, 4
-; CHECK-NEXT:    lwz 4, 360(1)
-; CHECK-NEXT:    efdadd 3, 3, 3
-; CHECK-NEXT:    efdadd 3, 3, 5
-; CHECK-NEXT:    evstdd 3, 24(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stw 4, 20(1) # 4-byte Folded Spill
-; CHECK-NEXT:    #APP
-; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    addi 3, 1, 76
-; CHECK-NEXT:    li 4, 0
-; CHECK-NEXT:    li 5, 24
-; CHECK-NEXT:    li 6, 1
-; CHECK-NEXT:    li 30, 0
-; CHECK-NEXT:    bl test_memset
-; CHECK-NEXT:    lwz 3, 20(1) # 4-byte Folded Reload
-; CHECK-NEXT:    stw 30, 0(3)
-; CHECK-NEXT:    bl test_func2
-; CHECK-NEXT:    addi 3, 1, 32
-; CHECK-NEXT:    li 4, 0
-; CHECK-NEXT:    li 5, 20
-; CHECK-NEXT:    li 6, 1
-; CHECK-NEXT:    bl test_memset
-; CHECK-NEXT:    evldd 4, 24(1) # 8-byte Folded Reload
-; CHECK-NEXT:    li 5, 264
-; CHECK-NEXT:    evmergehi 3, 4, 4
-; CHECK-NEXT:    evlddx 31, 1, 5 # 8-byte Folded Reload
-; CHECK-NEXT:    li 5, 256
-; CHECK-NEXT:    evlddx 30, 1, 5 # 8-byte Folded Reload
-; CHECK-NEXT:    # kill: def $r3 killed $r3 killed $s3
-; CHECK-NEXT:    # kill: def $r4 killed $r4 killed $s4
-; CHECK-NEXT:    evldd 29, 248(1) # 8-byte Folded Reload
-; CHECK-NEXT:    evldd 28, 240(1) # 8-byte Folded Reload
-; CHECK-NEXT:    evldd 27, 232(1) # 8-byte Folded Reload
-; CHECK-NEXT:    evldd 26, 224(1) # 8-byte Folded Reload
-; CHECK-NEXT:    evldd 25, 216(1) # 8-byte Folded Reload
-; CHECK-NEXT:    evldd 24, 208(1) # 8-byte Folded Reload
-; CHECK-NEXT:    evldd 23, 200(1) # 8-byte Folded Reload
-; CHECK-NEXT:    evldd 22, 192(1) # 8-byte Folded Reload
-; CHECK-NEXT:    evldd 21, 184(1) # 8-byte Folded Reload
-; CHECK-NEXT:    evldd 20, 176(1) # 8-byte Folded Reload
-; CHECK-NEXT:    evldd 19, 168(1) # 8-byte Folded Reload
-; CHECK-NEXT:    evldd 18, 160(1) # 8-byte Folded Reload
-; CHECK-NEXT:    evldd 17, 152(1) # 8-byte Folded Reload
-; CHECK-NEXT:    evldd 16, 144(1) # 8-byte Folded Reload
-; CHECK-NEXT:    evldd 15, 136(1) # 8-byte Folded Reload
-; CHECK-NEXT:    evldd 14, 128(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lwz 31, 348(1) # 4-byte Folded Reload
-; CHECK-NEXT:    lwz 30, 344(1) # 4-byte Folded Reload
-; CHECK-NEXT:    lwz 29, 340(1) # 4-byte Folded Reload
-; CHECK-NEXT:    lwz 28, 336(1) # 4-byte Folded Reload
-; CHECK-NEXT:    lwz 27, 332(1) # 4-byte Folded Reload
-; CHECK-NEXT:    lwz 26, 328(1) # 4-byte Folded Reload
-; CHECK-NEXT:    lwz 25, 324(1) # 4-byte Folded Reload
-; CHECK-NEXT:    lwz 24, 320(1) # 4-byte Folded Reload
-; CHECK-NEXT:    lwz 23, 316(1) # 4-byte Folded Reload
-; CHECK-NEXT:    lwz 22, 312(1) # 4-byte Folded Reload
-; CHECK-NEXT:    lwz 21, 308(1) # 4-byte Folded Reload
-; CHECK-NEXT:    lwz 20, 304(1) # 4-byte Folded Reload
-; CHECK-NEXT:    lwz 19, 300(1) # 4-byte Folded Reload
-; CHECK-NEXT:    lwz 18, 296(1) # 4-byte Folded Reload
-; CHECK-NEXT:    lwz 17, 292(1) # 4-byte Folded Reload
-; CHECK-NEXT:    lwz 16, 288(1) # 4-byte Folded Reload
-; CHECK-NEXT:    lwz 15, 284(1) # 4-byte Folded Reload
-; CHECK-NEXT:    lwz 14, 280(1) # 4-byte Folded Reload
-; CHECK-NEXT:    lwz 0, 356(1)
-; CHECK-NEXT:    addi 1, 1, 352
-; CHECK-NEXT:    mtlr 0
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_spill:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    mflr 0
+; SPE-NEXT:    stw 0, 4(1)
+; SPE-NEXT:    stwu 1, -352(1)
+; SPE-NEXT:    li 5, 256
+; SPE-NEXT:    evstddx 30, 1, 5 # 8-byte Folded Spill
+; SPE-NEXT:    li 5, 264
+; SPE-NEXT:    evstddx 31, 1, 5 # 8-byte Folded Spill
+; SPE-NEXT:    li 5, .LCPI29_0@l
+; SPE-NEXT:    lis 6, .LCPI29_0@ha
+; SPE-NEXT:    evlddx 5, 6, 5
+; SPE-NEXT:    stw 14, 280(1) # 4-byte Folded Spill
+; SPE-NEXT:    stw 15, 284(1) # 4-byte Folded Spill
+; SPE-NEXT:    stw 16, 288(1) # 4-byte Folded Spill
+; SPE-NEXT:    stw 17, 292(1) # 4-byte Folded Spill
+; SPE-NEXT:    stw 18, 296(1) # 4-byte Folded Spill
+; SPE-NEXT:    stw 19, 300(1) # 4-byte Folded Spill
+; SPE-NEXT:    stw 20, 304(1) # 4-byte Folded Spill
+; SPE-NEXT:    stw 21, 308(1) # 4-byte Folded Spill
+; SPE-NEXT:    stw 22, 312(1) # 4-byte Folded Spill
+; SPE-NEXT:    stw 23, 316(1) # 4-byte Folded Spill
+; SPE-NEXT:    stw 24, 320(1) # 4-byte Folded Spill
+; SPE-NEXT:    stw 25, 324(1) # 4-byte Folded Spill
+; SPE-NEXT:    stw 26, 328(1) # 4-byte Folded Spill
+; SPE-NEXT:    stw 27, 332(1) # 4-byte Folded Spill
+; SPE-NEXT:    stw 28, 336(1) # 4-byte Folded Spill
+; SPE-NEXT:    stw 29, 340(1) # 4-byte Folded Spill
+; SPE-NEXT:    stw 30, 344(1) # 4-byte Folded Spill
+; SPE-NEXT:    stw 31, 348(1) # 4-byte Folded Spill
+; SPE-NEXT:    evstdd 14, 128(1) # 8-byte Folded Spill
+; SPE-NEXT:    evstdd 15, 136(1) # 8-byte Folded Spill
+; SPE-NEXT:    evstdd 16, 144(1) # 8-byte Folded Spill
+; SPE-NEXT:    evstdd 17, 152(1) # 8-byte Folded Spill
+; SPE-NEXT:    evstdd 18, 160(1) # 8-byte Folded Spill
+; SPE-NEXT:    evstdd 19, 168(1) # 8-byte Folded Spill
+; SPE-NEXT:    evstdd 20, 176(1) # 8-byte Folded Spill
+; SPE-NEXT:    evstdd 21, 184(1) # 8-byte Folded Spill
+; SPE-NEXT:    evstdd 22, 192(1) # 8-byte Folded Spill
+; SPE-NEXT:    evstdd 23, 200(1) # 8-byte Folded Spill
+; SPE-NEXT:    evstdd 24, 208(1) # 8-byte Folded Spill
+; SPE-NEXT:    evstdd 25, 216(1) # 8-byte Folded Spill
+; SPE-NEXT:    evstdd 26, 224(1) # 8-byte Folded Spill
+; SPE-NEXT:    evstdd 27, 232(1) # 8-byte Folded Spill
+; SPE-NEXT:    evstdd 28, 240(1) # 8-byte Folded Spill
+; SPE-NEXT:    evstdd 29, 248(1) # 8-byte Folded Spill
+; SPE-NEXT:    evmergelo 3, 3, 4
+; SPE-NEXT:    lwz 4, 360(1)
+; SPE-NEXT:    efdadd 3, 3, 3
+; SPE-NEXT:    efdadd 3, 3, 5
+; SPE-NEXT:    evstdd 3, 24(1) # 8-byte Folded Spill
+; SPE-NEXT:    stw 4, 20(1) # 4-byte Folded Spill
+; SPE-NEXT:    #APP
+; SPE-NEXT:    #NO_APP
+; SPE-NEXT:    addi 3, 1, 76
+; SPE-NEXT:    li 4, 0
+; SPE-NEXT:    li 5, 24
+; SPE-NEXT:    li 6, 1
+; SPE-NEXT:    li 30, 0
+; SPE-NEXT:    bl test_memset
+; SPE-NEXT:    lwz 3, 20(1) # 4-byte Folded Reload
+; SPE-NEXT:    stw 30, 0(3)
+; SPE-NEXT:    bl test_func2
+; SPE-NEXT:    addi 3, 1, 32
+; SPE-NEXT:    li 4, 0
+; SPE-NEXT:    li 5, 20
+; SPE-NEXT:    li 6, 1
+; SPE-NEXT:    bl test_memset
+; SPE-NEXT:    evldd 4, 24(1) # 8-byte Folded Reload
+; SPE-NEXT:    li 5, 264
+; SPE-NEXT:    evmergehi 3, 4, 4
+; SPE-NEXT:    evlddx 31, 1, 5 # 8-byte Folded Reload
+; SPE-NEXT:    li 5, 256
+; SPE-NEXT:    evlddx 30, 1, 5 # 8-byte Folded Reload
+; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
+; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
+; SPE-NEXT:    evldd 29, 248(1) # 8-byte Folded Reload
+; SPE-NEXT:    evldd 28, 240(1) # 8-byte Folded Reload
+; SPE-NEXT:    evldd 27, 232(1) # 8-byte Folded Reload
+; SPE-NEXT:    evldd 26, 224(1) # 8-byte Folded Reload
+; SPE-NEXT:    evldd 25, 216(1) # 8-byte Folded Reload
+; SPE-NEXT:    evldd 24, 208(1) # 8-byte Folded Reload
+; SPE-NEXT:    evldd 23, 200(1) # 8-byte Folded Reload
+; SPE-NEXT:    evldd 22, 192(1) # 8-byte Folded Reload
+; SPE-NEXT:    evldd 21, 184(1) # 8-byte Folded Reload
+; SPE-NEXT:    evldd 20, 176(1) # 8-byte Folded Reload
+; SPE-NEXT:    evldd 19, 168(1) # 8-byte Folded Reload
+; SPE-NEXT:    evldd 18, 160(1) # 8-byte Folded Reload
+; SPE-NEXT:    evldd 17, 152(1) # 8-byte Folded Reload
+; SPE-NEXT:    evldd 16, 144(1) # 8-byte Folded Reload
+; SPE-NEXT:    evldd 15, 136(1) # 8-byte Folded Reload
+; SPE-NEXT:    evldd 14, 128(1) # 8-byte Folded Reload
+; SPE-NEXT:    lwz 31, 348(1) # 4-byte Folded Reload
+; SPE-NEXT:    lwz 30, 344(1) # 4-byte Folded Reload
+; SPE-NEXT:    lwz 29, 340(1) # 4-byte Folded Reload
+; SPE-NEXT:    lwz 28, 336(1) # 4-byte Folded Reload
+; SPE-NEXT:    lwz 27, 332(1) # 4-byte Folded Reload
+; SPE-NEXT:    lwz 26, 328(1) # 4-byte Folded Reload
+; SPE-NEXT:    lwz 25, 324(1) # 4-byte Folded Reload
+; SPE-NEXT:    lwz 24, 320(1) # 4-byte Folded Reload
+; SPE-NEXT:    lwz 23, 316(1) # 4-byte Folded Reload
+; SPE-NEXT:    lwz 22, 312(1) # 4-byte Folded Reload
+; SPE-NEXT:    lwz 21, 308(1) # 4-byte Folded Reload
+; SPE-NEXT:    lwz 20, 304(1) # 4-byte Folded Reload
+; SPE-NEXT:    lwz 19, 300(1) # 4-byte Folded Reload
+; SPE-NEXT:    lwz 18, 296(1) # 4-byte Folded Reload
+; SPE-NEXT:    lwz 17, 292(1) # 4-byte Folded Reload
+; SPE-NEXT:    lwz 16, 288(1) # 4-byte Folded Reload
+; SPE-NEXT:    lwz 15, 284(1) # 4-byte Folded Reload
+; SPE-NEXT:    lwz 14, 280(1) # 4-byte Folded Reload
+; SPE-NEXT:    lwz 0, 356(1)
+; SPE-NEXT:    addi 1, 1, 352
+; SPE-NEXT:    mtlr 0
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_spill:
+; EFPU2:       # %bb.0: # %entry
+; EFPU2-NEXT:    mflr 0
+; EFPU2-NEXT:    stw 0, 4(1)
+; EFPU2-NEXT:    stwu 1, -176(1)
+; EFPU2-NEXT:    mr	5, 3
+; EFPU2-NEXT:    mr	6, 4
+; EFPU2-NEXT:    stw 27, 156(1) # 4-byte Folded Spill
+; EFPU2-NEXT:    stw 28, 160(1) # 4-byte Folded Spill
+; EFPU2-NEXT:    stw 29, 164(1) # 4-byte Folded Spill
+; EFPU2-NEXT:    stw 30, 168(1) # 4-byte Folded Spill
+; EFPU2-NEXT:    evstdd 27, 104(1) # 8-byte Folded Spill
+; EFPU2-NEXT:    evstdd 28, 112(1) # 8-byte Folded Spill
+; EFPU2-NEXT:    evstdd 29, 120(1) # 8-byte Folded Spill
+; EFPU2-NEXT:    evstdd 30, 128(1) # 8-byte Folded Spill
+; EFPU2-NEXT:    lwz 28, 184(1)
+; EFPU2-NEXT:    bl __adddf3
+; EFPU2-NEXT:    lis 5, 16393
+; EFPU2-NEXT:    lis 6, -4069
+; EFPU2-NEXT:    ori 5, 5, 8697
+; EFPU2-NEXT:    ori 6, 6, 34414
+; EFPU2-NEXT:    #APP
+; EFPU2-NEXT:    #NO_APP
+; EFPU2-NEXT:    bl __adddf3
+; EFPU2-NEXT:    mr	30, 3
+; EFPU2-NEXT:    mr	29, 4
+; EFPU2-NEXT:    addi 3, 1, 52
+; EFPU2-NEXT:    li 4, 0
+; EFPU2-NEXT:    li 5, 24
+; EFPU2-NEXT:    li 6, 1
+; EFPU2-NEXT:    li 27, 0
+; EFPU2-NEXT:    bl test_memset
+; EFPU2-NEXT:    stw 27, 0(28)
+; EFPU2-NEXT:    bl test_func2
+; EFPU2-NEXT:    addi 3, 1, 8
+; EFPU2-NEXT:    li 4, 0
+; EFPU2-NEXT:    li 5, 20
+; EFPU2-NEXT:    li 6, 1
+; EFPU2-NEXT:    bl test_memset
+; EFPU2-NEXT:    mr	3, 30
+; EFPU2-NEXT:    mr	4, 29
+; EFPU2-NEXT:    evldd 30, 128(1) # 8-byte Folded Reload
+; EFPU2-NEXT:    evldd 29, 120(1) # 8-byte Folded Reload
+; EFPU2-NEXT:    evldd 28, 112(1) # 8-byte Folded Reload
+; EFPU2-NEXT:    evldd 27, 104(1) # 8-byte Folded Reload
+; EFPU2-NEXT:    lwz 30, 168(1) # 4-byte Folded Reload
+; EFPU2-NEXT:    lwz 29, 164(1) # 4-byte Folded Reload
+; EFPU2-NEXT:    lwz 28, 160(1) # 4-byte Folded Reload
+; EFPU2-NEXT:    lwz 27, 156(1) # 4-byte Folded Reload
+; EFPU2-NEXT:    lwz 0, 180(1)
+; EFPU2-NEXT:    addi 1, 1, 176
+; EFPU2-NEXT:    mtlr 0
+; EFPU2-NEXT:    blr
 entry:
   %v1 = alloca [13 x i32], align 4
   %v2 = alloca [11 x i32], align 4
@@ -1357,49 +1520,93 @@
 }
 
 define dso_local float @test_fma(i32 %d) local_unnamed_addr #0 {
-; CHECK-LABEL: test_fma:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    stw 0, 4(1)
-; CHECK-NEXT:    stwu 1, -48(1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-NEXT:    .cfi_offset lr, 4
-; CHECK-NEXT:    .cfi_offset r29, -12
-; CHECK-NEXT:    .cfi_offset r30, -8
-; CHECK-NEXT:    .cfi_offset r29, -40
-; CHECK-NEXT:    .cfi_offset r30, -32
-; CHECK-NEXT:    cmpwi 3, 1
-; CHECK-NEXT:    stw 29, 36(1) # 4-byte Folded Spill
-; CHECK-NEXT:    stw 30, 40(1) # 4-byte Folded Spill
-; CHECK-NEXT:    evstdd 29, 8(1) # 8-byte Folded Spill
-; CHECK-NEXT:    evstdd 30, 16(1) # 8-byte Folded Spill
-; CHECK-NEXT:    blt 0, .LBB57_3
-; CHECK-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-NEXT:    mr 30, 3
-; CHECK-NEXT:    li 29, 0
-; CHECK-NEXT:    # implicit-def: $r5
-; CHECK-NEXT:  .LBB57_2: # %for.body
-; CHECK-NEXT:    #
-; CHECK-NEXT:    efscfsi 3, 29
-; CHECK-NEXT:    mr 4, 3
-; CHECK-NEXT:    bl fmaf
-; CHECK-NEXT:    addi 29, 29, 1
-; CHECK-NEXT:    cmplw 30, 29
-; CHECK-NEXT:    mr 5, 3
-; CHECK-NEXT:    bne 0, .LBB57_2
-; CHECK-NEXT:    b .LBB57_4
-; CHECK-NEXT:  .LBB57_3:
-; CHECK-NEXT:    # implicit-def: $r5
-; CHECK-NEXT:  .LBB57_4: # %for.cond.cleanup
-; CHECK-NEXT:    evldd 30, 16(1) # 8-byte Folded Reload
-; CHECK-NEXT:    mr 3, 5
-; CHECK-NEXT:    evldd 29, 8(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lwz 30, 40(1) # 4-byte Folded Reload
-; CHECK-NEXT:    lwz 29, 36(1) # 4-byte Folded Reload
-; CHECK-NEXT:    lwz 0, 52(1)
-; CHECK-NEXT:    addi 1, 1, 48
-; CHECK-NEXT:    mtlr 0
-; CHECK-NEXT:    blr
+; SPE-LABEL: test_fma:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    mflr 0
+; SPE-NEXT:    stw 0, 4(1)
+; SPE-NEXT:    stwu 1, -48(1)
+; SPE-NEXT:    .cfi_def_cfa_offset 48
+; SPE-NEXT:    .cfi_offset lr, 4
+; SPE-NEXT:    .cfi_offset r29, -12
+; SPE-NEXT:    .cfi_offset r30, -8
+; SPE-NEXT:    .cfi_offset r29, -40
+; SPE-NEXT:    .cfi_offset r30, -32
+; SPE-NEXT:    cmpwi 3, 1
+; SPE-NEXT:    stw 29, 36(1) # 4-byte Folded Spill
+; SPE-NEXT:    stw 30, 40(1) # 4-byte Folded Spill
+; SPE-NEXT:    evstdd 29, 8(1) # 8-byte Folded Spill
+; SPE-NEXT:    evstdd 30, 16(1) # 8-byte Folded Spill
+; SPE-NEXT:    blt 0, .LBB30_3
+; SPE-NEXT:  # %bb.1: # %for.body.preheader
+; SPE-NEXT:    mr 30, 3
+; SPE-NEXT:    li 29, 0
+; SPE-NEXT:    # implicit-def: $r5
+; SPE-NEXT:  .LBB30_2: # %for.body
+; SPE-NEXT:    #
+; SPE-NEXT:    efscfsi 3, 29
+; SPE-NEXT:    mr 4, 3
+; SPE-NEXT:    bl fmaf
+; SPE-NEXT:    addi 29, 29, 1
+; SPE-NEXT:    cmplw 30, 29
+; SPE-NEXT:    mr 5, 3
+; SPE-NEXT:    bne 0, .LBB30_2
+; SPE-NEXT:    b .LBB30_4
+; SPE-NEXT:  .LBB30_3:
+; SPE-NEXT:    # implicit-def: $r5
+; SPE-NEXT:  .LBB30_4: # %for.cond.cleanup
+; SPE-NEXT:    evldd 30, 16(1) # 8-byte Folded Reload
+; SPE-NEXT:    mr 3, 5
+; SPE-NEXT:    evldd 29, 8(1) # 8-byte Folded Reload
+; SPE-NEXT:    lwz 30, 40(1) # 4-byte Folded Reload
+; SPE-NEXT:    lwz 29, 36(1) # 4-byte Folded Reload
+; SPE-NEXT:    lwz 0, 52(1)
+; SPE-NEXT:    addi 1, 1, 48
+; SPE-NEXT:    mtlr 0
+; SPE-NEXT:    blr
+;
+; EFPU2-LABEL: test_fma:
+; EFPU2:       # %bb.0: # %entry
+; EFPU2-NEXT:    mflr 0
+; EFPU2-NEXT:    stw 0, 4(1)
+; EFPU2-NEXT:    stwu 1, -48(1)
+; EFPU2-NEXT:    .cfi_def_cfa_offset 48
+; EFPU2-NEXT:    .cfi_offset lr, 4
+; EFPU2-NEXT:    .cfi_offset r29, -12
+; EFPU2-NEXT:    .cfi_offset r30, -8
+; EFPU2-NEXT:    .cfi_offset r29, -40
+; EFPU2-NEXT:    .cfi_offset r30, -32
+; EFPU2-NEXT:    cmpwi	3, 1
+; EFPU2-NEXT:    stw 29, 36(1) # 4-byte Folded Spill
+; EFPU2-NEXT:    stw 30, 40(1) # 4-byte Folded Spill
+; EFPU2-NEXT:    evstdd 29, 8(1) # 8-byte Folded Spill
+; EFPU2-NEXT:    evstdd 30, 16(1) # 8-byte Folded Spill
+; EFPU2-NEXT:    blt	0, .LBB30_3
+; EFPU2-NEXT:  # %bb.1: # %for.body.preheader
+; EFPU2-NEXT:    mr	30, 3
+; EFPU2-NEXT:    li 29, 0
+; EFPU2-NEXT:    # implicit-def: $r5
+; EFPU2-NEXT:  .LBB30_2: # %for.body
+; EFPU2-NEXT:    # =>This Inner Loop Header: Depth=1
+; EFPU2-NEXT:    efscfsi 3, 29
+; EFPU2-NEXT:    mr	4, 3
+; EFPU2-NEXT:    bl fmaf
+; EFPU2-NEXT:    addi 29, 29, 1
+; EFPU2-NEXT:    cmplw	30, 29
+; EFPU2-NEXT:    mr	5, 3
+; EFPU2-NEXT:    bne	0, .LBB30_2
+; EFPU2-NEXT:    b .LBB30_4
+; EFPU2-NEXT:  .LBB30_3:
+; EFPU2-NEXT:    # implicit-def: $r5
+; EFPU2-NEXT:  .LBB30_4: # %for.cond.cleanup
+; EFPU2-NEXT:    evldd 30, 16(1) # 8-byte Folded Reload
+; EFPU2-NEXT:    mr	3, 5
+; EFPU2-NEXT:    evldd 29, 8(1) # 8-byte Folded Reload
+; EFPU2-NEXT:    lwz 30, 40(1) # 4-byte Folded Reload
+; EFPU2-NEXT:    lwz 29, 36(1) # 4-byte Folded Reload
+; EFPU2-NEXT:    lwz 0, 52(1)
+; EFPU2-NEXT:    addi 1, 1, 48
+; EFPU2-NEXT:    mtlr 0
+; EFPU2-NEXT:    blr
 entry:
   %cmp8 = icmp sgt i32 %d, 0
   br i1 %cmp8, label %for.body, label %for.cond.cleanup
@@ -1426,49 +1633,106 @@
 %struct.a = type { float, float }
 
 define void @d(%struct.a* %e, %struct.a* %f) {
-; CHECK-LABEL: d:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    mflr 0
-; CHECK-NEXT:    stw 0, 4(1)
-; CHECK-NEXT:    stwu 1, -48(1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 48
-; CHECK-NEXT:    .cfi_offset lr, 4
-; CHECK-NEXT:    .cfi_offset r29, -12
-; CHECK-NEXT:    .cfi_offset r30, -8
-; CHECK-NEXT:    .cfi_offset r29, -40
-; CHECK-NEXT:    .cfi_offset r30, -32
-; CHECK-NEXT:    lwz 4, 0(4)
-; CHECK-NEXT:    lwz 3, 0(3)
-; CHECK-NEXT:    stw 29, 36(1) # 4-byte Folded Spill
-; CHECK-NEXT:    evstdd 29, 8(1) # 8-byte Folded Spill
-; CHECK-NEXT:    efdcfs 29, 4
-; CHECK-NEXT:    stw 30, 40(1) # 4-byte Folded Spill
-; CHECK-NEXT:    mr 4, 29
-; CHECK-NEXT:    evstdd 30, 16(1) # 8-byte Folded Spill
-; CHECK-NEXT:    efdcfs 30, 3
-; CHECK-NEXT:    evmergehi 3, 29, 29
-; CHECK-NEXT:    mtctr 3
-; CHECK-NEXT:    # kill: def $r3 killed $r3 killed $s3
-; CHECK-NEXT:    bctrl
-; CHECK-NEXT:    evmergehi 3, 30, 30
-; CHECK-NEXT:    mr 4, 30
-; CHECK-NEXT:    mtctr 3
-; CHECK-NEXT:    # kill: def $r3 killed $r3 killed $s3
-; CHECK-NEXT:    bctrl
-; CHECK-NEXT:    li 3, .LCPI58_0@l
-; CHECK-NEXT:    lis 4, .LCPI58_0@ha
-; CHECK-NEXT:    evlddx 3, 4, 3
-; CHECK-NEXT:    evldd 30, 16(1) # 8-byte Folded Reload
-; CHECK-NEXT:    efdmul 3, 29, 3
-; CHECK-NEXT:    evldd 29, 8(1) # 8-byte Folded Reload
-; CHECK-NEXT:    efscfd 3, 3
-; CHECK-NEXT:    stw 3, 0(3)
-; CHECK-NEXT:    lwz 30, 40(1) # 4-byte Folded Reload
-; CHECK-NEXT:    lwz 29, 36(1) # 4-byte Folded Reload
-; CHECK-NEXT:    lwz 0, 52(1)
-; CHECK-NEXT:    addi 1, 1, 48
-; CHECK-NEXT:    mtlr 0
-; CHECK-NEXT:    blr
+; SPE-LABEL: d:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    mflr 0
+; SPE-NEXT:    stw 0, 4(1)
+; SPE-NEXT:    stwu 1, -48(1)
+; SPE-NEXT:    .cfi_def_cfa_offset 48
+; SPE-NEXT:    .cfi_offset lr, 4
+; SPE-NEXT:    .cfi_offset r29, -12
+; SPE-NEXT:    .cfi_offset r30, -8
+; SPE-NEXT:    .cfi_offset r29, -40
+; SPE-NEXT:    .cfi_offset r30, -32
+; SPE-NEXT:    lwz 4, 0(4)
+; SPE-NEXT:    lwz 3, 0(3)
+; SPE-NEXT:    stw 29, 36(1) # 4-byte Folded Spill
+; SPE-NEXT:    evstdd 29, 8(1) # 8-byte Folded Spill
+; SPE-NEXT:    efdcfs 29, 4
+; SPE-NEXT:    stw 30, 40(1) # 4-byte Folded Spill
+; SPE-NEXT:    mr 4, 29
+; SPE-NEXT:    evstdd 30, 16(1) # 8-byte Folded Spill
+; SPE-NEXT:    efdcfs 30, 3
+; SPE-NEXT:    evmergehi 3, 29, 29
+; SPE-NEXT:    mtctr 3
+; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
+; SPE-NEXT:    bctrl
+; SPE-NEXT:    evmergehi 3, 30, 30
+; SPE-NEXT:    mr 4, 30
+; SPE-NEXT:    mtctr 3
+; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
+; SPE-NEXT:    bctrl
+; SPE-NEXT:    li 3, .LCPI31_0@l
+; SPE-NEXT:    lis 4, .LCPI31_0@ha
+; SPE-NEXT:    evlddx 3, 4, 3
+; SPE-NEXT:    evldd 30, 16(1) # 8-byte Folded Reload
+; SPE-NEXT:    efdmul 3, 29, 3
+; SPE-NEXT:    evldd 29, 8(1) # 8-byte Folded Reload
+; SPE-NEXT:    efscfd 3, 3
+; SPE-NEXT:    stw 3, 0(3)
+; SPE-NEXT:    lwz 30, 40(1) # 4-byte Folded Reload
+; SPE-NEXT:    lwz 29, 36(1) # 4-byte Folded Reload
+; SPE-NEXT:    lwz 0, 52(1)
+; SPE-NEXT:    addi 1, 1, 48
+; SPE-NEXT:    mtlr 0
+; SPE-NEXT:    blr
+; EFPU2-LABEL: d:
+; EFPU2:       # %bb.0: # %entry
+; EFPU2-NEXT:    mflr 0
+; EFPU2-NEXT:    stw 0, 4(1)
+; EFPU2-NEXT:    stwu 1, -80(1)
+; EFPU2-NEXT:    .cfi_def_cfa_offset 80
+; EFPU2-NEXT:    .cfi_offset lr, 4
+; EFPU2-NEXT:    .cfi_offset r27, -20
+; EFPU2-NEXT:    .cfi_offset r28, -16
+; EFPU2-NEXT:    .cfi_offset r29, -12
+; EFPU2-NEXT:    .cfi_offset r30, -8
+; EFPU2-NEXT:    .cfi_offset r27, -72
+; EFPU2-NEXT:    .cfi_offset r28, -64
+; EFPU2-NEXT:    .cfi_offset r29, -56
+; EFPU2-NEXT:    .cfi_offset r30, -48
+; EFPU2-NEXT:    lwz 3, 0(3)
+; EFPU2-NEXT:    stw 27, 60(1)                           # 4-byte Folded Spill
+; EFPU2-NEXT:    stw 28, 64(1)                           # 4-byte Folded Spill
+; EFPU2-NEXT:    stw 29, 68(1)                           # 4-byte Folded Spill
+; EFPU2-NEXT:    stw 30, 72(1)                           # 4-byte Folded Spill
+; EFPU2-NEXT:    evstdd 27, 8(1)                         # 8-byte Folded Spill
+; EFPU2-NEXT:    evstdd 28, 16(1)                        # 8-byte Folded Spill
+; EFPU2-NEXT:    evstdd 29, 24(1)                        # 8-byte Folded Spill
+; EFPU2-NEXT:    evstdd 30, 32(1)                        # 8-byte Folded Spill
+; EFPU2-NEXT:    mr	30, 4
+; EFPU2-NEXT:    bl __extendsfdf2
+; EFPU2-NEXT:    mr	29, 3
+; EFPU2-NEXT:    lwz 3, 0(30)
+; EFPU2-NEXT:    mr	28, 4
+; EFPU2-NEXT:    bl __extendsfdf2
+; EFPU2-NEXT:    mtctr 3
+; EFPU2-NEXT:    mr	30, 3
+; EFPU2-NEXT:    mr	27, 4
+; EFPU2-NEXT:    bctrl
+; EFPU2-NEXT:    mtctr 3
+; EFPU2-NEXT:    mr	3, 29
+; EFPU2-NEXT:    mr	4, 28
+; EFPU2-NEXT:    bctrl
+; EFPU2-NEXT:    mr	3, 30
+; EFPU2-NEXT:    mr	4, 27
+; EFPU2-NEXT:    li 5, 0
+; EFPU2-NEXT:    li 6, 0
+; EFPU2-NEXT:    bl __muldf3
+; EFPU2-NEXT:    bl __truncdfsf2
+; EFPU2-NEXT:    stw 3, 0(3)
+; EFPU2-NEXT:    evldd 30, 32(1)                         # 8-byte Folded Reload
+; EFPU2-NEXT:    evldd 29, 24(1)                         # 8-byte Folded Reload
+; EFPU2-NEXT:    evldd 28, 16(1)                         # 8-byte Folded Reload
+; EFPU2-NEXT:    evldd 27, 8(1)                          # 8-byte Folded Reload
+; EFPU2-NEXT:    lwz 30, 72(1)                           # 4-byte Folded Reload
+; EFPU2-NEXT:    lwz 29, 68(1)                           # 4-byte Folded Reload
+; EFPU2-NEXT:    lwz 28, 64(1)                           # 4-byte Folded Reload
+; EFPU2-NEXT:    lwz 27, 60(1)                           # 4-byte Folded Reload
+; EFPU2-NEXT:    lwz 0, 84(1)
+; EFPU2-NEXT:    addi 1, 1, 80
+; EFPU2-NEXT:    mtlr 0
+; EFPU2-NEXT:    blr
 entry:
   %0 = getelementptr %struct.a, %struct.a* %f, i32 0, i32 0
   %1 = load float, float* undef
@@ -1483,3 +1747,25 @@
   store float %l, float* undef
   ret void
 }
+
+;--- hwdouble.ll
+; split into separate file because the efd* instructions are invalid on efpu2
+define i32 @test_dasmconst(double %x) {
+; SPE-LABEL: test_dasmconst:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    stwu 1, -16(1)
+; SPE-NEXT:    .cfi_def_cfa_offset 16
+; SPE-NEXT:    evmergelo 3, 3, 4
+; SPE-NEXT:    evstdd 3, 8(1)
+; SPE-NEXT:    #APP
+; SPE-NEXT:    efdctsi 3, 3
+; SPE-NEXT:    #NO_APP
+; SPE-NEXT:    addi 1, 1, 16
+; SPE-NEXT:    blr
+entry:
+  %x.addr = alloca double, align 8
+  store double %x, double* %x.addr, align 8
+  %0 = load double, double* %x.addr, align 8
+  %1 = call i32 asm sideeffect "efdctsi $0, $1", "=d,d"(double %0)
+  ret i32 %1
+}
Index: llvm/lib/Target/PowerPC/PPCSubtarget.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -100,6 +100,7 @@
   bool HasAltivec;
   bool HasFPU;
   bool HasSPE;
+  bool HasEFPU2;
   bool HasVSX;
   bool NeedsTwoConstNR;
   bool HasP8Vector;
@@ -260,6 +261,7 @@
   bool hasFPCVT() const { return HasFPCVT; }
   bool hasAltivec() const { return HasAltivec; }
   bool hasSPE() const { return HasSPE; }
+  bool hasEFPU2() const { return HasEFPU2; }
   bool hasFPU() const { return HasFPU; }
   bool hasVSX() const { return HasVSX; }
   bool needsTwoConstNR() const { return NeedsTwoConstNR; }
Index: llvm/lib/Target/PowerPC/PPCSubtarget.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -77,6 +77,7 @@
   HasHardFloat = false;
   HasAltivec = false;
   HasSPE = false;
+  HasEFPU2 = false;
   HasFPU = false;
   HasVSX = false;
   NeedsTwoConstNR = false;
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -151,7 +151,9 @@
   if (!useSoftFloat()) {
     if (hasSPE()) {
       addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
-      addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
+      // EFPU2 APU only supports f32
+      if (!Subtarget.hasEFPU2())
+        addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
     } else {
       addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
       addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
Index: llvm/lib/Target/PowerPC/PPC.td
===================================================================
--- llvm/lib/Target/PowerPC/PPC.td
+++ llvm/lib/Target/PowerPC/PPC.td
@@ -72,6 +72,9 @@
 def FeatureSPE       : SubtargetFeature<"spe","HasSPE", "true",
                                         "Enable SPE instructions",
                                         [FeatureHardFloat]>;
+def FeatureEFPU2 : SubtargetFeature<"efpu2", "HasEFPU2", "true",
+                                        "Enable Embedded Floating-Point APU 2 instructions",
+                                        [FeatureSPE]>;
 def FeatureMFOCRF    : SubtargetFeature<"mfocrf","HasMFOCRF", "true",
                                         "Enable the MFOCRF instruction">;
 def FeatureFSqrt     : SubtargetFeature<"fsqrt","HasFSQRT", "true",
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -2600,6 +2600,7 @@
 def mno_pcrel: Flag<["-"], "mno-pcrel">, Group<m_ppc_Features_Group>;
 def mspe : Flag<["-"], "mspe">, Group<m_ppc_Features_Group>;
 def mno_spe : Flag<["-"], "mno-spe">, Group<m_ppc_Features_Group>;
+def mefpu2 : Flag<["-"], "mefpu2">, Group<m_ppc_Features_Group>;
 def mabi_EQ_vec_extabi : Flag<["-"], "mabi=vec-extabi">, Group<m_Group>, Flags<[CC1Option]>,
   HelpText<"Enable the extended Altivec ABI on AIX (AIX only). Uses volatile and nonvolatile vector registers">;
 def mabi_EQ_vec_default : Flag<["-"], "mabi=vec-default">, Group<m_Group>, Flags<[CC1Option]>,
Index: clang/docs/ClangCommandLineReference.rst
===================================================================
--- clang/docs/ClangCommandLineReference.rst
+++ clang/docs/ClangCommandLineReference.rst
@@ -3177,6 +3177,8 @@
 
 .. option:: -mspe, -mno-spe
 
+.. option:: -mefpu2
+
 .. option:: -mvsx, -mno-vsx
 
 WebAssembly
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to