UPDATE: LLVM

Brad Smith Sun, 10 Feb 2019 23:06:46 -0800

Pull in 3 patches for the X86 backend:

- [X86] Add FPSW as a Def on some FP instructions that were missing it.
- [X86] Connect the default fpsr and dirflag clobbers in inline
  assembly to the registers we have defined for them.
- [X86] Add FPCW as a register and start using it as an implicit use on
  floating point instructions.


Resolves the miscompilation of libm reported here:
https://marc.info/?l=openbsd-bugs&m=154896725914212&w=2


Index: Makefile
===================================================================
RCS file: /home/cvs/ports/devel/llvm/Makefile,v
retrieving revision 1.212
diff -u -p -u -p -r1.212 Makefile
--- Makefile    11 Feb 2019 05:33:57 -0000      1.212
+++ Makefile    11 Feb 2019 06:42:57 -0000
@@ -20,7 +20,7 @@ PKGSPEC-main =        llvm-=${LLVM_V}
 PKGNAME-main = llvm-${LLVM_V}
 PKGNAME-python =       py-llvm-${LLVM_V}
 PKGNAME-lldb = lldb-${LLVM_V}
-REVISION-main =        5
+REVISION-main =        6
 CATEGORIES =   devel
 DISTFILES =    llvm-${LLVM_V}.src${EXTRACT_SUFX} \
                cfe-${LLVM_V}.src${EXTRACT_SUFX} \
Index: patches/patch-lib_Target_X86_X86ISelLowering_cpp
===================================================================
RCS file: patches/patch-lib_Target_X86_X86ISelLowering_cpp
diff -N patches/patch-lib_Target_X86_X86ISelLowering_cpp
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ patches/patch-lib_Target_X86_X86ISelLowering_cpp    11 Feb 2019 06:19:24 -0000
@@ -0,0 +1,29 @@
+$OpenBSD$
+
+[X86] Connect the default fpsr and dirflag clobbers in inline
+assembly to the registers we have defined for them.
+
+Index: lib/Target/X86/X86ISelLowering.cpp
+--- lib/Target/X86/X86ISelLowering.cpp.orig
++++ lib/Target/X86/X86ISelLowering.cpp
+@@ -40619,6 +40619,20 @@ X86TargetLowering::getRegForInlineAsmConstraint(const 
+       return Res;
+     }
+ 
++    // dirflag -> DF
++    if (StringRef("{dirflag}").equals_lower(Constraint)) {
++      Res.first = X86::DF;
++      Res.second = &X86::DFCCRRegClass;
++      return Res;
++    }
++
++    // fpsr -> FPSW
++    if (StringRef("{fpsr}").equals_lower(Constraint)) {
++      Res.first = X86::FPSW;
++      Res.second = &X86::FPCCRRegClass;
++      return Res;
++    }
++
+     // 'A' means [ER]AX + [ER]DX.
+     if (Constraint == "A") {
+       if (Subtarget.is64Bit()) {
Index: patches/patch-lib_Target_X86_X86InstrFPStack_td
===================================================================
RCS file: patches/patch-lib_Target_X86_X86InstrFPStack_td
diff -N patches/patch-lib_Target_X86_X86InstrFPStack_td
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ patches/patch-lib_Target_X86_X86InstrFPStack_td     11 Feb 2019 06:38:04 -0000
@@ -0,0 +1,186 @@
+$OpenBSD$
+
+- [X86] Add FPSW as a Def on some FP instructions that were missing it.
+- [X86] Add FPCW as a register and start using it as an implicit use on floating
+  point instructions.
+
+Index: lib/Target/X86/X86InstrFPStack.td
+--- lib/Target/X86/X86InstrFPStack.td.orig
++++ lib/Target/X86/X86InstrFPStack.td
+@@ -230,7 +230,7 @@ def _FI32m  : FPI<0xDA, fp, (outs), (ins i32mem:$src),
+ } // mayLoad = 1, hasSideEffects = 1
+ }
+ 
+-let Defs = [FPSW] in {
++let Defs = [FPSW], Uses = [FPCW] in {
+ // FPBinary_rr just defines pseudo-instructions, no need to set a scheduling
+ // resources.
+ let hasNoSchedulingInfo = 1 in {
+@@ -267,7 +267,7 @@ class FPrST0PInst<Format fp, string asm>
+ // NOTE: GAS and apparently all other AT&T style assemblers have a broken notion
+ // of some of the 'reverse' forms of the fsub and fdiv instructions.  As such,
+ // we have to put some 'r's in and take them out of weird places.
+-let SchedRW = [WriteFAdd] in {
++let SchedRW = [WriteFAdd], Defs = [FPSW], Uses = [FPCW] in {
+ def ADD_FST0r   : FPST0rInst <MRM0r, "fadd\t$op">;
+ def ADD_FrST0   : FPrST0Inst <MRM0r, "fadd\t{%st(0), $op|$op, st(0)}">;
+ def ADD_FPrST0  : FPrST0PInst<MRM0r, "faddp\t$op">;
+@@ -278,16 +278,16 @@ def SUB_FST0r   : FPST0rInst <MRM4r, "fsub\t$op">;
+ def SUBR_FrST0  : FPrST0Inst <MRM4r, "fsub{|r}\t{%st(0), $op|$op, st(0)}">;
+ def SUBR_FPrST0 : FPrST0PInst<MRM4r, "fsub{|r}p\t$op">;
+ } // SchedRW
+-let SchedRW = [WriteFCom] in {
++let SchedRW = [WriteFCom], Defs = [FPSW], Uses = [FPCW] in {
+ def COM_FST0r   : FPST0rInst <MRM2r, "fcom\t$op">;
+ def COMP_FST0r  : FPST0rInst <MRM3r, "fcomp\t$op">;
+ } // SchedRW
+-let SchedRW = [WriteFMul] in {
++let SchedRW = [WriteFMul], Defs = [FPSW], Uses = [FPCW] in {
+ def MUL_FST0r   : FPST0rInst <MRM1r, "fmul\t$op">;
+ def MUL_FrST0   : FPrST0Inst <MRM1r, "fmul\t{%st(0), $op|$op, st(0)}">;
+ def MUL_FPrST0  : FPrST0PInst<MRM1r, "fmulp\t$op">;
+ } // SchedRW
+-let SchedRW = [WriteFDiv] in {
++let SchedRW = [WriteFDiv], Defs = [FPSW], Uses = [FPCW] in {
+ def DIVR_FST0r  : FPST0rInst <MRM7r, "fdivr\t$op">;
+ def DIV_FrST0   : FPrST0Inst <MRM7r, "fdiv{r}\t{%st(0), $op|$op, st(0)}">;
+ def DIV_FPrST0  : FPrST0PInst<MRM7r, "fdiv{r}p\t$op">;
+@@ -307,7 +307,7 @@ def _Fp80  : FpI_<(outs RFP80:$dst), (ins RFP80:$src),
+ def _F     : FPI<0xD9, fp, (outs), (ins), asmstring>;
+ }
+ 
+-let Defs = [FPSW] in {
++let Defs = [FPSW], Uses = [FPCW] in {
+ 
+ let SchedRW = [WriteFSign] in {
+ defm CHS : FPUnary<fneg, MRM_E0, "fchs">;
+@@ -335,7 +335,7 @@ def TST_F  : FPI<0xD9, MRM_E4, (outs), (ins), "ftst">;
+ 
+ // Versions of FP instructions that take a single memory operand.  Added for the
+ //   disassembler; remove as they are included with patterns elsewhere.
+-let SchedRW = [WriteFComLd] in {
++let SchedRW = [WriteFComLd], Defs = [FPSW], Uses = [FPCW] in {
+ def FCOM32m  : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">;
+ def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">;
+ 
+@@ -454,7 +454,7 @@ def ILD_Fp64m80: FpI_<(outs RFP80:$dst), (ins i64mem:$
+                   [(set RFP80:$dst, (X86fild addr:$src, i64))]>;
+ } // SchedRW
+ 
+-let SchedRW = [WriteStore] in {
++let SchedRW = [WriteStore], Uses = [FPCW] in {
+ def ST_Fp32m   : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP,
+                   [(store RFP32:$src, addr:$op)]>;
+ def ST_Fp64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP,
+@@ -489,7 +489,7 @@ def IST_Fp16m80  : FpI_<(outs), (ins i16mem:$op, RFP80
+ def IST_Fp32m80  : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
+ def IST_Fp64m80  : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
+ } // mayStore
+-} // SchedRW
++} // SchedRW, Uses = [FPCW]
+ 
+ let mayLoad = 1, SchedRW = [WriteLoad] in {
+ def LD_F32m   : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src">;
+@@ -499,7 +499,7 @@ def ILD_F16m  : FPI<0xDF, MRM0m, (outs), (ins i16mem:$
+ def ILD_F32m  : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src">;
+ def ILD_F64m  : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src">;
+ }
+-let mayStore = 1, SchedRW = [WriteStore] in {
++let mayStore = 1, SchedRW = [WriteStore], Uses = [FPCW] in {
+ def ST_F32m   : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst">;
+ def ST_F64m   : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst">;
+ def ST_FP32m  : FPI<0xD9, MRM3m, (outs), (ins f32mem:$dst), "fstp{s}\t$dst">;
+@@ -513,7 +513,7 @@ def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$
+ }
+ 
+ // FISTTP requires SSE3 even though it's a FPStack op.
+-let Predicates = [HasSSE3], SchedRW = [WriteStore] in {
++let Predicates = [HasSSE3], SchedRW = [WriteStore], Uses = [FPCW] in {
+ def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP,
+                     [(X86fp_to_i16mem RFP32:$src, addr:$op)]>;
+ def ISTT_Fp32m32 : FpI_<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP,
+@@ -534,7 +534,7 @@ def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80
+                     [(X86fp_to_i64mem RFP80:$src, addr:$op)]>;
+ } // Predicates = [HasSSE3]
+ 
+-let mayStore = 1, SchedRW = [WriteStore] in {
++let mayStore = 1, SchedRW = [WriteStore], Uses = [FPCW] in {
+ def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">;
+ def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst">;
+ def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), "fisttp{ll}\t$dst">;
+@@ -570,7 +570,7 @@ def LD_F0 : FPI<0xD9, MRM_EE, (outs), (ins), "fldz">;
+ let SchedRW = [WriteFLD1] in
+ def LD_F1 : FPI<0xD9, MRM_E8, (outs), (ins), "fld1">;
+ 
+-let SchedRW = [WriteFLDC], Defs = [FPSW] in {
++let SchedRW = [WriteFLDC] in {
+ def FLDL2T : I<0xD9, MRM_E9, (outs), (ins), "fldl2t", []>;
+ def FLDL2E : I<0xD9, MRM_EA, (outs), (ins), "fldl2e", []>;
+ def FLDPI : I<0xD9, MRM_EB, (outs), (ins), "fldpi", []>;
+@@ -579,7 +579,7 @@ def FLDLN2 : I<0xD9, MRM_ED, (outs), (ins), "fldln2", 
+ } // SchedRW
+ 
+ // Floating point compares.
+-let SchedRW = [WriteFCom] in {
++let SchedRW = [WriteFCom], Uses = [FPCW] in {
+ def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
+                         [(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>;
+ def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
+@@ -591,16 +591,19 @@ def UCOM_Fpr80 : FpI_  <(outs), (ins RFP80:$lhs, RFP80
+ 
+ let SchedRW = [WriteFCom] in {
+ // CC = ST(0) cmp ST(i)
+-let Defs = [EFLAGS, FPSW] in {
+-def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
+-                  [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>;
+-def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
+-                  [(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>;
++let Defs = [EFLAGS, FPSW], Uses = [FPCW] in {
++def UCOM_FpIr32: FpI_<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP,
++                  [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>,
++                  Requires<[FPStackf32, HasCMov]>;
++def UCOM_FpIr64: FpI_<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP,
++                  [(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>,
++                  Requires<[FPStackf64, HasCMov]>;
+ def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP,
+-                  [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>;
++                  [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>,
++                  Requires<[HasCMov]>;
+ }
+ 
+-let Defs = [FPSW], Uses = [ST0] in {
++let Defs = [FPSW], Uses = [ST0, FPCW] in {
+ def UCOM_Fr    : FPI<0xDD, MRM4r,    // FPSW = cmp ST(0) with ST(i)
+                     (outs), (ins RST:$reg), "fucom\t$reg">;
+ def UCOM_FPr   : FPI<0xDD, MRM5r,    // FPSW = cmp ST(0) with ST(i), pop
+@@ -609,14 +612,12 @@ def UCOM_FPPr  : FPI<0xDA, MRM_E9,       // cmp ST(0) 
+                     (outs), (ins), "fucompp">;
+ }
+ 
+-let Defs = [EFLAGS, FPSW], Uses = [ST0] in {
++let Defs = [EFLAGS, FPSW], Uses = [ST0, FPCW] in {
+ def UCOM_FIr   : FPI<0xDB, MRM5r,     // CC = cmp ST(0) with ST(i)
+                     (outs), (ins RST:$reg), "fucomi\t$reg">;
+ def UCOM_FIPr  : FPI<0xDF, MRM5r,     // CC = cmp ST(0) with ST(i), pop
+                     (outs), (ins RST:$reg), "fucompi\t$reg">;
+-}
+ 
+-let Defs = [EFLAGS, FPSW] in {
+ def COM_FIr : FPI<0xDB, MRM6r, (outs), (ins RST:$reg), "fcomi\t$reg">;
+ def COM_FIPr : FPI<0xDF, MRM6r, (outs), (ins RST:$reg), "fcompi\t$reg">;
+ }
+@@ -628,12 +629,12 @@ let Defs = [AX], Uses = [FPSW] in
+ def FNSTSW16r : I<0xDF, MRM_E0,                  // AX = fp flags
+                   (outs), (ins), "fnstsw\t{%ax|ax}",
+                   [(set AX, (X86fp_stsw FPSW))]>;
+-let Defs = [FPSW] in
++let Defs = [FPSW], Uses = [FPCW] in
+ def FNSTCW16m : I<0xD9, MRM7m,                   // [mem16] = X87 control world
+                   (outs), (ins i16mem:$dst), "fnstcw\t$dst",
+                   [(X86fp_cwd_get16 addr:$dst)]>;
+ } // SchedRW
+-let Defs = [FPSW], mayLoad = 1 in
++let Defs = [FPSW,FPCW], mayLoad = 1 in
+ def FLDCW16m  : I<0xD9, MRM5m,                   // X87 control world = [mem16]
+                   (outs), (ins i16mem:$dst), "fldcw\t$dst", []>,
+                 Sched<[WriteLoad]>;
Index: patches/patch-lib_Target_X86_X86RegisterInfo_cpp
===================================================================
RCS file: patches/patch-lib_Target_X86_X86RegisterInfo_cpp
diff -N patches/patch-lib_Target_X86_X86RegisterInfo_cpp
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ patches/patch-lib_Target_X86_X86RegisterInfo_cpp    11 Feb 2019 06:22:08 -0000
@@ -0,0 +1,18 @@
+$OpenBSD$
+
+[X86] Add FPCW as a register and start using it as an implicit use on floating
+point instructions.
+
+Index: lib/Target/X86/X86RegisterInfo.cpp
+--- lib/Target/X86/X86RegisterInfo.cpp.orig
++++ lib/Target/X86/X86RegisterInfo.cpp
+@@ -497,6 +497,9 @@ BitVector X86RegisterInfo::getReservedRegs(const Machi
+   BitVector Reserved(getNumRegs());
+   const X86FrameLowering *TFI = getFrameLowering(MF);
+ 
++  // Set the floating point control register as reserved.
++  Reserved.set(X86::FPCW);
++
+   // Set the stack-pointer register and its aliases as reserved.
+   for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid();
+        ++I)
Index: patches/patch-lib_Target_X86_X86RegisterInfo_td
===================================================================
RCS file: /home/cvs/ports/devel/llvm/patches/patch-lib_Target_X86_X86RegisterInfo_td,v
retrieving revision 1.4
diff -u -p -u -p -r1.4 patch-lib_Target_X86_X86RegisterInfo_td
--- patches/patch-lib_Target_X86_X86RegisterInfo_td     28 Jan 2019 06:27:28 -0000      1.4
+++ patches/patch-lib_Target_X86_X86RegisterInfo_td     11 Feb 2019 06:22:42 -0000
@@ -1,29 +1,46 @@
 $OpenBSD: patch-lib_Target_X86_X86RegisterInfo_td,v 1.4 2019/01/28 06:27:28 jca Exp $
 
-The compiler is generally free to allocate general purpose registers in
-whatever order it chooses. Reasons for choosing one register before another
-usually include compiled instruction size (avoidance of REX prefixes, etc.)
-or usage conventions, but somehow haven't included security implications in
-the compiled bytecode. Some bytecode is more useful in polymorphic ROP
-sequences than others, so it seems prudent to try to avoid that bytecode
-when possible.
+- [X86] Connect the default fpsr and dirflag clobbers in inline
+  assembly to the registers we have defined for them.
+- [X86] Add FPCW as a register and start using it as an implicit use on floating
+  point instructions.
+- The compiler is generally free to allocate general purpose registers in
+  whatever order it chooses. Reasons for choosing one register before another
+  usually include compiled instruction size (avoidance of REX prefixes, etc.)
+  or usage conventions, but somehow haven't included security implications in
+  the compiled bytecode. Some bytecode is more useful in polymorphic ROP
+  sequences than others, so it seems prudent to try to avoid that bytecode
+  when possible.
 
-This patch moves EBX/RBX towards the end of the allocation preference for 32
-and 64 bit general purpose registers. Some instructions using RBX/EBX/BX/BL
-as a destination register end up with a ModR/M byte of C3 or CB, which is often
-useful in ROP gadgets. Because these gadgets often occur in the middle of
-functions, they exhibit somewhat higher diversity than some other C3/CB
-terminated gadgets. This change removes about 3% of total gadgets from the
-kernel, but about 6% of unique gadgets.
+  This patch moves EBX/RBX towards the end of the allocation preference for 32
+  and 64 bit general purpose registers. Some instructions using RBX/EBX/BX/BL
+  as a destination register end up with a ModR/M byte of C3 or CB, which is often
+  useful in ROP gadgets. Because these gadgets often occur in the middle of
+  functions, they exhibit somewhat higher diversity than some other C3/CB
+  terminated gadgets. This change removes about 3% of total gadgets from the
+  kernel, but about 6% of unique gadgets.
 
-There are other possible changes in this direction. BX/BL are obvious next
-targets for avoidance, and MM3/XMM3 may also be useful to try to avoid if
-possible.
+  There are other possible changes in this direction. BX/BL are obvious next
+  targets for avoidance, and MM3/XMM3 may also be useful to try to avoid if
+  possible.
 
 Index: lib/Target/X86/X86RegisterInfo.td
 --- lib/Target/X86/X86RegisterInfo.td.orig
 +++ lib/Target/X86/X86RegisterInfo.td
-@@ -402,8 +402,8 @@ def GRH16 : RegisterClass<"X86", [i16], 16,
+@@ -288,8 +288,11 @@ def ST6 : X86Reg<"st(6)", 6>, DwarfRegNum<[39, 18, 17]
+ def ST7 : X86Reg<"st(7)", 7>, DwarfRegNum<[40, 19, 18]>;
+ 
+ // Floating-point status word
+-def FPSW : X86Reg<"fpsw", 0>;
++def FPSW : X86Reg<"fpsr", 0>;
+ 
++// Floating-point control word
++def FPCW : X86Reg<"fpcr", 0>;
++
+ // Status flags register.
+ //
+ // Note that some flags that are commonly thought of as part of the status
+@@ -402,8 +405,8 @@ def GRH16 : RegisterClass<"X86", [i16], 16,
                                 R15WH)>;
  
  def GR32 : RegisterClass<"X86", [i32], 32,
@@ -34,7 +51,7 @@ Index: lib/Target/X86/X86RegisterInfo.td
  
  // GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since
  // RIP isn't really a register and it can't be used anywhere except in an
-@@ -412,7 +412,7 @@ def GR32 : RegisterClass<"X86", [i32], 32,
+@@ -412,7 +415,7 @@ def GR32 : RegisterClass<"X86", [i32], 32,
  // tests because of the inclusion of RIP in this register class.
  def GR64 : RegisterClass<"X86", [i64], 64,
                           (add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,

UPDATE: LLVM

Reply via email to