Pull in 3 patches for the X86 backend: - [X86] Add FPSW as a Def on some FP instructions that were missing it. - [X86] Connect the default fpsr and dirflag clobbers in inline assembly to the registers we have defined for them. - [X86] Add FPCW as a register and start using it as an implicit use on floating point instructions.
Resolves miscompilation of libm mentioned here.. https://marc.info/?l=openbsd-bugs&m=154896725914212&w=2 Index: Makefile =================================================================== RCS file: /home/cvs/ports/devel/llvm/Makefile,v retrieving revision 1.212 diff -u -p -u -p -r1.212 Makefile --- Makefile 11 Feb 2019 05:33:57 -0000 1.212 +++ Makefile 11 Feb 2019 06:42:57 -0000 @@ -20,7 +20,7 @@ PKGSPEC-main = llvm-=${LLVM_V} PKGNAME-main = llvm-${LLVM_V} PKGNAME-python = py-llvm-${LLVM_V} PKGNAME-lldb = lldb-${LLVM_V} -REVISION-main = 5 +REVISION-main = 6 CATEGORIES = devel DISTFILES = llvm-${LLVM_V}.src${EXTRACT_SUFX} \ cfe-${LLVM_V}.src${EXTRACT_SUFX} \ Index: patches/patch-lib_Target_X86_X86ISelLowering_cpp =================================================================== RCS file: patches/patch-lib_Target_X86_X86ISelLowering_cpp diff -N patches/patch-lib_Target_X86_X86ISelLowering_cpp --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ patches/patch-lib_Target_X86_X86ISelLowering_cpp 11 Feb 2019 06:19:24 -0000 @@ -0,0 +1,29 @@ +$OpenBSD$ + +[X86] Connect the default fpsr and dirflag clobbers in inline +assembly to the registers we have defined for them. + +Index: lib/Target/X86/X86ISelLowering.cpp +--- lib/Target/X86/X86ISelLowering.cpp.orig ++++ lib/Target/X86/X86ISelLowering.cpp +@@ -40619,6 +40619,20 @@ X86TargetLowering::getRegForInlineAsmConstraint(const + return Res; + } + ++ // dirflag -> DF ++ if (StringRef("{dirflag}").equals_lower(Constraint)) { ++ Res.first = X86::DF; ++ Res.second = &X86::DFCCRRegClass; ++ return Res; ++ } ++ ++ // fpsr -> FPSW ++ if (StringRef("{fpsr}").equals_lower(Constraint)) { ++ Res.first = X86::FPSW; ++ Res.second = &X86::FPCCRRegClass; ++ return Res; ++ } ++ + // 'A' means [ER]AX + [ER]DX. 
+ if (Constraint == "A") { + if (Subtarget.is64Bit()) { Index: patches/patch-lib_Target_X86_X86InstrFPStack_td =================================================================== RCS file: patches/patch-lib_Target_X86_X86InstrFPStack_td diff -N patches/patch-lib_Target_X86_X86InstrFPStack_td --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ patches/patch-lib_Target_X86_X86InstrFPStack_td 11 Feb 2019 06:38:04 -0000 @@ -0,0 +1,186 @@ +$OpenBSD$ + +- [X86] Add FPSW as a Def on some FP instructions that were missing it. +- [X86] Add FPCW as a register and start using it as an implicit use on floating + point instructions. + +Index: lib/Target/X86/X86InstrFPStack.td +--- lib/Target/X86/X86InstrFPStack.td.orig ++++ lib/Target/X86/X86InstrFPStack.td +@@ -230,7 +230,7 @@ def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src), + } // mayLoad = 1, hasSideEffects = 1 + } + +-let Defs = [FPSW] in { ++let Defs = [FPSW], Uses = [FPCW] in { + // FPBinary_rr just defines pseudo-instructions, no need to set a scheduling + // resources. + let hasNoSchedulingInfo = 1 in { +@@ -267,7 +267,7 @@ class FPrST0PInst<Format fp, string asm> + // NOTE: GAS and apparently all other AT&T style assemblers have a broken notion + // of some of the 'reverse' forms of the fsub and fdiv instructions. As such, + // we have to put some 'r's in and take them out of weird places. 
+-let SchedRW = [WriteFAdd] in { ++let SchedRW = [WriteFAdd], Defs = [FPSW], Uses = [FPCW] in { + def ADD_FST0r : FPST0rInst <MRM0r, "fadd\t$op">; + def ADD_FrST0 : FPrST0Inst <MRM0r, "fadd\t{%st(0), $op|$op, st(0)}">; + def ADD_FPrST0 : FPrST0PInst<MRM0r, "faddp\t$op">; +@@ -278,16 +278,16 @@ def SUB_FST0r : FPST0rInst <MRM4r, "fsub\t$op">; + def SUBR_FrST0 : FPrST0Inst <MRM4r, "fsub{|r}\t{%st(0), $op|$op, st(0)}">; + def SUBR_FPrST0 : FPrST0PInst<MRM4r, "fsub{|r}p\t$op">; + } // SchedRW +-let SchedRW = [WriteFCom] in { ++let SchedRW = [WriteFCom], Defs = [FPSW], Uses = [FPCW] in { + def COM_FST0r : FPST0rInst <MRM2r, "fcom\t$op">; + def COMP_FST0r : FPST0rInst <MRM3r, "fcomp\t$op">; + } // SchedRW +-let SchedRW = [WriteFMul] in { ++let SchedRW = [WriteFMul], Defs = [FPSW], Uses = [FPCW] in { + def MUL_FST0r : FPST0rInst <MRM1r, "fmul\t$op">; + def MUL_FrST0 : FPrST0Inst <MRM1r, "fmul\t{%st(0), $op|$op, st(0)}">; + def MUL_FPrST0 : FPrST0PInst<MRM1r, "fmulp\t$op">; + } // SchedRW +-let SchedRW = [WriteFDiv] in { ++let SchedRW = [WriteFDiv], Defs = [FPSW], Uses = [FPCW] in { + def DIVR_FST0r : FPST0rInst <MRM7r, "fdivr\t$op">; + def DIV_FrST0 : FPrST0Inst <MRM7r, "fdiv{r}\t{%st(0), $op|$op, st(0)}">; + def DIV_FPrST0 : FPrST0PInst<MRM7r, "fdiv{r}p\t$op">; +@@ -307,7 +307,7 @@ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src), + def _F : FPI<0xD9, fp, (outs), (ins), asmstring>; + } + +-let Defs = [FPSW] in { ++let Defs = [FPSW], Uses = [FPCW] in { + + let SchedRW = [WriteFSign] in { + defm CHS : FPUnary<fneg, MRM_E0, "fchs">; +@@ -335,7 +335,7 @@ def TST_F : FPI<0xD9, MRM_E4, (outs), (ins), "ftst">; + + // Versions of FP instructions that take a single memory operand. Added for the + // disassembler; remove as they are included with patterns elsewhere. 
+-let SchedRW = [WriteFComLd] in { ++let SchedRW = [WriteFComLd], Defs = [FPSW], Uses = [FPCW] in { + def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">; + def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">; + +@@ -454,7 +454,7 @@ def ILD_Fp64m80: FpI_<(outs RFP80:$dst), (ins i64mem:$ + [(set RFP80:$dst, (X86fild addr:$src, i64))]>; + } // SchedRW + +-let SchedRW = [WriteStore] in { ++let SchedRW = [WriteStore], Uses = [FPCW] in { + def ST_Fp32m : FpIf32<(outs), (ins f32mem:$op, RFP32:$src), OneArgFP, + [(store RFP32:$src, addr:$op)]>; + def ST_Fp64m32 : FpIf64<(outs), (ins f32mem:$op, RFP64:$src), OneArgFP, +@@ -489,7 +489,7 @@ def IST_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80 + def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>; + def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>; + } // mayStore +-} // SchedRW ++} // SchedRW, Uses = [FPCW] + + let mayLoad = 1, SchedRW = [WriteLoad] in { + def LD_F32m : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src">; +@@ -499,7 +499,7 @@ def ILD_F16m : FPI<0xDF, MRM0m, (outs), (ins i16mem:$ + def ILD_F32m : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src">; + def ILD_F64m : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src">; + } +-let mayStore = 1, SchedRW = [WriteStore] in { ++let mayStore = 1, SchedRW = [WriteStore], Uses = [FPCW] in { + def ST_F32m : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst">; + def ST_F64m : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst">; + def ST_FP32m : FPI<0xD9, MRM3m, (outs), (ins f32mem:$dst), "fstp{s}\t$dst">; +@@ -513,7 +513,7 @@ def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$ + } + + // FISTTP requires SSE3 even though it's a FPStack op. 
+-let Predicates = [HasSSE3], SchedRW = [WriteStore] in { ++let Predicates = [HasSSE3], SchedRW = [WriteStore], Uses = [FPCW] in { + def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, + [(X86fp_to_i16mem RFP32:$src, addr:$op)]>; + def ISTT_Fp32m32 : FpI_<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, +@@ -534,7 +534,7 @@ def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80 + [(X86fp_to_i64mem RFP80:$src, addr:$op)]>; + } // Predicates = [HasSSE3] + +-let mayStore = 1, SchedRW = [WriteStore] in { ++let mayStore = 1, SchedRW = [WriteStore], Uses = [FPCW] in { + def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">; + def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst">; + def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), "fisttp{ll}\t$dst">; +@@ -570,7 +570,7 @@ def LD_F0 : FPI<0xD9, MRM_EE, (outs), (ins), "fldz">; + let SchedRW = [WriteFLD1] in + def LD_F1 : FPI<0xD9, MRM_E8, (outs), (ins), "fld1">; + +-let SchedRW = [WriteFLDC], Defs = [FPSW] in { ++let SchedRW = [WriteFLDC] in { + def FLDL2T : I<0xD9, MRM_E9, (outs), (ins), "fldl2t", []>; + def FLDL2E : I<0xD9, MRM_EA, (outs), (ins), "fldl2e", []>; + def FLDPI : I<0xD9, MRM_EB, (outs), (ins), "fldpi", []>; +@@ -579,7 +579,7 @@ def FLDLN2 : I<0xD9, MRM_ED, (outs), (ins), "fldln2", + } // SchedRW + + // Floating point compares. 
+-let SchedRW = [WriteFCom] in { ++let SchedRW = [WriteFCom], Uses = [FPCW] in { + def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, + [(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>; + def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, +@@ -591,16 +591,19 @@ def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80 + + let SchedRW = [WriteFCom] in { + // CC = ST(0) cmp ST(i) +-let Defs = [EFLAGS, FPSW] in { +-def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, +- [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>; +-def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, +- [(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>; ++let Defs = [EFLAGS, FPSW], Uses = [FPCW] in { ++def UCOM_FpIr32: FpI_<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, ++ [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>, ++ Requires<[FPStackf32, HasCMov]>; ++def UCOM_FpIr64: FpI_<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, ++ [(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>, ++ Requires<[FPStackf64, HasCMov]>; + def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, +- [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>; ++ [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>, ++ Requires<[HasCMov]>; + } + +-let Defs = [FPSW], Uses = [ST0] in { ++let Defs = [FPSW], Uses = [ST0, FPCW] in { + def UCOM_Fr : FPI<0xDD, MRM4r, // FPSW = cmp ST(0) with ST(i) + (outs), (ins RST:$reg), "fucom\t$reg">; + def UCOM_FPr : FPI<0xDD, MRM5r, // FPSW = cmp ST(0) with ST(i), pop +@@ -609,14 +612,12 @@ def UCOM_FPPr : FPI<0xDA, MRM_E9, // cmp ST(0) + (outs), (ins), "fucompp">; + } + +-let Defs = [EFLAGS, FPSW], Uses = [ST0] in { ++let Defs = [EFLAGS, FPSW], Uses = [ST0, FPCW] in { + def UCOM_FIr : FPI<0xDB, MRM5r, // CC = cmp ST(0) with ST(i) + (outs), (ins RST:$reg), "fucomi\t$reg">; + def UCOM_FIPr : FPI<0xDF, MRM5r, // CC = cmp ST(0) with ST(i), pop + (outs), (ins RST:$reg), "fucompi\t$reg">; +-} + +-let Defs = 
[EFLAGS, FPSW] in { + def COM_FIr : FPI<0xDB, MRM6r, (outs), (ins RST:$reg), "fcomi\t$reg">; + def COM_FIPr : FPI<0xDF, MRM6r, (outs), (ins RST:$reg), "fcompi\t$reg">; + } +@@ -628,12 +629,12 @@ let Defs = [AX], Uses = [FPSW] in + def FNSTSW16r : I<0xDF, MRM_E0, // AX = fp flags + (outs), (ins), "fnstsw\t{%ax|ax}", + [(set AX, (X86fp_stsw FPSW))]>; +-let Defs = [FPSW] in ++let Defs = [FPSW], Uses = [FPCW] in + def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control world + (outs), (ins i16mem:$dst), "fnstcw\t$dst", + [(X86fp_cwd_get16 addr:$dst)]>; + } // SchedRW +-let Defs = [FPSW], mayLoad = 1 in ++let Defs = [FPSW,FPCW], mayLoad = 1 in + def FLDCW16m : I<0xD9, MRM5m, // X87 control world = [mem16] + (outs), (ins i16mem:$dst), "fldcw\t$dst", []>, + Sched<[WriteLoad]>; Index: patches/patch-lib_Target_X86_X86RegisterInfo_cpp =================================================================== RCS file: patches/patch-lib_Target_X86_X86RegisterInfo_cpp diff -N patches/patch-lib_Target_X86_X86RegisterInfo_cpp --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ patches/patch-lib_Target_X86_X86RegisterInfo_cpp 11 Feb 2019 06:22:08 -0000 @@ -0,0 +1,18 @@ +$OpenBSD$ + +[X86] Add FPCW as a register and start using it as an implicit use on floating +point instructions. + +Index: lib/Target/X86/X86RegisterInfo.cpp +--- lib/Target/X86/X86RegisterInfo.cpp.orig ++++ lib/Target/X86/X86RegisterInfo.cpp +@@ -497,6 +497,9 @@ BitVector X86RegisterInfo::getReservedRegs(const Machi + BitVector Reserved(getNumRegs()); + const X86FrameLowering *TFI = getFrameLowering(MF); + ++ // Set the floating point control register as reserved. ++ Reserved.set(X86::FPCW); ++ + // Set the stack-pointer register and its aliases as reserved. 
+ for (MCSubRegIterator I(X86::RSP, this, /*IncludeSelf=*/true); I.isValid(); + ++I) Index: patches/patch-lib_Target_X86_X86RegisterInfo_td =================================================================== RCS file: /home/cvs/ports/devel/llvm/patches/patch-lib_Target_X86_X86RegisterInfo_td,v retrieving revision 1.4 diff -u -p -u -p -r1.4 patch-lib_Target_X86_X86RegisterInfo_td --- patches/patch-lib_Target_X86_X86RegisterInfo_td 28 Jan 2019 06:27:28 -0000 1.4 +++ patches/patch-lib_Target_X86_X86RegisterInfo_td 11 Feb 2019 06:22:42 -0000 @@ -1,29 +1,46 @@ $OpenBSD: patch-lib_Target_X86_X86RegisterInfo_td,v 1.4 2019/01/28 06:27:28 jca Exp $ -The compiler is generally free to allocate general purpose registers in -whatever order it chooses. Reasons for choosing one register before another -usually include compiled instruction size (avoidance of REX prefixes, etc.) -or usage conventions, but somehow haven't included security implications in -the compiled bytecode. Some bytecode is more useful in polymorphic ROP -sequences than others, so it seems prudent to try to avoid that bytecode -when possible. +- [X86] Connect the default fpsr and dirflag clobbers in inline + assembly to the registers we have defined for them. +- [X86] Add FPCW as a register and start using it as an implicit use on floating + point instructions. +- The compiler is generally free to allocate general purpose registers in + whatever order it chooses. Reasons for choosing one register before another + usually include compiled instruction size (avoidance of REX prefixes, etc.) + or usage conventions, but somehow haven't included security implications in + the compiled bytecode. Some bytecode is more useful in polymorphic ROP + sequences than others, so it seems prudent to try to avoid that bytecode + when possible. -This patch moves EBX/RBX towards the end of the allocation preference for 32 -and 64 bit general purpose registers. 
Some instructions using RBX/EBX/BX/BL -as a destination register end up with a ModR/M byte of C3 or CB, which is often -useful in ROP gadgets. Because these gadgets often occur in the middle of -functions, they exhibit somewhat higher diversity than some other C3/CB -terminated gadgets. This change removes about 3% of total gadgets from the -kernel, but about 6% of unique gadgets. + This patch moves EBX/RBX towards the end of the allocation preference for 32 + and 64 bit general purpose registers. Some instructions using RBX/EBX/BX/BL + as a destination register end up with a ModR/M byte of C3 or CB, which is often + useful in ROP gadgets. Because these gadgets often occur in the middle of + functions, they exhibit somewhat higher diversity than some other C3/CB + terminated gadgets. This change removes about 3% of total gadgets from the + kernel, but about 6% of unique gadgets. -There are other possible changes in this direction. BX/BL are obvious next -targets for avoidance, and MM3/XMM3 may also be useful to try to avoid if -possible. + There are other possible changes in this direction. BX/BL are obvious next + targets for avoidance, and MM3/XMM3 may also be useful to try to avoid if + possible. Index: lib/Target/X86/X86RegisterInfo.td --- lib/Target/X86/X86RegisterInfo.td.orig +++ lib/Target/X86/X86RegisterInfo.td -@@ -402,8 +402,8 @@ def GRH16 : RegisterClass<"X86", [i16], 16, +@@ -288,8 +288,11 @@ def ST6 : X86Reg<"st(6)", 6>, DwarfRegNum<[39, 18, 17] + def ST7 : X86Reg<"st(7)", 7>, DwarfRegNum<[40, 19, 18]>; + + // Floating-point status word +-def FPSW : X86Reg<"fpsw", 0>; ++def FPSW : X86Reg<"fpsr", 0>; + ++// Floating-point control word ++def FPCW : X86Reg<"fpcr", 0>; ++ + // Status flags register. 
+ // + // Note that some flags that are commonly thought of as part of the status +@@ -402,8 +405,8 @@ def GRH16 : RegisterClass<"X86", [i16], 16, R15WH)>; def GR32 : RegisterClass<"X86", [i32], 32, @@ -34,7 +51,7 @@ Index: lib/Target/X86/X86RegisterInfo.td // GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since // RIP isn't really a register and it can't be used anywhere except in an -@@ -412,7 +412,7 @@ def GR32 : RegisterClass<"X86", [i32], 32, +@@ -412,7 +415,7 @@ def GR32 : RegisterClass<"X86", [i32], 32, // tests because of the inclusion of RIP in this register class. def GR64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,