Hi Alan, On Mon, Jan 07, 2019 at 09:29:18AM +1030, Alan Modra wrote: > The direct cause of this PR is the fact that tls_gdld_nomark didn't > handle indirect calls. Adding the missing support revealed that most > indirect calls were being optimised back to direct calls anyway, due > to tls_gdld_nomark not checking any of the parallel elements except > the first (plus the extra element that distinguishes this call from > normal calls). Just checking the number of elements is enough to > separate the indirect calls from direct for ABI_ELFv2 and ABI_AIX, > while checking for the LONG_CALL bit in the cookie works for ABI_V4. > Direct calls being substituted for indirect calls is not the only > unwanted substitution. See the tls_nomark_call comment. I also saw a > _GLOBAL_OFFSET_TABLE_ symbol_ref being substituted for the GOT reg, > hence the unspec_tls change.
> Bootstrap and regression testing on powerpc64le-linux and > powerpc64-linux in progress. Note that the patch requires > https://gcc.gnu.org/ml/gcc-patches/2019-01/msg00252.html or the > earlier version for the attribute support. (Did you commit that yet?) > +;; Verify that elements of the tls_gdld_nomark call insn parallel past the > +;; second element (added to distinguish this call from normal calls) match > +;; the normal contours of a call insn. This is necessary to prevent > +;; substitutions we don't want, for example, an indirect call being > +;; optimised to a direct call, or (set (reg:r2) (unspec [] UNSPEC_TOCSLOT)) > +;; being cleverly optimised to (set (reg:r2) (reg:r2)) because gcc > +;; "knows" that r2 hasn't changed from a previous call. > +(define_predicate "tls_nomark_call" > + (match_code "parallel") > +{ > + int n = XVECLEN (op, 0); > + rtvec v = XVEC (op, 0); > + rtx set = RTVEC_ELT (v, 0); > + if (GET_CODE (set) != SET) > + return 0; > + rtx call = XEXP (set, 1); > + if (GET_CODE (call) != CALL) > + return 0; > + rtx mem = XEXP (call, 0); > + if (GET_CODE (mem) != MEM) > + return 0; > + rtx addr = XEXP (mem, 0); > + if (GET_CODE (addr) == SYMBOL_REF) > + { > + if (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_AIX) > + return (n == 3 && GET_CODE (RTVEC_ELT (v, 2)) == CLOBBER > + && REG_P (XEXP (RTVEC_ELT (v, 2), 0)) > + && REGNO (XEXP (RTVEC_ELT (v, 2), 0)) == LR_REGNO); > + else if (DEFAULT_ABI == ABI_V4) > + return (n >= 4 && n <= 5 && GET_CODE (RTVEC_ELT (v, 2)) == USE > + && CONST_INT_P (XEXP (RTVEC_ELT (v, 2), 0)) > + && (INTVAL (XEXP (RTVEC_ELT (v, 2), 0)) & CALL_LONG) == 0 > + && (n == 4 > + || (GET_CODE (RTVEC_ELT (v, 3)) == USE > + && REG_P (XEXP (RTVEC_ELT (v, 3), 0)))) > + && GET_CODE (RTVEC_ELT (v, n - 1)) == CLOBBER > + && REG_P (XEXP (RTVEC_ELT (v, n - 1), 0)) > + && REGNO (XEXP (RTVEC_ELT (v, n - 1), 0)) == LR_REGNO); > + else > + gcc_unreachable (); > + } > + else if (indirect_call_operand (addr, mode)) > + { > + if (DEFAULT_ABI == ABI_ELFv2) > + return (n == 4 && GET_CODE (RTVEC_ELT (v, 2)) == SET > + && REG_P (XEXP (RTVEC_ELT (v, 2), 0)) > + && REGNO (XEXP (RTVEC_ELT (v, 2), 0)) == TOC_REGNUM > + && GET_CODE (XEXP (RTVEC_ELT (v, 2), 1)) == UNSPEC > + && XINT (XEXP (RTVEC_ELT (v, 2), 1), 1) == UNSPEC_TOCSLOT > + && XVECLEN (XEXP (RTVEC_ELT (v, 2), 1), 0) == 1 > + && CONST_INT_P (XVECEXP (XEXP (RTVEC_ELT (v, 2), 1), 0, 0)) > + && GET_CODE (RTVEC_ELT (v, 3)) == CLOBBER > + && REG_P (XEXP (RTVEC_ELT (v, 3), 0)) > + && REGNO (XEXP (RTVEC_ELT (v, 3), 0)) == LR_REGNO); > + else if (DEFAULT_ABI == ABI_AIX) > + return (n == 5 && GET_CODE (RTVEC_ELT (v, 2)) == USE > + && GET_CODE (XEXP (RTVEC_ELT (v, 2), 0)) == MEM > + && GET_CODE (RTVEC_ELT (v, 3)) == SET > + && REG_P (XEXP (RTVEC_ELT (v, 3), 0)) > + && REGNO (XEXP (RTVEC_ELT (v, 3), 0)) == TOC_REGNUM > + && GET_CODE (XEXP (RTVEC_ELT (v, 3), 1)) == UNSPEC > + && XINT (XEXP (RTVEC_ELT (v, 3), 1), 1) == UNSPEC_TOCSLOT > + && XVECLEN (XEXP (RTVEC_ELT (v, 3), 1), 0) == 1 > + && CONST_INT_P (XVECEXP (XEXP (RTVEC_ELT (v, 3), 1), 0, 0)) > + && GET_CODE (RTVEC_ELT (v, 4)) == CLOBBER > + && REG_P (XEXP (RTVEC_ELT (v, 4), 0)) > + && REGNO (XEXP (RTVEC_ELT (v, 4), 0)) == LR_REGNO); > + else if (DEFAULT_ABI == ABI_V4) > + return (n == 4 && GET_CODE (RTVEC_ELT (v, 2)) == USE > + && CONST_INT_P (XEXP (RTVEC_ELT (v, 2), 0)) > + && GET_CODE (RTVEC_ELT (v, 3)) == CLOBBER > + && REG_P (XEXP (RTVEC_ELT (v, 3), 0)) > + && REGNO (XEXP (RTVEC_ELT (v, 3), 0)) == LR_REGNO); > + else > + gcc_unreachable (); > + } > + else > + return 0; > +}) I find things like this almost impossible to read (and to verify for correctness). Maybe you can factor it a bit more? Does it need to be a predicate at all, or can you handle it by having various patterns? > + (plus (if_then_else (match_test "TARGET_CMODEL != CMODEL_SMALL") > + (const_int 8) > + (const_int 4)) > + (plus (if_then_else (match_test "GET_CODE (operands[1]) != > SYMBOL_REF") > + (plus (if_then_else (match_test > "!rs6000_speculate_indirect_jumps") > + (const_int 4) > + (const_int 0)) > + (if_then_else (match_test "DEFAULT_ABI == > ABI_AIX") > + (const_int 4) > + (const_int 0))) > + (const_int 0)) > + (if_then_else (match_test "DEFAULT_ABI != ABI_V4") > + (const_int 8) > + (const_int 4)))))]) This part is way too wide. Maybe it is would be readable if not :-( Segher