Hello! The attached patch converts *load_tp_<mode> and *add_tp_<mode> to a load from address 0 in the DEFAULT_TLS_SEG_REG address space. The conversion is done as a split after the combine pass, so addresses can still be combined before they are converted to a non-default address space.
2017-08-13 Uros Bizjak <ubiz...@gmail.com> * config/i386/i386.md (*load_tp_<mode>): Redefine as define_insn_and_split. Split to a memory load from 0 in DEFAULT_TLS_SEG_REG address space. Merge with *load_tp_x32 using PTR mode iterator. (*load_tp_x32_zext): Redefine as define_insn_and_split. Split to a memory load from 0 in DEFAULT_TLS_SEG_REG address space. (*add_tp_<mode>): Redefine as define_insn_and_split. Split to an add with a memory load from 0 in DEFAULT_TLS_SEG_REG address space. Merge with *add_tp_x32 using PTR mode iterator. (*add_tp_x32_zext): Redefine as define_insn_and_split. Split to an add with a memory load from 0 in DEFAULT_TLS_SEG_REG address space. The patch was bootstrapped and regression tested on x86_64-linux-gnu {,-m32}. Committed to mainline SVN. Uros.
Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 251053) +++ config/i386/i386.md (working copy) @@ -13773,57 +13773,59 @@ (clobber (match_dup 5)) (clobber (reg:CC FLAGS_REG))])]) -;; Segment register for the thread base ptr load -(define_mode_attr tp_seg [(SI "gs") (DI "fs")]) - ;; Load and add the thread base pointer from %<tp_seg>:0. -(define_insn "*load_tp_x32" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec:SI [(const_int 0)] UNSPEC_TP))] - "TARGET_X32" - "mov{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}" - [(set_attr "type" "imov") - (set_attr "modrm" "0") - (set_attr "length" "7") - (set_attr "memory" "load") - (set_attr "imm_disp" "false")]) +(define_insn_and_split "*load_tp_<mode>" + [(set (match_operand:PTR 0 "register_operand" "=r") + (unspec:PTR [(const_int 0)] UNSPEC_TP))] + "" + "#" + "" + [(set (match_dup 0) + (match_dup 1))] +{ + addr_space_t as = DEFAULT_TLS_SEG_REG; -(define_insn "*load_tp_x32_zext" + operands[1] = gen_const_mem (<MODE>mode, const0_rtx); + set_mem_addr_space (operands[1], as); +}) + +(define_insn_and_split "*load_tp_x32_zext" [(set (match_operand:DI 0 "register_operand" "=r") - (zero_extend:DI (unspec:SI [(const_int 0)] UNSPEC_TP)))] + (zero_extend:DI + (unspec:SI [(const_int 0)] UNSPEC_TP)))] "TARGET_X32" - "mov{l}\t{%%fs:0, %k0|%k0, DWORD PTR fs:0}" - [(set_attr "type" "imov") - (set_attr "modrm" "0") - (set_attr "length" "7") - (set_attr "memory" "load") - (set_attr "imm_disp" "false")]) + "#" + "" + [(set (match_dup 0) + (zero_extend:DI (match_dup 1)))] +{ + addr_space_t as = DEFAULT_TLS_SEG_REG; -(define_insn "*load_tp_<mode>" - [(set (match_operand:P 0 "register_operand" "=r") - (unspec:P [(const_int 0)] UNSPEC_TP))] - "!TARGET_X32" - "mov{<imodesuffix>}\t{%%<tp_seg>:0, %0|%0, <iptrsize> PTR <tp_seg>:0}" - [(set_attr "type" "imov") - (set_attr "modrm" "0") - (set_attr "length" "7") - (set_attr "memory" "load") - (set_attr "imm_disp" 
"false")]) + operands[1] = gen_const_mem (SImode, const0_rtx); + set_mem_addr_space (operands[1], as); +}) -(define_insn "*add_tp_x32" - [(set (match_operand:SI 0 "register_operand" "=r") - (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP) - (match_operand:SI 1 "register_operand" "0"))) +(define_insn_and_split "*add_tp_<mode>" + [(set (match_operand:PTR 0 "register_operand" "=r") + (plus:PTR + (unspec:PTR [(const_int 0)] UNSPEC_TP) + (match_operand:PTR 1 "register_operand" "0"))) (clobber (reg:CC FLAGS_REG))] - "TARGET_X32" - "add{l}\t{%%fs:0, %0|%0, DWORD PTR fs:0}" - [(set_attr "type" "alu") - (set_attr "modrm" "0") - (set_attr "length" "7") - (set_attr "memory" "load") - (set_attr "imm_disp" "false")]) + "" + "#" + "" + [(parallel + [(set (match_dup 0) + (plus:PTR (match_dup 1) (match_dup 2))) + (clobber (reg:CC FLAGS_REG))])] +{ + addr_space_t as = DEFAULT_TLS_SEG_REG; -(define_insn "*add_tp_x32_zext" + operands[2] = gen_const_mem (<MODE>mode, const0_rtx); + set_mem_addr_space (operands[2], as); +}) + +(define_insn_and_split "*add_tp_x32_zext" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI (plus:SI (unspec:SI [(const_int 0)] UNSPEC_TP) @@ -13830,25 +13832,19 @@ (match_operand:SI 1 "register_operand" "0")))) (clobber (reg:CC FLAGS_REG))] "TARGET_X32" - "add{l}\t{%%fs:0, %k0|%k0, DWORD PTR fs:0}" - [(set_attr "type" "alu") - (set_attr "modrm" "0") - (set_attr "length" "7") - (set_attr "memory" "load") - (set_attr "imm_disp" "false")]) + "#" + "" + [(parallel + [(set (match_dup 0) + (zero_extend:DI + (plus:SI (match_dup 1) (match_dup 2)))) + (clobber (reg:CC FLAGS_REG))])] +{ + addr_space_t as = DEFAULT_TLS_SEG_REG; -(define_insn "*add_tp_<mode>" - [(set (match_operand:P 0 "register_operand" "=r") - (plus:P (unspec:P [(const_int 0)] UNSPEC_TP) - (match_operand:P 1 "register_operand" "0"))) - (clobber (reg:CC FLAGS_REG))] - "!TARGET_X32" - "add{<imodesuffix>}\t{%%<tp_seg>:0, %0|%0, <iptrsize> PTR <tp_seg>:0}" - [(set_attr "type" "alu") - (set_attr 
"modrm" "0") - (set_attr "length" "7") - (set_attr "memory" "load") - (set_attr "imm_disp" "false")]) + operands[2] = gen_const_mem (SImode, const0_rtx); + set_mem_addr_space (operands[2], as); +}) ;; The Sun linker took the AMD64 TLS spec literally and can only handle ;; %rax as destination of the initial executable code sequence.