Some comments about the final part: Chung-Ju Wu <jasonw...@gmail.com> writes: > +(define_constraint "Ibms" > + "The immediate value with power of 2" > + (and (match_code "const_int") > + (match_test "(TARGET_ISA_V3 || TARGET_ISA_V3M) > + && (floor_log2 (ival) < 8) > + && (ival > 0) > + && (ival == (1 << floor_log2 (ival)))")))
I think the last three lines are equivalent to: IN_RANGE (exact_log2 (ival), 0, 7) > +(define_constraint "Ifex" > + "The immediate value with power of 2 minus 1" > + (and (match_code "const_int") > + (match_test "(TARGET_ISA_V3 || TARGET_ISA_V3M) > + && (ival < 256) > + && (ival > 0) > + && (floor_log2 (ival + 1) - 1 < 8) > + && ((ival + 1) == (1 << floor_log2 (ival + 1)))"))) And here: IN_RANGE (exact_log2 (ival + 1), 1, 8) > +#ifndef nds32_OPTS_H > +#define nds32_OPTS_H Very minor, but the mixture of case looks odd. > +;; ------------------------------------------------------------- > +;; Boolean DImode instructions. > +;; ------------------------------------------------------------- > + > +;; Boolean and,ior,xor insns. > + > +;; 'and' operation. > + > +(define_expand "anddi3" > + [(set (match_operand:DI 0 "register_operand" "") > + (and:DI (match_operand:DI 1 "register_operand" "") > + (match_operand:DI 2 "register_operand" "")))] > + "" > + "" > +) > + > +; Use '#' to split instruction. > +(define_insn "*anddi3_insn" > + [(set (match_operand:DI 0 "register_operand" "=&r, &r") > + (and:DI (match_operand:DI 1 "register_operand" " %0, r") > + (match_operand:DI 2 "register_operand" " r, r")))] > + "" > + "#" > + [(set_attr "length" "8")]) > + > +; Use '#' to split instruction. > +; The zero extend of operand 2 clears the high word of the output operand. > +(define_insn_and_split "*anddi_zesidi_di" > + [(set (match_operand:DI 0 "register_operand" > "=&r, &r") > + (and:DI (zero_extend:DI (match_operand:SI 2 "register_operand" " r, > r")) > + (match_operand:DI 1 "register_operand" " 0, > r")))] > + "" > + "#" > + "reload_completed" > + [(set (match_dup 0) (and:SI (match_dup 1) (match_dup 2))) > + (set (match_dup 3) (const_int 0))] > +{ > + operands[3] = gen_highpart (SImode, operands[0]); > + operands[0] = gen_lowpart (SImode, operands[0]); > + operands[1] = gen_lowpart (SImode, operands[1]); > +} > + [(set_attr "length" "8")]) > + > +; Use '#' to split instruction. > +(define_insn "*anddi_sesidi_di" > + [(set (match_operand:DI 0 "register_operand" > "=&r, &r") > + (and:DI (sign_extend:DI (match_operand:SI 2 "register_operand" " r, > r")) > + (match_operand:DI 1 "register_operand" " 0, > r")))] > + "" > + "#" > + [(set_attr "length" "8")]) > + > + > +;; 'ior' operation. > + > +(define_expand "iordi3" > + [(set (match_operand:DI 0 "register_operand" "") > + (ior:DI (match_operand:DI 1 "register_operand" "") > + (match_operand:DI 2 "register_operand" "")))] > + "" > + "" > +) > + > +; Use '#' to split instruction. > +(define_insn "*iordi3_insn" > + [(set (match_operand:DI 0 "register_operand" "=&r, &r") > + (ior:DI (match_operand:DI 1 "register_operand" " %0, r") > + (match_operand:DI 2 "register_operand" " r, r")))] > + "" > + "#" > + [(set_attr "length" "8")]) > + > +; Use '#' to split instruction. > +(define_insn "*iordi_zesidi_di" > + [(set (match_operand:DI 0 "register_operand" > "=&r, &r") > + (ior:DI (zero_extend:DI (match_operand:SI 2 "register_operand" " r, > r")) > + (match_operand:DI 1 "register_operand" " 0, > ?r")))] > + "" > + "#" > + [(set_attr "length" "8")]) > + > +; Use '#' to split instruction. > +(define_insn "*iordi_sesidi_di" > + [(set (match_operand:DI 0 "register_operand" > "=&r, &r") > + (ior:DI (sign_extend:DI (match_operand:SI 2 "register_operand" " r, > r")) > + (match_operand:DI 1 "register_operand" " 0, > r")))] > + "" > + "#" > + [(set_attr "length" "8")]) > + > + > +;; 'xor' operation. > + > +(define_expand "xordi3" > + [(set (match_operand:DI 0 "register_operand" "") > + (xor:DI (match_operand:DI 1 "register_operand" "") > + (match_operand:DI 2 "register_operand" "")))] > + "" > + "" > +) > + > +; Use '#' to split instruction. > +(define_insn "*xordi3_insn" > + [(set (match_operand:DI 0 "register_operand" "=&r, &r") > + (xor:DI (match_operand:DI 1 "register_operand" " %0, r") > + (match_operand:DI 2 "register_operand" " r, r")))] > + "" > + "#" > + [(set_attr "length" "8")]) > + > +; Use '#' to split instruction. > +(define_insn "*xordi_zesidi_di" > + [(set (match_operand:DI 0 "register_operand" > "=&r, &r") > + (xor:DI (zero_extend:DI (match_operand:SI 2 "register_operand" " r, > r")) > + (match_operand:DI 1 "register_operand" " 0, > ?r")))] > + "" > + "#" > + [(set_attr "length" "8")]) > + > +; Use '#' to split instruction. > +(define_insn "*xordi_sesidi_di" > + [(set (match_operand:DI 0 "register_operand" > "=&r, &r") > + (xor:DI (sign_extend:DI (match_operand:SI 2 "register_operand" " r, > r")) > + (match_operand:DI 1 "register_operand" " 0, > r")))] > + "" > + "#" > + [(set_attr "length" "8")]) > + > + > +;; Split up double word logical operations. > + > +;; Split up simple DImode logical operations. Simply perform the logical > +;; operation on the upper and lower halves of the registers. > +(define_split > + [(set (match_operand:DI 0 "register_operand" "") > + (match_operator:DI 6 "nds32_logical_binary_operator" > + [(match_operand:DI 1 "register_operand" "") > + (match_operand:DI 2 "register_operand" "")]))] > + "reload_completed" > + [(set (match_dup 0) (match_op_dup:SI 6 [(match_dup 1) (match_dup 2)])) > + (set (match_dup 3) (match_op_dup:SI 6 [(match_dup 4) (match_dup 5)]))] > +{ > + /* Note that operands[0], operands[1], > + and operands[2] will be assigned new rtx, > + so be careful of the order when using them. */ > + > + operands[3] = gen_highpart (SImode, operands[0]); > + operands[0] = gen_lowpart (SImode, operands[0]); > + > + operands[4] = gen_highpart (SImode, operands[1]); > + operands[1] = gen_lowpart (SImode, operands[1]); > + > + operands[5] = gen_highpart (SImode, operands[2]); > + operands[2] = gen_lowpart (SImode, operands[2]); > +}) In the past it was good practice to define multiword operations like these so that they could be seen as simple binary operations during early rtl optimisation. That shouldn't be necessary now that we have gimple-level optimisation though. Instead, splitting from the outset allows the rtl passes to see and optimise the individual word operations. If you don't define the patterns, the generic code will split the operation in the same way. You should then get the zero_extend optimisation above for free. You should also get better code for cases where operand 2 is constant. So in theory it should be better to remove these patterns. > +;; Interrupt Instructions. > + > +(define_insn "unspec_volatile_setgie_en" > + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_SETGIE_EN)] > + "" > + "setgie.e" > + [(set_attr "type" "misc")] > +) > + > +(define_insn "unspec_volatile_setgie_dis" > + [(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_SETGIE_DIS)] > + "" > + "setgie.d" > + [(set_attr "type" "misc")] > +) Ah, following up from my comment in part 1 about plain [(unspec ...)] (as opposed to [(set ... (unspec ...))]) insns being dangerous: it looks like the intrinsics are all unspec_volatile instead, which is good. Maybe it'd be worth changing the .c comment to say unspec_volatile instead. > +;; Load Multiple Insns. > +;; > +;; opernads[0] is the first of the consecutive registers. Typo: operands > +misr-vector-size= > +Target Report RejectNegative Joined UInteger Var(nds32_isr_vector_size) > Init(NDS32_DEFAULT_ISR_VECTOR_SIZE) > +Specify the size of each vector for interrupt handler. The valid value is 4 > or 16. Maybe: Specify the size of each interrupt vector, which must be 4 or 16. > +mcache-block-size= > +Target Report RejectNegative Joined UInteger Var(nds32_cache_block_size) > Init(NDS32_DEFAULT_CACHE_BLOCK_SIZE) > +Specify the size of each cache block. The size is the power of 2 in bytes. > The valid value is: 4, 8, 16, 32, 64, 128, 256, or 512. And here: Specify the size of each cache block, which must be a power of 2 between 4 and 512. > +march= > +Target RejectNegative Joined Enum(nds32_arch_type) Var(nds32_arch_option) > Init(ARCH_V3) > +Specify the name of the target architecture. The valid value is: v2, v3, or > v3m. "The valid value..." part shouldn't be needed, since the general options machinery will print out the enum values. > +mforce-fp-as-gp > +Target Report RejectNegative Mask(FORCE_FP_AS_GP) > +Prevent $fp being allocated during register allocation so that compiler is > able to force using $fp to access static and global variables for code-size > reduction. Then compiler will use special directives and code generation to > guide linker doing fp-as-gp optimization (NOTE: This is link time > optimization so make sure you pass '--relax' option to linker at linking > stage). This seems a bit long for help text. Also, it might be better to drop the RejectNegative, so that "-mforce-fp-as-gp -mno-force-fp-as-gp" works. (This is sometimes useful when dealing with badly-written makefiles.) > +mex9 > +Target Report RejectNegative Mask(EX9) > +Use special directives to guide linker doing ex9 optimization (NOTE: This is > link time optimization so make sure you pass '--relax' and '--mex9' option to > linker at linking stage). The driver ought to pass down -mex9 as --mex9 instead, via LINK_SPEC, so that the user doesn't have to. Maybe it should pass down --relax too, if that makes sense. I.e. something like: %{mex9: --mex9} or (if there are no drawbacks to making -mex9 imply --relax): %{mex9: --relax --mex9} > +mno-ctor-dtor > +Target Report RejectNegative > +Disable constructor/destructor feature. How is this option used? > +;; Merge single move to sign_extend load. > +(define_peephole2 > + [(set (match_operand:SI 0 "register_operand" "") > + (match_operand:SI 1 "register_operand" "")) > + (set (match_operand:SI 2 "register_operand" "") > + (sign_extend:SI (mem:QIHISI (plus:SI (match_dup 0) > + (match_operand:SI 3 > "immediate_operand")))))] > + "peep2_reg_dead_p (2, operands[0])" > + [(set (match_dup 2) > + (sign_extend:SI (mem:QIHISI (plus:SI (match_dup 1) > + (match_dup 3)))))] > +) > + > +;; Merge single move to zero_extend load. > +(define_peephole2 > + [(set (match_operand:SI 0 "register_operand" "") > + (match_operand:SI 1 "register_operand" "")) > + (set (match_operand:SI 2 "register_operand" "") > + (zero_extend:SI (mem:QIHISI (plus:SI (match_dup 0) > + (match_operand:SI 3 > "immediate_operand")))))] > + "peep2_reg_dead_p (2, operands[0])" > + [(set (match_dup 2) > + (zero_extend:SI (mem:QIHISI (plus:SI (match_dup 1) > + (match_dup 3)))))] > +) > + > +;; Merge single move to load. > +(define_peephole2 > + [(set (match_operand:SI 0 "register_operand" "") > + (match_operand:SI 1 "register_operand" "")) > + (set (match_operand:QIHISI 2 "register_operand" "") > + (mem:QIHISI (plus:SI (match_dup 0) > + (match_operand:SI 3 "immediate_operand"))))] > + "peep2_reg_dead_p (2, operands[0])" > + [(set (match_dup 2) > + (mem:QIHISI (plus:SI (match_dup 1) > + (match_dup 3))))] > +) > + > +;; Merge single move to store. > +(define_peephole2 > + [(set (match_operand:SI 0 "register_operand" "") > + (match_operand:SI 1 "register_operand" "")) > + (set (mem:QIHISI (plus:SI (match_dup 0) > + (match_operand:SI 3 "immediate_operand"))) > + (match_operand:QIHISI 2 "register_operand" ""))] > + "peep2_reg_dead_p (2, operands[0])" > + [(set (mem:QIHISI (plus:SI (match_dup 1) > + (match_dup 3))) > + (match_dup 2))] > +) It looks like these peepholes are working around the fact that the move, sign_extend and zero_extend alternatives are split across several patterns. Hopefully they won't be needed if each operation has a single define_insn that lists all the alternatives. > +;; Merge single addi to sign_extend load. > +(define_peephole2 > + [(set (match_operand:SI 0 "register_operand" "") > + (plus:SI (match_operand:SI 1 "register_operand" "") > + (match_operand:SI 4 "immediate_operand" ""))) > + (set (match_operand:SI 2 "register_operand" "") > + (sign_extend:SI (mem:QIHISI (plus:SI (match_dup 0) > + (match_operand:SI 3 > "immediate_operand")))))] > + "peep2_reg_dead_p (2, operands[0]) > + && CONST_INT_P (operands[4]) > + && CONST_INT_P (operands[3]) > + && ((INTVAL (operands[3]) + INTVAL (operands[4])) % <byte> == 0) > + && satisfies_constraint_Is15 (GEN_INT (trunc_int_for_mode (INTVAL > (operands[3]) + INTVAL (operands[4]), SImode)))" > + [(set (match_dup 2) > + (sign_extend:SI (mem:QIHISI (plus:SI (match_dup 1) > + (match_dup 3)))))] > +{ > + operands[3] = gen_int_mode (INTVAL(operands[3]) + INTVAL(operands[4]), > + SImode); > +}) > + > +;; Merge single addi to zero_extend load. > +(define_peephole2 > + [(set (match_operand:SI 0 "register_operand" "") > + (plus:SI (match_operand:SI 1 "register_operand" "") > + (match_operand:SI 4 "immediate_operand" ""))) > + (set (match_operand:SI 2 "register_operand" "") > + (zero_extend:SI (mem:QIHISI (plus:SI (match_dup 0) > + (match_operand:SI 3 > "immediate_operand")))))] > + "peep2_reg_dead_p (2, operands[0]) > + && CONST_INT_P (operands[4]) > + && CONST_INT_P (operands[3]) > + && ((INTVAL (operands[3]) + INTVAL (operands[4])) % <byte> == 0) > + && satisfies_constraint_Is15 (GEN_INT (trunc_int_for_mode (INTVAL > (operands[3]) + INTVAL (operands[4]), SImode)))" > + [(set (match_dup 2) > + (zero_extend:SI (mem:QIHISI (plus:SI (match_dup 1) > + (match_dup 3)))))] > +{ > + operands[3] = gen_int_mode (INTVAL(operands[3]) + INTVAL(operands[4]), > + SImode); > +}) > + > +;; Merge single addi to load. > +(define_peephole2 > + [(set (match_operand:SI 0 "register_operand" "") > + (plus:SI (match_operand:SI 1 "register_operand" "") > + (match_operand:SI 4 "immediate_operand" ""))) > + (set (match_operand:QIHISI 2 "register_operand" "") > + (mem:QIHISI (plus:SI (match_dup 0) > + (match_operand:SI 3 "immediate_operand"))))] > + "peep2_reg_dead_p (2, operands[0]) > + && CONST_INT_P (operands[4]) > + && CONST_INT_P (operands[3]) > + && ((INTVAL (operands[3]) + INTVAL (operands[4])) % <byte> == 0) > + && satisfies_constraint_Is15 (GEN_INT (trunc_int_for_mode (INTVAL > (operands[3]) + INTVAL (operands[4]), SImode)))" > + [(set (match_dup 2) > + (mem:QIHISI (plus:SI (match_dup 1) > + (match_dup 3))))] > +{ > + operands[3] = gen_int_mode (INTVAL(operands[3]) + INTVAL(operands[4]), > + SImode); > +}) > + > +;; Merge single addi to store. > +(define_peephole2 > + [(set (match_operand:SI 0 "register_operand" "") > + (plus:SI (match_operand:SI 1 "register_operand" "") > + (match_operand:SI 4 "immediate_operand" ""))) > + (set (mem:QIHISI (plus:SI (match_dup 0) > + (match_operand:SI 3 "immediate_operand"))) > + (match_operand:QIHISI 2 "register_operand" ""))] > + "peep2_reg_dead_p (2, operands[0]) > + && CONST_INT_P (operands[4]) > + && CONST_INT_P (operands[3]) > + && ((INTVAL (operands[3]) + INTVAL (operands[4])) % <byte> == 0) > + && satisfies_constraint_Is15 (GEN_INT (trunc_int_for_mode (INTVAL > (operands[3]) + INTVAL (operands[4]), SImode)))" > + [(set (mem:QIHISI (plus:SI (match_dup 1) > + (match_dup 3))) > + (match_dup 2))] > +{ > + operands[3] = gen_int_mode (INTVAL(operands[3]) + INTVAL(operands[4]), > + SImode); > +}) TBH I'm suprised these are necessary. They seem to be doing a simple forward-propagation of the addition, which is something that the post-reload optimisers ought to be able to do already. Could you double-check whether these still trigger? It'd be interesting to know why if so. > +;; Reg, subreg(reg) or const_int. > +(define_predicate "nds32_reg_or_int_operand" > + (ior (match_operand 0 "immediate_operand") > + (match_operand 0 "register_operand")) > +{ > + if (GET_CODE (op) == SUBREG) > + op = SUBREG_REG (op); > + > + if (REG_P (op)) > + return true; > + > + if (GET_CODE (op) == CONST_INT) > + return true; > + return false; > +}) > + > +(define_predicate "nds32_nonmemory_nonsymbol_operand" > + (match_operand 0 "nonmemory_operand") > +{ > + switch (GET_CODE (op)) > + { > + case SYMBOL_REF: > + case LABEL_REF: > + case CONST: > + return false; > + default: > + return true; > + } > +}) Following up from my other comment yesterday, I'm not sure about the difference between these two. Is there something that one of the predicates is trying to reject but the other isn't? I suppose "nds32_nonmemory_nonsymbol_operand" could be used in a floating-point context to allow floating-point constants, but its only use was with SImode. You also have: (define_predicate "nds32_reg_constant_operand" (ior (match_operand 0 "register_operand") (match_operand 0 "const_int_operand"))) which looks like the natural definition of "nds32_reg_or_int_operand". Would it be possible to just keep that predicate (with whichever name seems best) and remove the two quoted above? > +(define_predicate "nds32_rimm15s_operand" > + (match_operand 0 "general_operand") > +{ > + if (GET_CODE (op) == SUBREG) > + op = SUBREG_REG (op); > + if (GET_CODE (op) == REG) > + return true; > + if (GET_CODE (op) != CONST_INT) > + return false; > + > + return satisfies_constraint_Is15 (op); > +}) Maybe: (ior (match_operand 0 "register_operand") (and (match_operand 0 "const_int_operand") (match_test "satisfies_constraint_Is15 (op)"))) In general, defining predicates in terms of other predicates should lead to better insn-recog.c code (although I'm sure there are exceptions). > +(define_predicate "nds32_imm5u_operand" > + (match_operand 0 "immediate_operand") > +{ > + return satisfies_constraint_Iu05 (op); > +}) Using const_int_operand would be tighter than immediate_operand. > +(define_special_predicate "nds32_load_multiple_operation" > + (match_code "parallel") > +{ > + HOST_WIDE_INT count; > + int dest_regno; > + rtx src_addr; > + > + int i; > + rtx elt; > + > + /* Get the counts of elements in the parallel rtx. */ > + count = XVECLEN (op, 0); > + > + /* Pick up the first element. */ > + elt = XVECEXP (op, 0, 0); > + > + /* Perform some quick check for the first element in the parallel rtx. */ > + if (GET_CODE (elt) != SET > + || count <= 1 > + || count > 8 > + || GET_CODE (SET_DEST (elt)) != REG > + || GET_CODE (SET_SRC (elt)) != MEM) > + return false; > + > + dest_regno = REGNO (SET_DEST (elt)); > + src_addr = XEXP (SET_SRC (elt), 0); > + > + /* Perform detail check for each element. */ > + for (i = 0; i < count; i++) > + { > + elt = XVECEXP (op, 0, i); > + > + /* Refer to nds32.multiple.md for more information > + about following checking. */ > + if (GET_CODE (elt) != SET > + || GET_CODE (SET_DEST (elt)) != REG > + || GET_MODE (SET_DEST (elt)) != SImode > + || REGNO (SET_DEST (elt)) != (unsigned int)(dest_regno + i) > + || GET_CODE (SET_SRC (elt)) != MEM > + || GET_MODE (SET_SRC (elt)) != SImode > + || (GET_CODE (src_addr) != REG && GET_CODE (src_addr) != PLUS)) > + return false; > + } > + > + return true; > +}) > + > +(define_special_predicate "nds32_store_multiple_operation" > + (match_code "parallel") > +{ > + HOST_WIDE_INT count; > + int src_regno; > + rtx dest_addr; > + > + int i; > + rtx elt; > + > + /* Get the counts of elements in the parallel rtx. */ > + count = XVECLEN (op, 0); > + > + /* Pick up the first element. */ > + elt = XVECEXP (op, 0, 0); > + > + /* Perform some quick check for the first element in the parallel rtx. */ > + if (GET_CODE (elt) != SET > + || count <= 1 > + || count > 8 > + || GET_CODE (SET_SRC (elt)) != REG > + || GET_CODE (SET_DEST (elt)) != MEM) > + return false; > + > + src_regno = REGNO (SET_SRC (elt)); > + dest_addr = XEXP (SET_DEST (elt), 0); > + > + /* Perform detail check for each element. */ > + for (i = 0; i < count; i++) > + { > + elt = XVECEXP (op, 0, i); > + > + /* Refer to nds32.multiple.md for more information > + about following checking. */ > + if (GET_CODE (elt) != SET > + || GET_CODE (SET_SRC (elt)) != REG > + || GET_MODE (SET_SRC (elt)) != SImode > + || REGNO (SET_SRC (elt)) != (unsigned int)(src_regno + i) > + || GET_CODE (SET_DEST (elt)) != MEM > + || GET_MODE (SET_DEST (elt)) != SImode > + || (GET_CODE (dest_addr) != REG && GET_CODE (dest_addr) != PLUS)) > + return false; > + } > + > + return true; > +}) These are basically the same, but with SET_SRC and SET_DEST swapped. It'd be good to have a single nds32.c function that handles both, with a parameter to choose between loads and stores. Those are all the comments I had. Like I say, it seems very clean overall. Thanks, Richard