---
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index cda2895..9beb289 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -358,6 +358,7 @@ bool aarch64_emit_approx_div (rtx, rtx, rtx);
bool aarch64_emit_approx_sqrt (rtx, rtx, bool);
void aarch64_expand_call (rtx, rtx, bool);
bool aarch64_expand_movmem (rtx *);
+void aarch64_expand_strlen (rtx *);
bool aarch64_float_const_zero_rtx_p (rtx);
bool aarch64_float_const_rtx_p (rtx);
bool aarch64_function_arg_regno_p (unsigned);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 4b5183b..d12fb6b 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -16107,6 +16107,102 @@ aarch64_expand_movmem (rtx *operands)
return true;
}
+/* Emit code to compute the length of a NUL-terminated string inline.
+
+   OPERANDS[0] is the destination register that receives the length.
+   OPERANDS[1] is a MEM whose address is the start of the string.
+   OPERANDS[2] is the character to search for.  It is not examined here:
+   the emitted code only ever looks for a zero byte.
+   OPERANDS[3] is the alignment.  It is not used.
+
+   The emitted sequence probes one 8-byte word at the start of the string,
+   unless that load would come within 16 bytes of the end of a 4096-byte
+   page, and otherwise falls back to a byte-at-a-time loop.  */
+
+void
+aarch64_expand_strlen (rtx *operands)
+{
+  rtx result = operands[0];
+  rtx src = operands[1];
+  rtx loop_label = gen_label_rtx ();
+  rtx word_hit_label = gen_label_rtx ();
+  rtx end_label = gen_label_rtx ();
+  /* Work on private copies so that the register holding the caller's
+     address is never modified by the increments below.  */
+  rtx start_addr = copy_to_mode_reg (DImode, XEXP (src, 0));
+  rtx addr = copy_to_mode_reg (DImode, start_addr);
+  rtx word = gen_reg_rtx (DImode);
+  rtx chr = gen_reg_rtx (DImode);
+  rtx page_off = gen_reg_rtx (DImode);
+  rtx tmp1 = gen_reg_rtx (DImode);
+  rtx tmp2 = gen_reg_rtx (DImode);
+  rtx syndrome = gen_reg_rtx (DImode);
+  rtx ones = gen_reg_rtx (DImode);
+  rtx sevenf = gen_reg_rtx (DImode);
+  rtx len = gen_reg_rtx (DImode);
+
+  /* If fewer than 16 bytes are left before the end of the page, skip the
+     word probe and go straight to the byte loop.  The comparison is
+     unsigned and is emitted through emit_cmp_and_jump_insns so that a
+     real compare-and-branch is generated.  */
+  emit_insn (gen_anddi3 (page_off, addr, GEN_INT (4096 - 1)));
+  emit_cmp_and_jump_insns (page_off, GEN_INT (4096 - 16), GTU, NULL_RTX,
+                           DImode, 1, loop_label);
+
+  /* NOTE(review): this 8-byte load is unaligned in general; confirm
+     whether it needs to be avoided under -mstrict-align.  */
+  emit_move_insn (word, gen_rtx_MEM (DImode, addr));
+  /* The zero-byte search below assumes the first string byte is in the
+     least significant byte of WORD, so byte-swap on big-endian.  */
+  if (BYTES_BIG_ENDIAN)
+    emit_insn (gen_bswapdi2 (word, word));
+  emit_move_insn (ones, GEN_INT (0x0101010101010101));
+  emit_move_insn (sevenf, GEN_INT (0x7f7f7f7f7f7f7f7f));
+
+  /* SYNDROME = (WORD - 0x01..01) & ~(WORD | 0x7f..7f).  It is nonzero iff
+     WORD contains a zero byte, and its lowest set bit is the top bit of
+     the first such byte.  */
+  emit_insn (gen_subdi3 (tmp1, word, ones));
+  emit_insn (gen_iordi3 (tmp2, word, sevenf));
+  emit_insn (gen_rtx_SET (tmp2, gen_rtx_NOT (DImode, tmp2)));
+  emit_insn (gen_anddi3 (syndrome, tmp1, tmp2));
+
+  /* If a NUL was found, jump to compute its exact position.  */
+  emit_cmp_and_jump_insns (syndrome, const0_rtx, NE, NULL_RTX, DImode, 1,
+                           word_hit_label);
+
+  /* No NUL in the first 8 bytes: continue with the byte after them.  */
+  emit_insn (gen_adddi3 (addr, addr, GEN_INT (8)));
+
+  /* Simple byte loop.  The load and the increment are separate insns
+     rather than a POST_MODIFY address, which should not be created
+     during expansion.  */
+  emit_label (loop_label);
+  convert_move (chr, gen_rtx_MEM (QImode, addr), 1);
+  emit_insn (gen_adddi3 (addr, addr, const1_rtx));
+  emit_cmp_and_jump_insns (chr, const0_rtx, NE, NULL_RTX, DImode, 1,
+                           loop_label);
+
+  /* ADDR now points one past the NUL, so subtract one to adjust for the
+     last increment.  */
+  emit_insn (gen_subdi3 (len, addr, start_addr));
+  emit_insn (gen_adddi3 (len, len, constm1_rtx));
+  emit_move_insn (result, gen_lowpart (GET_MODE (result), len));
+  emit_jump_insn (gen_jump (end_label));
+  emit_barrier ();
+
+  /* The word probe found a NUL.  The count of leading zeros of the
+     byte-reversed SYNDROME is 8 times the index of the first zero byte,
+     so shift it right by 3 to get the length.  */
+  emit_label (word_hit_label);
+  emit_insn (gen_bswapdi2 (syndrome, syndrome));
+  emit_insn (gen_clzdi2 (syndrome, syndrome));
+  emit_insn (gen_ashrdi3 (syndrome, syndrome, GEN_INT (3)));
+  emit_move_insn (result, gen_lowpart (GET_MODE (result), syndrome));
+
+  emit_label (end_label);
+}
+
/* Split a DImode store of a CONST_INT SRC to MEM DST as two
SImode stores. Handle the case when the constant has identical
bottom and top halves. This is beneficial when the two stores can be
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 10fcde6..7c60b69 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -189,6 +189,7 @@
UNSPEC_CLASTB
UNSPEC_FADDA
UNSPEC_REV_SUBREG
+ UNSPEC_BUILTIN_STRLEN
])
(define_c_enum "unspecv" [
@@ -395,6 +396,25 @@
[(set_attr "type" "fccmp<s>")]
)
+;; Inline expansion of strlen.  Operand 0 receives the length, operand 1 is
+;; the string, operand 2 is the character to search for and operand 3 is
+;; the alignment.
+(define_expand "strlen<mode>"
+  [(set (match_operand:P 0 "register_operand")
+        (unspec:P [(match_operand:BLK 1 "memory_operand")
+                   (match_operand 2 "immediate_operand")
+                   (match_operand 3 "immediate_operand")]
+                  UNSPEC_BUILTIN_STRLEN))]
+  ""
+{
+  /* aarch64_expand_strlen only searches for a zero byte.  */
+  if (operands[2] != const0_rtx)
+    FAIL;
+  aarch64_expand_strlen (operands);
+  DONE;
+})
+
+
;; Expansion of signed mod by a power of 2 using CSNEG.
;; For x0 % n where n is a power of 2 produce:
;; negs x1, x0