Hi,
The vtbx intrinsics are implemented in assembly without noting
that their tmp1 operand is early-clobber. This can, when the
wind blows the wrong way, result in us making a total mess of
the state of registers.
Fix by marking the required operands as early-clobber.
Regression tested against aarch64.exp with no problems.
OK?
Thanks,
James
---
2013-10-11 James Greenhalgh <[email protected]>
* config/aarch64/arm_neon.h
(vtbx<1,3>_<psu>8): Fix register constriants.
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 482d7d0..f7c9db6 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -15636,7 +15636,7 @@ vtbx1_s8 (int8x8_t r, int8x8_t tab, int8x8_t idx)
"cmhs %0.8b, %3.8b, %0.8b\n\t"
"tbl %1.8b, {%2.16b}, %3.8b\n\t"
"bsl %0.8b, %4.8b, %1.8b\n\t"
- : "+w"(result), "=w"(tmp1)
+ : "+w"(result), "=&w"(tmp1)
: "w"(temp), "w"(idx), "w"(r)
: /* No clobbers */);
return result;
@@ -15652,7 +15652,7 @@ vtbx1_u8 (uint8x8_t r, uint8x8_t tab, uint8x8_t idx)
"cmhs %0.8b, %3.8b, %0.8b\n\t"
"tbl %1.8b, {%2.16b}, %3.8b\n\t"
"bsl %0.8b, %4.8b, %1.8b\n\t"
- : "+w"(result), "=w"(tmp1)
+ : "+w"(result), "=&w"(tmp1)
: "w"(temp), "w"(idx), "w"(r)
: /* No clobbers */);
return result;
@@ -15668,7 +15668,7 @@ vtbx1_p8 (poly8x8_t r, poly8x8_t tab, uint8x8_t idx)
"cmhs %0.8b, %3.8b, %0.8b\n\t"
"tbl %1.8b, {%2.16b}, %3.8b\n\t"
"bsl %0.8b, %4.8b, %1.8b\n\t"
- : "+w"(result), "=w"(tmp1)
+ : "+w"(result), "=&w"(tmp1)
: "w"(temp), "w"(idx), "w"(r)
: /* No clobbers */);
return result;
@@ -15723,7 +15723,7 @@ vtbx3_s8 (int8x8_t r, int8x8x3_t tab, int8x8_t idx)
"cmhs %0.8b, %3.8b, %0.8b\n\t"
"tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
"bsl %0.8b, %4.8b, %1.8b\n\t"
- : "+w"(result), "=w"(tmp1)
+ : "+w"(result), "=&w"(tmp1)
: "Q"(temp), "w"(idx), "w"(r)
: "v16", "v17", "memory");
return result;
@@ -15742,7 +15742,7 @@ vtbx3_u8 (uint8x8_t r, uint8x8x3_t tab, uint8x8_t idx)
"cmhs %0.8b, %3.8b, %0.8b\n\t"
"tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
"bsl %0.8b, %4.8b, %1.8b\n\t"
- : "+w"(result), "=w"(tmp1)
+ : "+w"(result), "=&w"(tmp1)
: "Q"(temp), "w"(idx), "w"(r)
: "v16", "v17", "memory");
return result;
@@ -15761,7 +15761,7 @@ vtbx3_p8 (poly8x8_t r, poly8x8x3_t tab, uint8x8_t idx)
"cmhs %0.8b, %3.8b, %0.8b\n\t"
"tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
"bsl %0.8b, %4.8b, %1.8b\n\t"
- : "+w"(result), "=w"(tmp1)
+ : "+w"(result), "=&w"(tmp1)
: "Q"(temp), "w"(idx), "w"(r)
: "v16", "v17", "memory");
return result;