================
@@ -1959,9 +2064,12 @@ multiclass VCMLA_ROTS<string type, string lanety, string
laneqty> {
let isLaneQ = 1 in {
// vcmla{ROT}_laneq
+ // ACLE specifies that the fp16 vcmla_#ROT_laneq variant has an
immedaite range of 0 <= lane <= 1.
+ // fp16 is the only variant for which these two differ.
+ // https://developer.arm.com/documentation/ihi0073/latest/
+ defvar getlanety = !if(!eq(type, "h"), lanety, laneqty);
def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", type, Op<(call "vcmla"
# ROT, $p0, $p1,
- (bitcast $p0, (dup_typed lanety, (call "vget_lane", (bitcast
laneqty, $p2), $p3))))>>;
-
+ (bitcast $p0, (dup_typed lanety, (call "vget_lane", (bitcast
getlanety, $p2), $p3))))>>;
----------------
Lukacma wrote:
I am not sure I follow what you trying to say there. I think, it might be
because your link to FCMLA is for the old spec and there may be some
differences in instruction spec compared to the latest one. But if you look at
the most recent spec for this
[instruction](https://developer.arm.com/documentation/ddi0602/2024-06/SVE-Instructions/FCMLA--indexed---Floating-point-complex-multiply-add-by-indexed-values-with-rotate-),
it says quite clearly that immediate range for half-precision variant is from
0 to 3
https://github.com/llvm/llvm-project/pull/100278
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits