================ @@ -1959,9 +2064,12 @@ multiclass VCMLA_ROTS<string type, string lanety, string laneqty> { let isLaneQ = 1 in { // vcmla{ROT}_laneq + // ACLE specifies that the fp16 vcmla_#ROT_laneq variant has an immediate range of 0 <= lane <= 1. + // fp16 is the only variant for which these two differ. + // https://developer.arm.com/documentation/ihi0073/latest/ + defvar getlanety = !if(!eq(type, "h"), lanety, laneqty); def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", type, Op<(call "vcmla" # ROT, $p0, $p1, - (bitcast $p0, (dup_typed lanety, (call "vget_lane", (bitcast laneqty, $p2), $p3))))>>; - + (bitcast $p0, (dup_typed lanety, (call "vget_lane", (bitcast getlanety, $p2), $p3))))>>; ---------------- SpencerAbson wrote:
If we look at the instruction this intrinsic compiles to, [FCMLA](https://developer.arm.com/documentation/ddi0596/2021-03/SIMD-FP-Instructions/FCMLA--by-element---Floating-point-Complex-Multiply-Accumulate--by-element--?lang=en), the reduced range (`0 <= lane <= 1`) appears to come from the requirement that `H == '0'` for the instruction to be defined when the size specifier is `'01'` and `Q == '0'` (which is what this variant requires). This means that only the `L` bit-field can be used to encode `<index>`. Thanks to @rsandifo-arm for pointing this out. https://github.com/llvm/llvm-project/pull/100278 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits