[llvm] 9b016e3 - [ARM] Add early-clobber to MVE VCMLA.f32 (#114995)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 6 06:46:12 PST 2024
Author: Oliver Stannard
Date: 2024-11-06T14:46:08Z
New Revision: 9b016e3cb2859ef06f0301ebbc48df294b2356dc
URL: https://github.com/llvm/llvm-project/commit/9b016e3cb2859ef06f0301ebbc48df294b2356dc
DIFF: https://github.com/llvm/llvm-project/commit/9b016e3cb2859ef06f0301ebbc48df294b2356dc.diff
LOG: [ARM] Add early-clobber to MVE VCMLA.f32 (#114995)
This instruction (but not the f16 variant) cannot use the same register
for the output as either of the inputs, so it needs to be marked as
early-clobber.
Added:
Modified:
llvm/lib/Target/ARM/ARMInstrMVE.td
llvm/test/CodeGen/Thumb2/mve-vcmla.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 8c8403ac58b080..22af599f4f0859 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -3583,10 +3583,10 @@ def ARMimmOneH: PatLeaf<(bitconvert (v8i16 (ARMvmovImm (i32 2620))))>; // 1.0 ha
defm MVE_VMULf32 : MVE_VMUL_fp_m<MVE_v4f32, ARMimmOneF>;
defm MVE_VMULf16 : MVE_VMUL_fp_m<MVE_v8f16, ARMimmOneH>;
-class MVE_VCMLA<string suffix, bits<2> size>
+class MVE_VCMLA<string suffix, bits<2> size, string cstr>
: MVEFloatArithNeon<"vcmla", suffix, size{1}, (outs MQPR:$Qd),
(ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot),
- "$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src", size, []> {
+ "$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src"#cstr, size, []> {
bits<4> Qd;
bits<4> Qn;
bits<2> rot;
@@ -3603,8 +3603,8 @@ class MVE_VCMLA<string suffix, bits<2> size>
let Inst{4} = 0b0;
}
-multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI> {
- def "" : MVE_VCMLA<VTI.Suffix, VTI.Size>;
+multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI, string cstr=""> {
+ def "" : MVE_VCMLA<VTI.Suffix, VTI.Size, cstr>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEFloat] in {
@@ -3633,7 +3633,7 @@ multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI> {
}
defm MVE_VCMLAf16 : MVE_VCMLA_m<MVE_v8f16>;
-defm MVE_VCMLAf32 : MVE_VCMLA_m<MVE_v4f32>;
+defm MVE_VCMLAf32 : MVE_VCMLA_m<MVE_v4f32, ",@earlyclobber $Qd">;
class MVE_VADDSUBFMA_fp<string iname, string suffix, bits<2> size, bit bit_4,
bit bit_8, bit bit_21, dag iops=(ins),
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmla.ll b/llvm/test/CodeGen/Thumb2/mve-vcmla.ll
index d1976472e39460..df542be73c58cb 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vcmla.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmla.ll
@@ -121,3 +121,27 @@ entry:
%res = fadd <4 x float> %d, %a
ret <4 x float> %res
}
+
+define arm_aapcs_vfpcc <8 x half> @same_register_f16(<8 x half> %a) {
+; CHECK-LABEL: same_register_f16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vcmla.f16 q0, q0, q0, #0
+; CHECK-NEXT: bx lr
+entry:
+ %d = tail call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 0, <8 x half> zeroinitializer, <8 x half> %a, <8 x half> %a)
+ %res = fadd fast <8 x half> %d, %a
+ ret <8 x half> %res
+}
+
+define arm_aapcs_vfpcc <4 x float> @same_register_f32(<4 x float> %a) {
+; CHECK-LABEL: same_register_f32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmov q1, q0
+; CHECK-NEXT: vcmla.f32 q1, q0, q0, #0
+; CHECK-NEXT: vmov q0, q1
+; CHECK-NEXT: bx lr
+entry:
+ %d = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> zeroinitializer, <4 x float> %a, <4 x float> %a)
+ %res = fadd fast <4 x float> %d, %a
+ ret <4 x float> %res
+}
More information about the llvm-commits
mailing list