[llvm] [ARM][NEON] Add constraint to vld2 Odd/Even Pseudo instructions. (PR #79287)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 24 05:17:20 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-arm
Author: Alfie Richards (AlfieRichardsArm)
<details>
<summary>Changes</summary>
This ensures the odd/even pseudo instructions are allocated to the same register range.
This fixes #<!-- -->71763 (https://github.com/llvm/llvm-project/issues/71763)
---
Full diff: https://github.com/llvm/llvm-project/pull/79287.diff
4 Files Affected:
- (modified) llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp (+3-6)
- (modified) llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp (-5)
- (modified) llvm/lib/Target/ARM/ARMInstrNEON.td (+26-12)
- (modified) llvm/test/CodeGen/ARM/bf16-intrinsics-ld-st.ll (+1-1)
``````````diff
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 2f9236bb977fc9..f0b69b0b09809f 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -640,12 +640,9 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
// has an extra operand that is a use of the super-register. Record the
// operand index and skip over it.
unsigned SrcOpIdx = 0;
- if (!IsVLD2DUP) {
- if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc ||
- RegSpc == SingleLowSpc || RegSpc == SingleHighQSpc ||
- RegSpc == SingleHighTSpc)
- SrcOpIdx = OpIdx++;
- }
+ if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc || RegSpc == SingleLowSpc ||
+ RegSpc == SingleHighQSpc || RegSpc == SingleHighTSpc)
+ SrcOpIdx = OpIdx++;
// Copy the predicate operands.
MIB.add(MI.getOperand(OpIdx++));
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index e99ee299412a5f..20dd3e7baf8498 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -3032,11 +3032,6 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
}
if (is64BitVector || NumVecs == 1) {
// Double registers and VLD1 quad registers are directly supported.
- } else if (NumVecs == 2) {
- const SDValue OpsA[] = {MemAddr, Align, Pred, Reg0, Chain};
- SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
- MVT::Other, OpsA);
- Chain = SDValue(VLdA, 1);
} else {
SDValue ImplDef = SDValue(
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index f31e1e9f97892f..e7cf8b4657b2c8 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -1491,12 +1491,26 @@ def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
addrmode6dupalign64>;
-def VLD2DUPq8EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
-def VLD2DUPq8OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
-def VLD2DUPq16EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
-def VLD2DUPq16OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
-def VLD2DUPq32EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
-def VLD2DUPq32OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
+// Duplicate of VLDQQPseudo but with a constraint variable
+// to ensure the odd and even lanes use the same register range
+class VLDQQPseudoConstrained<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr, QQPR: $src), itin,
+ "$src = $dst">;
+class VLDQQWBPseudoConstrained<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR: $src), itin,
+ "$addr.addr = $wb, $src = $dst">;
+class VLDQQWBfixedPseudoConstrained<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, QQPR: $src), itin,
+ "$addr.addr = $wb, $src = $dst">;
+
+def VLD2DUPq8EvenPseudo : VLDQQPseudoConstrained<IIC_VLD2dup>, Sched<[WriteVLD2]>;
+def VLD2DUPq8OddPseudo : VLDQQPseudoConstrained<IIC_VLD2dup>, Sched<[WriteVLD2]>;
+def VLD2DUPq16EvenPseudo : VLDQQPseudoConstrained<IIC_VLD2dup>, Sched<[WriteVLD2]>;
+def VLD2DUPq16OddPseudo : VLDQQPseudoConstrained<IIC_VLD2dup>, Sched<[WriteVLD2]>;
+def VLD2DUPq32EvenPseudo : VLDQQPseudoConstrained<IIC_VLD2dup>, Sched<[WriteVLD2]>;
+def VLD2DUPq32OddPseudo : VLDQQPseudoConstrained<IIC_VLD2dup>, Sched<[WriteVLD2]>;
// ...with address register writeback:
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
@@ -1534,12 +1548,12 @@ defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
addrmode6dupalign64>;
-def VLD2DUPq8OddPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
-def VLD2DUPq16OddPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
-def VLD2DUPq32OddPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
-def VLD2DUPq8OddPseudoWB_register : VLDQQWBPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
-def VLD2DUPq16OddPseudoWB_register : VLDQQWBPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
-def VLD2DUPq32OddPseudoWB_register : VLDQQWBPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
+def VLD2DUPq8OddPseudoWB_fixed : VLDQQWBfixedPseudoConstrained<IIC_VLD2dup>, Sched<[WriteVLD2]>;
+def VLD2DUPq16OddPseudoWB_fixed : VLDQQWBfixedPseudoConstrained<IIC_VLD2dup>, Sched<[WriteVLD2]>;
+def VLD2DUPq32OddPseudoWB_fixed : VLDQQWBfixedPseudoConstrained<IIC_VLD2dup>, Sched<[WriteVLD2]>;
+def VLD2DUPq8OddPseudoWB_register : VLDQQWBPseudoConstrained<IIC_VLD2dup>, Sched<[WriteVLD2]>;
+def VLD2DUPq16OddPseudoWB_register : VLDQQWBPseudoConstrained<IIC_VLD2dup>, Sched<[WriteVLD2]>;
+def VLD2DUPq32OddPseudoWB_register : VLDQQWBPseudoConstrained<IIC_VLD2dup>, Sched<[WriteVLD2]>;
// VLD3DUP : Vector Load (single 3-element structure to all lanes)
class VLD3DUP<bits<4> op7_4, string Dt>
diff --git a/llvm/test/CodeGen/ARM/bf16-intrinsics-ld-st.ll b/llvm/test/CodeGen/ARM/bf16-intrinsics-ld-st.ll
index cccbdd04357650..e49128f53b1157 100644
--- a/llvm/test/CodeGen/ARM/bf16-intrinsics-ld-st.ll
+++ b/llvm/test/CodeGen/ARM/bf16-intrinsics-ld-st.ll
@@ -488,7 +488,7 @@ entry:
define arm_aapcs_vfpcc [2 x <4 x i32>] @test_vld2q_dup_bf16(ptr %ptr) {
; CHECK-LABEL: test_vld2q_dup_bf16:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vld2.16 {d16[], d18[]}, [r0]
+; CHECK-NEXT: vld2.16 {d0[], d2[]}, [r0]
; CHECK-NEXT: vld2.16 {d1[], d3[]}, [r0]
; CHECK-NEXT: bx lr
entry:
``````````
</details>
https://github.com/llvm/llvm-project/pull/79287
More information about the llvm-commits
mailing list