[llvm] 44843e2 - [ARM][NEON] Combine base address updates for vld1x intrinsics
Kristina Bessonova via llvm-commits
llvm-commits at lists.llvm.org
Tue May 25 02:07:14 PDT 2021
Author: Kristina Bessonova
Date: 2021-05-25T11:06:39+02:00
New Revision: 44843e2a046ef9959166e53d6c0cfb3b286fd4ce
URL: https://github.com/llvm/llvm-project/commit/44843e2a046ef9959166e53d6c0cfb3b286fd4ce
DIFF: https://github.com/llvm/llvm-project/commit/44843e2a046ef9959166e53d6c0cfb3b286fd4ce.diff
LOG: [ARM][NEON] Combine base address updates for vld1x intrinsics
Reviewed By: dmgreen
Differential Revision: https://reviews.llvm.org/D102855
Added:
Modified:
llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/lib/Target/ARM/ARMISelLowering.h
llvm/lib/Target/ARM/ARMInstrNEON.td
llvm/test/CodeGen/ARM/arm-vld1.ll
Removed:
llvm/test/CodeGen/ARM/pr45824.ll
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 5fd4ed94b5989..deea297c02eac 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -190,31 +190,58 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, true, EvenDblSpc, 1, 8 ,true},
{ ARM::VLD1d16QPseudo, ARM::VLD1d16Q, true, false, false, SingleSpc, 4, 4 ,false},
+{ ARM::VLD1d16QPseudoWB_fixed, ARM::VLD1d16Qwb_fixed, true, true, false, SingleSpc, 4, 4 ,false},
+{ ARM::VLD1d16QPseudoWB_register, ARM::VLD1d16Qwb_register, true, true, true, SingleSpc, 4, 4 ,false},
{ ARM::VLD1d16TPseudo, ARM::VLD1d16T, true, false, false, SingleSpc, 3, 4 ,false},
+{ ARM::VLD1d16TPseudoWB_fixed, ARM::VLD1d16Twb_fixed, true, true, false, SingleSpc, 3, 4 ,false},
+{ ARM::VLD1d16TPseudoWB_register, ARM::VLD1d16Twb_register, true, true, true, SingleSpc, 3, 4 ,false},
+
{ ARM::VLD1d32QPseudo, ARM::VLD1d32Q, true, false, false, SingleSpc, 4, 2 ,false},
+{ ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d32Qwb_fixed, true, true, false, SingleSpc, 4, 2 ,false},
+{ ARM::VLD1d32QPseudoWB_register, ARM::VLD1d32Qwb_register, true, true, true, SingleSpc, 4, 2 ,false},
{ ARM::VLD1d32TPseudo, ARM::VLD1d32T, true, false, false, SingleSpc, 3, 2 ,false},
+{ ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d32Twb_fixed, true, true, false, SingleSpc, 3, 2 ,false},
+{ ARM::VLD1d32TPseudoWB_register, ARM::VLD1d32Twb_register, true, true, true, SingleSpc, 3, 2 ,false},
+
{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false},
{ ARM::VLD1d64QPseudoWB_fixed, ARM::VLD1d64Qwb_fixed, true, true, false, SingleSpc, 4, 1 ,false},
{ ARM::VLD1d64QPseudoWB_register, ARM::VLD1d64Qwb_register, true, true, true, SingleSpc, 4, 1 ,false},
{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false},
{ ARM::VLD1d64TPseudoWB_fixed, ARM::VLD1d64Twb_fixed, true, true, false, SingleSpc, 3, 1 ,false},
{ ARM::VLD1d64TPseudoWB_register, ARM::VLD1d64Twb_register, true, true, true, SingleSpc, 3, 1 ,false},
+
{ ARM::VLD1d8QPseudo, ARM::VLD1d8Q, true, false, false, SingleSpc, 4, 8 ,false},
+{ ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d8Qwb_fixed, true, true, false, SingleSpc, 4, 8 ,false},
+{ ARM::VLD1d8QPseudoWB_register, ARM::VLD1d8Qwb_register, true, true, true, SingleSpc, 4, 8 ,false},
{ ARM::VLD1d8TPseudo, ARM::VLD1d8T, true, false, false, SingleSpc, 3, 8 ,false},
+{ ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d8Twb_fixed, true, true, false, SingleSpc, 3, 8 ,false},
+{ ARM::VLD1d8TPseudoWB_register, ARM::VLD1d8Twb_register, true, true, true, SingleSpc, 3, 8 ,false},
+
{ ARM::VLD1q16HighQPseudo, ARM::VLD1d16Q, true, false, false, SingleHighQSpc, 4, 4 ,false},
+{ ARM::VLD1q16HighQPseudo_UPD, ARM::VLD1d16Qwb_fixed, true, true, true, SingleHighQSpc, 4, 4 ,false},
{ ARM::VLD1q16HighTPseudo, ARM::VLD1d16T, true, false, false, SingleHighTSpc, 3, 4 ,false},
+{ ARM::VLD1q16HighTPseudo_UPD, ARM::VLD1d16Twb_fixed, true, true, true, SingleHighTSpc, 3, 4 ,false},
{ ARM::VLD1q16LowQPseudo_UPD, ARM::VLD1d16Qwb_fixed, true, true, true, SingleLowSpc, 4, 4 ,false},
{ ARM::VLD1q16LowTPseudo_UPD, ARM::VLD1d16Twb_fixed, true, true, true, SingleLowSpc, 3, 4 ,false},
+
{ ARM::VLD1q32HighQPseudo, ARM::VLD1d32Q, true, false, false, SingleHighQSpc, 4, 2 ,false},
+{ ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1d32Qwb_fixed, true, true, true, SingleHighQSpc, 4, 2 ,false},
{ ARM::VLD1q32HighTPseudo, ARM::VLD1d32T, true, false, false, SingleHighTSpc, 3, 2 ,false},
+{ ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1d32Twb_fixed, true, true, true, SingleHighTSpc, 3, 2 ,false},
{ ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1d32Qwb_fixed, true, true, true, SingleLowSpc, 4, 2 ,false},
{ ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1d32Twb_fixed, true, true, true, SingleLowSpc, 3, 2 ,false},
+
{ ARM::VLD1q64HighQPseudo, ARM::VLD1d64Q, true, false, false, SingleHighQSpc, 4, 1 ,false},
+{ ARM::VLD1q64HighQPseudo_UPD, ARM::VLD1d64Qwb_fixed, true, true, true, SingleHighQSpc, 4, 1 ,false},
{ ARM::VLD1q64HighTPseudo, ARM::VLD1d64T, true, false, false, SingleHighTSpc, 3, 1 ,false},
+{ ARM::VLD1q64HighTPseudo_UPD, ARM::VLD1d64Twb_fixed, true, true, true, SingleHighTSpc, 3, 1 ,false},
{ ARM::VLD1q64LowQPseudo_UPD, ARM::VLD1d64Qwb_fixed, true, true, true, SingleLowSpc, 4, 1 ,false},
{ ARM::VLD1q64LowTPseudo_UPD, ARM::VLD1d64Twb_fixed, true, true, true, SingleLowSpc, 3, 1 ,false},
+
{ ARM::VLD1q8HighQPseudo, ARM::VLD1d8Q, true, false, false, SingleHighQSpc, 4, 8 ,false},
+{ ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1d8Qwb_fixed, true, true, true, SingleHighQSpc, 4, 8 ,false},
{ ARM::VLD1q8HighTPseudo, ARM::VLD1d8T, true, false, false, SingleHighTSpc, 3, 8 ,false},
+{ ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1d8Twb_fixed, true, true, true, SingleHighTSpc, 3, 8 ,false},
{ ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1d8Qwb_fixed, true, true, true, SingleLowSpc, 4, 8 ,false},
{ ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1d8Twb_fixed, true, true, true, SingleLowSpc, 3, 8 ,false},
@@ -2578,8 +2605,14 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VLD3d16Pseudo:
case ARM::VLD3d32Pseudo:
case ARM::VLD1d8TPseudo:
+ case ARM::VLD1d8TPseudoWB_fixed:
+ case ARM::VLD1d8TPseudoWB_register:
case ARM::VLD1d16TPseudo:
+ case ARM::VLD1d16TPseudoWB_fixed:
+ case ARM::VLD1d16TPseudoWB_register:
case ARM::VLD1d32TPseudo:
+ case ARM::VLD1d32TPseudoWB_fixed:
+ case ARM::VLD1d32TPseudoWB_register:
case ARM::VLD1d64TPseudo:
case ARM::VLD1d64TPseudoWB_fixed:
case ARM::VLD1d64TPseudoWB_register:
@@ -2599,26 +2632,40 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VLD4d16Pseudo:
case ARM::VLD4d32Pseudo:
case ARM::VLD1d8QPseudo:
+ case ARM::VLD1d8QPseudoWB_fixed:
+ case ARM::VLD1d8QPseudoWB_register:
case ARM::VLD1d16QPseudo:
+ case ARM::VLD1d16QPseudoWB_fixed:
+ case ARM::VLD1d16QPseudoWB_register:
case ARM::VLD1d32QPseudo:
+ case ARM::VLD1d32QPseudoWB_fixed:
+ case ARM::VLD1d32QPseudoWB_register:
case ARM::VLD1d64QPseudo:
case ARM::VLD1d64QPseudoWB_fixed:
case ARM::VLD1d64QPseudoWB_register:
case ARM::VLD1q8HighQPseudo:
+ case ARM::VLD1q8HighQPseudo_UPD:
case ARM::VLD1q8LowQPseudo_UPD:
case ARM::VLD1q8HighTPseudo:
+ case ARM::VLD1q8HighTPseudo_UPD:
case ARM::VLD1q8LowTPseudo_UPD:
case ARM::VLD1q16HighQPseudo:
+ case ARM::VLD1q16HighQPseudo_UPD:
case ARM::VLD1q16LowQPseudo_UPD:
case ARM::VLD1q16HighTPseudo:
+ case ARM::VLD1q16HighTPseudo_UPD:
case ARM::VLD1q16LowTPseudo_UPD:
case ARM::VLD1q32HighQPseudo:
+ case ARM::VLD1q32HighQPseudo_UPD:
case ARM::VLD1q32LowQPseudo_UPD:
case ARM::VLD1q32HighTPseudo:
+ case ARM::VLD1q32HighTPseudo_UPD:
case ARM::VLD1q32LowTPseudo_UPD:
case ARM::VLD1q64HighQPseudo:
+ case ARM::VLD1q64HighQPseudo_UPD:
case ARM::VLD1q64LowQPseudo_UPD:
case ARM::VLD1q64HighTPseudo:
+ case ARM::VLD1q64HighTPseudo_UPD:
case ARM::VLD1q64LowTPseudo_UPD:
case ARM::VLD4d8Pseudo_UPD:
case ARM::VLD4d16Pseudo_UPD:
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index c5cf9d613fafc..e737b648017e0 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1945,7 +1945,13 @@ static bool isVLDfixed(unsigned Opc)
case ARM::VLD1d64Qwb_fixed : return true;
case ARM::VLD1d32wb_fixed : return true;
case ARM::VLD1d64wb_fixed : return true;
+ case ARM::VLD1d8TPseudoWB_fixed : return true;
+ case ARM::VLD1d16TPseudoWB_fixed : return true;
+ case ARM::VLD1d32TPseudoWB_fixed : return true;
case ARM::VLD1d64TPseudoWB_fixed : return true;
+ case ARM::VLD1d8QPseudoWB_fixed : return true;
+ case ARM::VLD1d16QPseudoWB_fixed : return true;
+ case ARM::VLD1d32QPseudoWB_fixed : return true;
case ARM::VLD1d64QPseudoWB_fixed : return true;
case ARM::VLD1q8wb_fixed : return true;
case ARM::VLD1q16wb_fixed : return true;
@@ -2015,7 +2021,13 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
+ case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
+ case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
+ case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
+ case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
+ case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
+ case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
@@ -4281,6 +4293,54 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
return;
}
+ case ARMISD::VLD1x2_UPD: {
+ if (Subtarget->hasNEON()) {
+ static const uint16_t DOpcodes[] = {
+ ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
+ ARM::VLD1q64wb_fixed};
+ static const uint16_t QOpcodes[] = {
+ ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
+ ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
+ SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
+ return;
+ }
+ break;
+ }
+
+ case ARMISD::VLD1x3_UPD: {
+ if (Subtarget->hasNEON()) {
+ static const uint16_t DOpcodes[] = {
+ ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
+ ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
+ static const uint16_t QOpcodes0[] = {
+ ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
+ ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
+ static const uint16_t QOpcodes1[] = {
+ ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
+ ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
+ SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
+ return;
+ }
+ break;
+ }
+
+ case ARMISD::VLD1x4_UPD: {
+ if (Subtarget->hasNEON()) {
+ static const uint16_t DOpcodes[] = {
+ ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
+ ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
+ static const uint16_t QOpcodes0[] = {
+ ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
+ ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
+ static const uint16_t QOpcodes1[] = {
+ ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
+ ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
+ SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
+ return;
+ }
+ break;
+ }
+
case ARMISD::VLD2LN_UPD: {
static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
ARM::VLD2LNd16Pseudo_UPD,
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 38c6c32a16097..2ac09ac0ea691 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1781,6 +1781,9 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(ARMISD::VLD2_UPD)
MAKE_CASE(ARMISD::VLD3_UPD)
MAKE_CASE(ARMISD::VLD4_UPD)
+ MAKE_CASE(ARMISD::VLD1x2_UPD)
+ MAKE_CASE(ARMISD::VLD1x3_UPD)
+ MAKE_CASE(ARMISD::VLD1x4_UPD)
MAKE_CASE(ARMISD::VLD2LN_UPD)
MAKE_CASE(ARMISD::VLD3LN_UPD)
MAKE_CASE(ARMISD::VLD4LN_UPD)
@@ -14625,7 +14628,8 @@ static SDValue CombineBaseUpdate(SDNode *N,
// Find the new opcode for the updating load/store.
bool isLoadOp = true;
bool isLaneOp = false;
- // Workaround for vst1x and vld1x which do not have alignment operand.
+ // Workaround for vst1x and vld1x intrinsics which do not have alignment
+ // as an operand.
bool hasAlignment = true;
unsigned NewOpc = 0;
unsigned NumVecs = 0;
@@ -14641,13 +14645,16 @@ static SDValue CombineBaseUpdate(SDNode *N,
NumVecs = 3; break;
case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD;
NumVecs = 4; break;
- case Intrinsic::arm_neon_vld1x2:
- case Intrinsic::arm_neon_vld1x3:
- case Intrinsic::arm_neon_vld1x4:
+ case Intrinsic::arm_neon_vld1x2: NewOpc = ARMISD::VLD1x2_UPD;
+ NumVecs = 2; hasAlignment = false; break;
+ case Intrinsic::arm_neon_vld1x3: NewOpc = ARMISD::VLD1x3_UPD;
+ NumVecs = 3; hasAlignment = false; break;
+ case Intrinsic::arm_neon_vld1x4: NewOpc = ARMISD::VLD1x4_UPD;
+ NumVecs = 4; hasAlignment = false; break;
case Intrinsic::arm_neon_vld2dup:
case Intrinsic::arm_neon_vld3dup:
case Intrinsic::arm_neon_vld4dup:
- // TODO: Support updating VLD1x and VLDxDUP nodes. For now, we just skip
+ // TODO: Support updating VLDxDUP nodes. For now, we just skip
// combining base updates for such intrinsics.
continue;
case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index b07162cd7969b..80e4e12c702e6 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -330,6 +330,9 @@ class VectorType;
VLD2DUP_UPD,
VLD3DUP_UPD,
VLD4DUP_UPD,
+ VLD1x2_UPD,
+ VLD1x3_UPD,
+ VLD1x4_UPD,
// NEON stores with post-increment base updates:
VST1_UPD,
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index a34865c58bff2..ba637be05d389 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -705,21 +705,31 @@ defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;
-def VLD1d8TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
-def VLD1d16TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
-def VLD1d32TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
-def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
-def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1d8TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1d8TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1d8TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1d16TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1d16TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1d16TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1d32TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1d32TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1d32TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
-def VLD1q8HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
-def VLD1q8LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
-def VLD1q16HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
-def VLD1q16LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
-def VLD1q32HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
-def VLD1q32LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
-def VLD1q64HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
-def VLD1q64LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1q8HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1q8HighTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1q8LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1q16HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1q16HighTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1q16LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1q32HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1q32HighTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1q32LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1q64HighTPseudo : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1q64HighTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
+def VLD1q64LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
// ...with 4 registers
class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
@@ -758,21 +768,31 @@ defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
-def VLD1d8QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
-def VLD1d16QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
-def VLD1d32QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
-def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
-def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1d8QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1d8QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1d8QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1d16QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1d16QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1d16QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1d32QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1d32QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1d32QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
-def VLD1q8LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
-def VLD1q8HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
-def VLD1q16LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
-def VLD1q16HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
-def VLD1q32LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
-def VLD1q32HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
-def VLD1q64LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
-def VLD1q64HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1q8LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1q8HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1q8HighQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1q16LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1q16HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1q16HighQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1q32LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1q32HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1q32HighQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1q64LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1q64HighQPseudo : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
+def VLD1q64HighQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
// VLD2 : Vector Load (multiple 2-element structures)
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
diff --git a/llvm/test/CodeGen/ARM/arm-vld1.ll b/llvm/test/CodeGen/ARM/arm-vld1.ll
index f1229789c3004..8cd27ccef4ff8 100644
--- a/llvm/test/CodeGen/ARM/arm-vld1.ll
+++ b/llvm/test/CodeGen/ARM/arm-vld1.ll
@@ -240,3 +240,346 @@ define %struct.uint8x16x4_t @test_vld1q_u8_x4(i8* %a) nounwind {
%tmp = tail call %struct.uint8x16x4_t @llvm.arm.neon.vld1x4.v16i8.p0i8(i8* %a)
ret %struct.uint8x16x4_t %tmp
}
+
+; Post-increment.
+
+define %struct.uint16x4x2_t @test_vld1_u16_x2_post_imm(i16* %a, i16** %ptr) nounwind {
+; CHECK-LABEL: test_vld1_u16_x2_post_imm:
+; CHECK: vld1.16 {d16, d17}, [r0:64]!
+ %ld = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld1x2.v4i16.p0i16(i16* %a)
+ %tmp = getelementptr i16, i16* %a, i32 8
+ store i16* %tmp, i16** %ptr
+ ret %struct.uint16x4x2_t %ld
+}
+
+define %struct.uint16x4x2_t @test_vld1_u16_x2_post_reg(i16* %a, i16** %ptr, i32 %inc) nounwind {
+; CHECK-LABEL: test_vld1_u16_x2_post_reg:
+; CHECK: lsl r2, r2, #1
+; CHECK-NEXT: vld1.16 {d16, d17}, [r0:64], r2
+ %ld = tail call %struct.uint16x4x2_t @llvm.arm.neon.vld1x2.v4i16.p0i16(i16* %a)
+ %tmp = getelementptr i16, i16* %a, i32 %inc
+ store i16* %tmp, i16** %ptr
+ ret %struct.uint16x4x2_t %ld
+}
+
+define %struct.uint16x4x3_t @test_vld1_u16_x3_post_imm(i16* %a, i16** %ptr) nounwind {
+; CHECK-LABEL: test_vld1_u16_x3_post_imm:
+; CHECK: vld1.16 {d16, d17, d18}, [r1:64]!
+ %ld = tail call %struct.uint16x4x3_t @llvm.arm.neon.vld1x3.v4i16.p0i16(i16* %a)
+ %tmp = getelementptr i16, i16* %a, i32 12
+ store i16* %tmp, i16** %ptr
+ ret %struct.uint16x4x3_t %ld
+}
+
+define %struct.uint16x4x3_t @test_vld1_u16_x3_post_reg(i16* %a, i16** %ptr, i32 %inc) nounwind {
+; CHECK-LABEL: test_vld1_u16_x3_post_reg:
+; CHECK: lsl r3, r3, #1
+; CHECK-NEXT: vld1.16 {d16, d17, d18}, [r1:64], r3
+ %ld = tail call %struct.uint16x4x3_t @llvm.arm.neon.vld1x3.v4i16.p0i16(i16* %a)
+ %tmp = getelementptr i16, i16* %a, i32 %inc
+ store i16* %tmp, i16** %ptr
+ ret %struct.uint16x4x3_t %ld
+}
+
+define %struct.uint16x4x4_t @test_vld1_u16_x4_post_imm(i16* %a, i16** %ptr) nounwind {
+; CHECK-LABEL: test_vld1_u16_x4_post_imm:
+; CHECK: vld1.16 {d16, d17, d18, d19}, [r1:256]!
+ %ld = tail call %struct.uint16x4x4_t @llvm.arm.neon.vld1x4.v4i16.p0i16(i16* %a)
+ %tmp = getelementptr i16, i16* %a, i32 16
+ store i16* %tmp, i16** %ptr
+ ret %struct.uint16x4x4_t %ld
+}
+
+define %struct.uint16x4x4_t @test_vld1_u16_x4_post_reg(i16* %a, i16** %ptr, i32 %inc) nounwind {
+; CHECK-LABEL: test_vld1_u16_x4_post_reg:
+; CHECK: lsl r3, r3, #1
+; CHECK-NEXT: vld1.16 {d16, d17, d18, d19}, [r1:256], r3
+ %ld = tail call %struct.uint16x4x4_t @llvm.arm.neon.vld1x4.v4i16.p0i16(i16* %a)
+ %tmp = getelementptr i16, i16* %a, i32 %inc
+ store i16* %tmp, i16** %ptr
+ ret %struct.uint16x4x4_t %ld
+}
+
+define %struct.uint32x2x2_t @test_vld1_u32_x2_post_imm(i32* %a, i32** %ptr) nounwind {
+; CHECK-LABEL: test_vld1_u32_x2_post_imm:
+; CHECK: vld1.32 {d16, d17}, [r0:64]!
+ %ld = tail call %struct.uint32x2x2_t @llvm.arm.neon.vld1x2.v2i32.p0i32(i32* %a)
+ %tmp = getelementptr i32, i32* %a, i32 4
+ store i32* %tmp, i32** %ptr
+ ret %struct.uint32x2x2_t %ld
+}
+
+define %struct.uint32x2x2_t @test_vld1_u32_x2_post_reg(i32* %a, i32** %ptr, i32 %inc) nounwind {
+; CHECK-LABEL: test_vld1_u32_x2_post_reg:
+; CHECK: lsl r2, r2, #2
+; CHECK-NEXT: vld1.32 {d16, d17}, [r0:64], r2
+ %ld = tail call %struct.uint32x2x2_t @llvm.arm.neon.vld1x2.v2i32.p0i32(i32* %a)
+ %tmp = getelementptr i32, i32* %a, i32 %inc
+ store i32* %tmp, i32** %ptr
+ ret %struct.uint32x2x2_t %ld
+}
+
+define %struct.uint32x2x3_t @test_vld1_u32_x3_post_imm(i32* %a, i32** %ptr) nounwind {
+; CHECK-LABEL: test_vld1_u32_x3_post_imm:
+; CHECK: vld1.32 {d16, d17, d18}, [r1:64]!
+ %ld = tail call %struct.uint32x2x3_t @llvm.arm.neon.vld1x3.v2i32.p0i32(i32* %a)
+ %tmp = getelementptr i32, i32* %a, i32 6
+ store i32* %tmp, i32** %ptr
+ ret %struct.uint32x2x3_t %ld
+}
+
+define %struct.uint32x2x3_t @test_vld1_u32_x3_post_reg(i32* %a, i32** %ptr, i32 %inc) nounwind {
+; CHECK-LABEL: test_vld1_u32_x3_post_reg:
+; CHECK: lsl r3, r3, #2
+; CHECK-NEXT: vld1.32 {d16, d17, d18}, [r1:64], r3
+ %ld = tail call %struct.uint32x2x3_t @llvm.arm.neon.vld1x3.v2i32.p0i32(i32* %a)
+ %tmp = getelementptr i32, i32* %a, i32 %inc
+ store i32* %tmp, i32** %ptr
+ ret %struct.uint32x2x3_t %ld
+}
+
+define %struct.uint32x2x4_t @test_vld1_u32_x4_post_imm(i32* %a, i32** %ptr) nounwind {
+; CHECK-LABEL: test_vld1_u32_x4_post_imm:
+; CHECK: vld1.32 {d16, d17, d18, d19}, [r1:256]!
+ %ld = tail call %struct.uint32x2x4_t @llvm.arm.neon.vld1x4.v2i32.p0i32(i32* %a)
+ %tmp = getelementptr i32, i32* %a, i32 8
+ store i32* %tmp, i32** %ptr
+ ret %struct.uint32x2x4_t %ld
+}
+
+define %struct.uint32x2x4_t @test_vld1_u32_x4_post_reg(i32* %a, i32** %ptr, i32 %inc) nounwind {
+; CHECK-LABEL: test_vld1_u32_x4_post_reg:
+; CHECK: lsl r3, r3, #2
+; CHECK-NEXT: vld1.32 {d16, d17, d18, d19}, [r1:256], r3
+ %ld = tail call %struct.uint32x2x4_t @llvm.arm.neon.vld1x4.v2i32.p0i32(i32* %a)
+ %tmp = getelementptr i32, i32* %a, i32 %inc
+ store i32* %tmp, i32** %ptr
+ ret %struct.uint32x2x4_t %ld
+}
+
+define %struct.uint64x1x2_t @test_vld1_u64_x2_post_imm(i64* %a, i64** %ptr) nounwind {
+; CHECK-LABEL: test_vld1_u64_x2_post_imm:
+; CHECK: vld1.64 {d16, d17}, [r0:64]!
+ %ld = tail call %struct.uint64x1x2_t @llvm.arm.neon.vld1x2.v1i64.p0i64(i64* %a)
+ %tmp = getelementptr i64, i64* %a, i32 2
+ store i64* %tmp, i64** %ptr
+ ret %struct.uint64x1x2_t %ld
+}
+
+define %struct.uint64x1x2_t @test_vld1_u64_x2_post_reg(i64* %a, i64** %ptr, i32 %inc) nounwind {
+; CHECK-LABEL: test_vld1_u64_x2_post_reg:
+; CHECK: lsl r2, r2, #3
+; CHECK-NEXT: vld1.64 {d16, d17}, [r0:64], r2
+ %ld = tail call %struct.uint64x1x2_t @llvm.arm.neon.vld1x2.v1i64.p0i64(i64* %a)
+ %tmp = getelementptr i64, i64* %a, i32 %inc
+ store i64* %tmp, i64** %ptr
+ ret %struct.uint64x1x2_t %ld
+}
+
+define %struct.uint64x1x3_t @test_vld1_u64_x3_post_imm(i64* %a, i64** %ptr) nounwind {
+; CHECK-LABEL: test_vld1_u64_x3_post_imm:
+; CHECK: vld1.64 {d16, d17, d18}, [r1:64]!
+ %ld = tail call %struct.uint64x1x3_t @llvm.arm.neon.vld1x3.v1i64.p0i64(i64* %a)
+ %tmp = getelementptr i64, i64* %a, i32 3
+ store i64* %tmp, i64** %ptr
+ ret %struct.uint64x1x3_t %ld
+}
+
+define %struct.uint64x1x3_t @test_vld1_u64_x3_post_reg(i64* %a, i64** %ptr, i32 %inc) nounwind {
+; CHECK-LABEL: test_vld1_u64_x3_post_reg:
+; CHECK: lsl r3, r3, #3
+; CHECK-NEXT: vld1.64 {d16, d17, d18}, [r1:64], r3
+ %ld = tail call %struct.uint64x1x3_t @llvm.arm.neon.vld1x3.v1i64.p0i64(i64* %a)
+ %tmp = getelementptr i64, i64* %a, i32 %inc
+ store i64* %tmp, i64** %ptr
+ ret %struct.uint64x1x3_t %ld
+}
+
+define %struct.uint64x1x4_t @test_vld1_u64_x4_post_imm(i64* %a, i64** %ptr) nounwind {
+; CHECK-LABEL: test_vld1_u64_x4_post_imm:
+; CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256]!
+ %ld = tail call %struct.uint64x1x4_t @llvm.arm.neon.vld1x4.v1i64.p0i64(i64* %a)
+ %tmp = getelementptr i64, i64* %a, i32 4
+ store i64* %tmp, i64** %ptr
+ ret %struct.uint64x1x4_t %ld
+}
+
+define %struct.uint64x1x4_t @test_vld1_u64_x4_post_reg(i64* %a, i64** %ptr, i32 %inc) nounwind {
+; CHECK-LABEL: test_vld1_u64_x4_post_reg:
+; CHECK: lsl r3, r3, #3
+; CHECK-NEXT: vld1.64 {d16, d17, d18, d19}, [r1:256], r3
+ %ld = tail call %struct.uint64x1x4_t @llvm.arm.neon.vld1x4.v1i64.p0i64(i64* %a)
+ %tmp = getelementptr i64, i64* %a, i32 %inc
+ store i64* %tmp, i64** %ptr
+ ret %struct.uint64x1x4_t %ld
+}
+
+define %struct.uint8x8x2_t @test_vld1_u8_x2_post_imm(i8* %a, i8** %ptr) nounwind {
+; CHECK-LABEL: test_vld1_u8_x2_post_imm:
+; CHECK: vld1.8 {d16, d17}, [r0:64]!
+ %ld = tail call %struct.uint8x8x2_t @llvm.arm.neon.vld1x2.v8i8.p0i8(i8* %a)
+ %tmp = getelementptr i8, i8* %a, i32 16
+ store i8* %tmp, i8** %ptr
+ ret %struct.uint8x8x2_t %ld
+}
+
+define %struct.uint8x8x2_t @test_vld1_u8_x2_post_reg(i8* %a, i8** %ptr, i32 %inc) nounwind {
+; CHECK-LABEL: test_vld1_u8_x2_post_reg:
+; CHECK: vld1.8 {d16, d17}, [r0:64], r2
+ %ld = tail call %struct.uint8x8x2_t @llvm.arm.neon.vld1x2.v8i8.p0i8(i8* %a)
+ %tmp = getelementptr i8, i8* %a, i32 %inc
+ store i8* %tmp, i8** %ptr
+ ret %struct.uint8x8x2_t %ld
+}
+
+define %struct.uint8x8x3_t @test_vld1_u8_x3_post_imm(i8* %a, i8** %ptr) nounwind {
+; CHECK-LABEL: test_vld1_u8_x3_post_imm:
+; CHECK: vld1.8 {d16, d17, d18}, [r1:64]!
+ %ld = tail call %struct.uint8x8x3_t @llvm.arm.neon.vld1x3.v8i8.p0i8(i8* %a)
+ %tmp = getelementptr i8, i8* %a, i32 24
+ store i8* %tmp, i8** %ptr
+ ret %struct.uint8x8x3_t %ld
+}
+
+define %struct.uint8x8x3_t @test_vld1_u8_x3_post_reg(i8* %a, i8** %ptr, i32 %inc) nounwind {
+; CHECK-LABEL: test_vld1_u8_x3_post_reg:
+; CHECK: vld1.8 {d16, d17, d18}, [r1:64], r3
+ %ld = tail call %struct.uint8x8x3_t @llvm.arm.neon.vld1x3.v8i8.p0i8(i8* %a)
+ %tmp = getelementptr i8, i8* %a, i32 %inc
+ store i8* %tmp, i8** %ptr
+ ret %struct.uint8x8x3_t %ld
+}
+
+define %struct.uint8x8x4_t @test_vld1_u8_x4_post_imm(i8* %a, i8** %ptr) nounwind {
+; CHECK-LABEL: test_vld1_u8_x4_post_imm:
+; CHECK: vld1.8 {d16, d17, d18, d19}, [r1:256]!
+ %ld = tail call %struct.uint8x8x4_t @llvm.arm.neon.vld1x4.v8i8.p0i8(i8* %a)
+ %tmp = getelementptr i8, i8* %a, i32 32
+ store i8* %tmp, i8** %ptr
+ ret %struct.uint8x8x4_t %ld
+}
+
+define %struct.uint8x8x4_t @test_vld1_u8_x4_post_reg(i8* %a, i8** %ptr, i32 %inc) nounwind {
+; CHECK-LABEL: test_vld1_u8_x4_post_reg:
+; CHECK: vld1.8 {d16, d17, d18, d19}, [r1:256], r3
+ %ld = tail call %struct.uint8x8x4_t @llvm.arm.neon.vld1x4.v8i8.p0i8(i8* %a)
+ %tmp = getelementptr i8, i8* %a, i32 %inc
+ store i8* %tmp, i8** %ptr
+ ret %struct.uint8x8x4_t %ld
+}
+
+define %struct.uint16x8x2_t @test_vld1q_u16_x2_post_imm(i16* %a, i16** %ptr) nounwind {
+; CHECK-LABEL: test_vld1q_u16_x2_post_imm:
+; CHECK: vld1.16 {d16, d17, d18, d19}, [r1:256]!
+ %ld = tail call %struct.uint16x8x2_t @llvm.arm.neon.vld1x2.v8i16.p0i16(i16* %a)
+ %tmp = getelementptr i16, i16* %a, i32 16
+ store i16* %tmp, i16** %ptr
+ ret %struct.uint16x8x2_t %ld
+}
+
+define %struct.uint16x8x3_t @test_vld1q_u16_x3_post_imm(i16* %a, i16** %ptr) nounwind {
+; CHECK-LABEL: test_vld1q_u16_x3_post_imm:
+; CHECK: vld1.16 {d16, d17, d18}, [r1:64]!
+; CHECK-NEXT: vld1.16 {d19, d20, d21}, [r1:64]!
+ %ld = tail call %struct.uint16x8x3_t @llvm.arm.neon.vld1x3.v8i16.p0i16(i16* %a)
+ %tmp = getelementptr i16, i16* %a, i32 24
+ store i16* %tmp, i16** %ptr
+ ret %struct.uint16x8x3_t %ld
+}
+
+define %struct.uint16x8x4_t @test_vld1q_u16_x4_post_imm(i16* %a, i16** %ptr) nounwind {
+; CHECK-LABEL: test_vld1q_u16_x4_post_imm:
+; CHECK: vld1.16 {d16, d17, d18, d19}, [r1:256]!
+; CHECK-NEXT: vld1.16 {d20, d21, d22, d23}, [r1:256]!
+ %ld = tail call %struct.uint16x8x4_t @llvm.arm.neon.vld1x4.v8i16.p0i16(i16* %a)
+ %tmp = getelementptr i16, i16* %a, i32 32
+ store i16* %tmp, i16** %ptr
+ ret %struct.uint16x8x4_t %ld
+}
+
+define %struct.uint32x4x2_t @test_vld1q_u32_x2_post_imm(i32* %a, i32** %ptr) nounwind {
+; CHECK-LABEL: test_vld1q_u32_x2_post_imm:
+; CHECK: vld1.32 {d16, d17, d18, d19}, [r1:256]!
+ %ld = tail call %struct.uint32x4x2_t @llvm.arm.neon.vld1x2.v4i32.p0i32(i32* %a)
+ %tmp = getelementptr i32, i32* %a, i32 8
+ store i32* %tmp, i32** %ptr
+ ret %struct.uint32x4x2_t %ld
+}
+
+define %struct.uint32x4x3_t @test_vld1q_u32_x3_post_imm(i32* %a, i32** %ptr) nounwind {
+; CHECK-LABEL: test_vld1q_u32_x3_post_imm:
+; CHECK: vld1.32 {d16, d17, d18}, [r1:64]!
+; CHECK-NEXT: vld1.32 {d19, d20, d21}, [r1:64]!
+ %ld = tail call %struct.uint32x4x3_t @llvm.arm.neon.vld1x3.v4i32.p0i32(i32* %a)
+ %tmp = getelementptr i32, i32* %a, i32 12
+ store i32* %tmp, i32** %ptr
+ ret %struct.uint32x4x3_t %ld
+}
+
+define %struct.uint32x4x4_t @test_vld1q_u32_x4_post_imm(i32* %a, i32** %ptr) nounwind {
+; CHECK-LABEL: test_vld1q_u32_x4_post_imm:
+; CHECK: vld1.32 {d16, d17, d18, d19}, [r1:256]!
+; CHECK-NEXT: vld1.32 {d20, d21, d22, d23}, [r1:256]!
+ %ld = tail call %struct.uint32x4x4_t @llvm.arm.neon.vld1x4.v4i32.p0i32(i32* %a)
+ %tmp = getelementptr i32, i32* %a, i32 16
+ store i32* %tmp, i32** %ptr
+ ret %struct.uint32x4x4_t %ld
+}
+
+define %struct.uint64x2x2_t @test_vld1q_u64_x2_post_imm(i64* %a, i64** %ptr) nounwind {
+; CHECK-LABEL: test_vld1q_u64_x2_post_imm:
+; CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256]!
+ %ld = tail call %struct.uint64x2x2_t @llvm.arm.neon.vld1x2.v2i64.p0i64(i64* %a)
+ %tmp = getelementptr i64, i64* %a, i32 4
+ store i64* %tmp, i64** %ptr
+ ret %struct.uint64x2x2_t %ld
+}
+
+define %struct.uint64x2x3_t @test_vld1q_u64_x3_post_imm(i64* %a, i64** %ptr) nounwind {
+; CHECK-LABEL: test_vld1q_u64_x3_post_imm:
+; CHECK: vld1.64 {d16, d17, d18}, [r1:64]!
+; CHECK-NEXT: vld1.64 {d19, d20, d21}, [r1:64]!
+ %ld = tail call %struct.uint64x2x3_t @llvm.arm.neon.vld1x3.v2i64.p0i64(i64* %a)
+ %tmp = getelementptr i64, i64* %a, i32 6
+ store i64* %tmp, i64** %ptr
+ ret %struct.uint64x2x3_t %ld
+}
+
+define %struct.uint64x2x4_t @test_vld1q_u64_x4_post_imm(i64* %a, i64** %ptr) nounwind {
+; CHECK-LABEL: test_vld1q_u64_x4_post_imm:
+; CHECK: vld1.64 {d16, d17, d18, d19}, [r1:256]!
+; CHECK-NEXT: vld1.64 {d20, d21, d22, d23}, [r1:256]!
+ %ld = tail call %struct.uint64x2x4_t @llvm.arm.neon.vld1x4.v2i64.p0i64(i64* %a)
+ %tmp = getelementptr i64, i64* %a, i32 8
+ store i64* %tmp, i64** %ptr
+ ret %struct.uint64x2x4_t %ld
+}
+
+define %struct.uint8x16x2_t @test_vld1q_u8_x2_post_imm(i8* %a, i8** %ptr) nounwind {
+; CHECK-LABEL: test_vld1q_u8_x2_post_imm:
+; CHECK: vld1.8 {d16, d17, d18, d19}, [r1:256]!
+ %ld = tail call %struct.uint8x16x2_t @llvm.arm.neon.vld1x2.v16i8.p0i8(i8* %a)
+ %tmp = getelementptr i8, i8* %a, i32 32
+ store i8* %tmp, i8** %ptr
+ ret %struct.uint8x16x2_t %ld
+}
+
+define %struct.uint8x16x3_t @test_vld1q_u8_x3_post_imm(i8* %a, i8** %ptr) nounwind {
+; CHECK-LABEL: test_vld1q_u8_x3_post_imm:
+; CHECK: vld1.8 {d16, d17, d18}, [r1:64]!
+; CHECK-NEXT: vld1.8 {d19, d20, d21}, [r1:64]!
+ %ld = tail call %struct.uint8x16x3_t @llvm.arm.neon.vld1x3.v16i8.p0i8(i8* %a)
+ %tmp = getelementptr i8, i8* %a, i32 48
+ store i8* %tmp, i8** %ptr
+ ret %struct.uint8x16x3_t %ld
+}
+
+define %struct.uint8x16x4_t @test_vld1q_u8_x4_post_imm(i8* %a, i8** %ptr) nounwind {
+; CHECK-LABEL: test_vld1q_u8_x4_post_imm:
+; CHECK: vld1.8 {d16, d17, d18, d19}, [r1:256]!
+; CHECK-NEXT: vld1.8 {d20, d21, d22, d23}, [r1:256]!
+ %ld = tail call %struct.uint8x16x4_t @llvm.arm.neon.vld1x4.v16i8.p0i8(i8* %a)
+ %tmp = getelementptr i8, i8* %a, i32 64
+ store i8* %tmp, i8** %ptr
+ ret %struct.uint8x16x4_t %ld
+}
diff --git a/llvm/test/CodeGen/ARM/pr45824.ll b/llvm/test/CodeGen/ARM/pr45824.ll
deleted file mode 100644
index 221c764526b44..0000000000000
--- a/llvm/test/CodeGen/ARM/pr45824.ll
+++ /dev/null
@@ -1,85 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=armv7-none-linux-eabi < %s | FileCheck %s
-
-define void @vld1x2(i8* %v4, i32 %v2) {
-; CHECK-LABEL: vld1x2:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: .LBB0_1: @ %.preheader
-; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: bne .LBB0_1
-; CHECK-NEXT: @ %bb.2: @ %.loopexit
-; CHECK-NEXT: @ in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: vst3.8 {d16, d17, d18}, [r0]
-; CHECK-NEXT: b .LBB0_1
- br label %.preheader
-
-.preheader: ; preds = %.preheader, %3
- %v5 = icmp eq i8* %v4, undef
- br i1 %v5, label %.loopexit, label %.preheader
-
-.loopexit: ; preds = %.preheader
- %v6 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x2.v8i8.p0i8(i8* %v4)
- %v7 = getelementptr inbounds i8, i8* %v4, i32 %v2
- %v8 = tail call { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x2.v8i8.p0i8(i8* %v7)
- tail call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 1)
- br label %.preheader
-}
-
-define void @vld1x3(i8* %v4, i32 %v2) {
-; CHECK-LABEL: vld1x3:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: .LBB1_1: @ %.preheader
-; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: bne .LBB1_1
-; CHECK-NEXT: @ %bb.2: @ %.loopexit
-; CHECK-NEXT: @ in Loop: Header=BB1_1 Depth=1
-; CHECK-NEXT: vst3.8 {d16, d17, d18}, [r0]
-; CHECK-NEXT: b .LBB1_1
- br label %.preheader
-
-.preheader: ; preds = %.preheader, %3
- %v5 = icmp eq i8* %v4, undef
- br i1 %v5, label %.loopexit, label %.preheader
-
-.loopexit: ; preds = %.preheader
- %v6 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x3.v8i8.p0i8(i8* %v4)
- %v7 = getelementptr inbounds i8, i8* %v4, i32 %v2
- %v8 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x3.v8i8.p0i8(i8* %v7)
- tail call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 1)
- br label %.preheader
-}
-
-define void @vld1x4(i8* %v4, i32 %v2) {
-; CHECK-LABEL: vld1x4:
-; CHECK: @ %bb.0:
-; CHECK-NEXT: mov r0, #0
-; CHECK-NEXT: .LBB2_1: @ %.preheader
-; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: bne .LBB2_1
-; CHECK-NEXT: @ %bb.2: @ %.loopexit
-; CHECK-NEXT: @ in Loop: Header=BB2_1 Depth=1
-; CHECK-NEXT: vst3.8 {d16, d17, d18}, [r0]
-; CHECK-NEXT: b .LBB2_1
- br label %.preheader
-
-.preheader: ; preds = %.preheader, %3
- %v5 = icmp eq i8* %v4, undef
- br i1 %v5, label %.loopexit, label %.preheader
-
-.loopexit: ; preds = %.preheader
- %v6 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x4.v8i8.p0i8(i8* %v4)
- %v7 = getelementptr inbounds i8, i8* %v4, i32 %v2
- %v8 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x4.v8i8.p0i8(i8* %v7)
- tail call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* undef, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 1)
- br label %.preheader
-}
-
-declare void @llvm.arm.neon.vst3.p0i8.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32)
-declare { <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x2.v8i8.p0i8(i8*)
-declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x3.v8i8.p0i8(i8*)
-declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.arm.neon.vld1x4.v8i8.p0i8(i8*)
More information about the llvm-commits mailing list