[llvm] c75e454 - [RISCV] Transform unaligned RVV vector loads/stores to aligned ones
Fraser Cormack via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 14 10:20:35 PDT 2021
Author: Fraser Cormack
Date: 2021-06-14T18:12:18+01:00
New Revision: c75e454cb93206833f8cedde1ed5d12ef161e357
URL: https://github.com/llvm/llvm-project/commit/c75e454cb93206833f8cedde1ed5d12ef161e357
DIFF: https://github.com/llvm/llvm-project/commit/c75e454cb93206833f8cedde1ed5d12ef161e357.diff
LOG: [RISCV] Transform unaligned RVV vector loads/stores to aligned ones
This patch adds support for loading and storing unaligned vectors via an
equivalently-sized i8 vector type, which has support in the RVV
specification for byte-aligned access.
This offers a more optimal path for handling of unaligned fixed-length
vector accesses, which are currently scalarized. It also prevents
crashing when `LegalizeDAG` sees an unaligned scalable-vector load/store
operation.
Future work could be to investigate loading/storing via the largest
vector element type for the given alignment, in case that would be more
optimal on hardware. For instance, a 4-byte-aligned nxv2i64 vector load
could be loaded as nxv4i32 instead of as nxv16i8.
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D104032
Added:
llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.h
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 94095e7c6a81..45f7f10d4951 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -523,6 +523,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
for (unsigned VPOpc : IntegerVPOps)
setOperationAction(VPOpc, VT, Custom);
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
@@ -584,6 +587,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::FCOPYSIGN, VT, Legal);
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
@@ -1891,6 +1897,66 @@ static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
}
+// While RVV has alignment restrictions, we should always be able to load as a
+// legal equivalently-sized byte-typed vector instead. This method is
+// responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
+// the load is already correctly-aligned, it returns SDValue().
+SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
+ SelectionDAG &DAG) const {
+ auto *Load = cast<LoadSDNode>(Op);
+ assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
+
+ if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+ Load->getMemoryVT(),
+ *Load->getMemOperand()))
+ return SDValue();
+
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+ unsigned EltSizeBits = VT.getScalarSizeInBits();
+ assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
+ "Unexpected unaligned RVV load type");
+ MVT NewVT =
+ MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
+ assert(NewVT.isValid() &&
+ "Expecting equally-sized RVV vector types to be legal");
+ SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
+ Load->getPointerInfo(), Load->getOriginalAlign(),
+ Load->getMemOperand()->getFlags());
+ return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
+}
+
+// While RVV has alignment restrictions, we should always be able to store as a
+// legal equivalently-sized byte-typed vector instead. This method is
+// responsible for re-expressing a ISD::STORE via a correctly-aligned type. It
+// returns SDValue() if the store is already correctly aligned.
+SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
+ SelectionDAG &DAG) const {
+ auto *Store = cast<StoreSDNode>(Op);
+ assert(Store && Store->getValue().getValueType().isVector() &&
+ "Expected vector store");
+
+ if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+ Store->getMemoryVT(),
+ *Store->getMemOperand()))
+ return SDValue();
+
+ SDLoc DL(Op);
+ SDValue StoredVal = Store->getValue();
+ MVT VT = StoredVal.getSimpleValueType();
+ unsigned EltSizeBits = VT.getScalarSizeInBits();
+ assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
+ "Unexpected unaligned RVV store type");
+ MVT NewVT =
+ MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
+ assert(NewVT.isValid() &&
+ "Expecting equally-sized RVV vector types to be legal");
+ StoredVal = DAG.getBitcast(NewVT, StoredVal);
+ return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
+ Store->getPointerInfo(), Store->getOriginalAlign(),
+ Store->getMemOperand()->getFlags());
+}
+
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -2310,9 +2376,17 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return Vec;
}
case ISD::LOAD:
- return lowerFixedLengthVectorLoadToRVV(Op, DAG);
+ if (auto V = expandUnalignedRVVLoad(Op, DAG))
+ return V;
+ if (Op.getValueType().isFixedLengthVector())
+ return lowerFixedLengthVectorLoadToRVV(Op, DAG);
+ return Op;
case ISD::STORE:
- return lowerFixedLengthVectorStoreToRVV(Op, DAG);
+ if (auto V = expandUnalignedRVVStore(Op, DAG))
+ return V;
+ if (Op.getOperand(1).getValueType().isFixedLengthVector())
+ return lowerFixedLengthVectorStoreToRVV(Op, DAG);
+ return Op;
case ISD::MLOAD:
return lowerMLOAD(Op, DAG);
case ISD::MSTORE:
@@ -4031,13 +4105,10 @@ RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
SDLoc DL(Op);
auto *Load = cast<LoadSDNode>(Op);
- if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
- Load->getMemoryVT(),
- *Load->getMemOperand())) {
- SDValue Result, Chain;
- std::tie(Result, Chain) = expandUnalignedLoad(Load, DAG);
- return DAG.getMergeValues({Result, Chain}, DL);
- }
+ assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+ Load->getMemoryVT(),
+ *Load->getMemOperand()) &&
+ "Expecting a correctly-aligned load");
MVT VT = Op.getSimpleValueType();
MVT ContainerVT = getContainerForFixedLengthVector(VT);
@@ -4060,10 +4131,10 @@ RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
SDLoc DL(Op);
auto *Store = cast<StoreSDNode>(Op);
- if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
- Store->getMemoryVT(),
- *Store->getMemOperand()))
- return expandUnalignedStore(Store, DAG);
+ assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+ Store->getMemoryVT(),
+ *Store->getMemOperand()) &&
+ "Expecting a correctly-aligned store");
SDValue StoreVal = Store->getValue();
MVT VT = StoreVal.getSimpleValueType();
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index eee680f39860..8fc92f65c38f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -562,6 +562,9 @@ class RISCVTargetLowering : public TargetLowering {
SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
+ SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
+ SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;
+
bool isEligibleForTailCallOptimization(
CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
const SmallVector<CCValAssign, 16> &ArgLocs) const;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
index 834860673c4b..55da966d298d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
@@ -7,112 +7,14 @@
define <4 x i32> @load_v4i32_align1(<4 x i32>* %ptr) {
; RV32-LABEL: load_v4i32_align1:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lbu a1, 13(a0)
-; RV32-NEXT: lbu a2, 12(a0)
-; RV32-NEXT: lbu a3, 15(a0)
-; RV32-NEXT: lbu a4, 14(a0)
-; RV32-NEXT: slli a1, a1, 8
-; RV32-NEXT: or a1, a1, a2
-; RV32-NEXT: slli a2, a3, 8
-; RV32-NEXT: or a2, a2, a4
-; RV32-NEXT: slli a2, a2, 16
-; RV32-NEXT: or a1, a2, a1
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: lbu a1, 9(a0)
-; RV32-NEXT: lbu a2, 8(a0)
-; RV32-NEXT: lbu a3, 11(a0)
-; RV32-NEXT: lbu a4, 10(a0)
-; RV32-NEXT: slli a1, a1, 8
-; RV32-NEXT: or a1, a1, a2
-; RV32-NEXT: slli a2, a3, 8
-; RV32-NEXT: or a2, a2, a4
-; RV32-NEXT: slli a2, a2, 16
-; RV32-NEXT: or a1, a2, a1
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lbu a1, 5(a0)
-; RV32-NEXT: lbu a2, 4(a0)
-; RV32-NEXT: lbu a3, 7(a0)
-; RV32-NEXT: lbu a4, 6(a0)
-; RV32-NEXT: slli a1, a1, 8
-; RV32-NEXT: or a1, a1, a2
-; RV32-NEXT: slli a2, a3, 8
-; RV32-NEXT: or a2, a2, a4
-; RV32-NEXT: slli a2, a2, 16
-; RV32-NEXT: or a1, a2, a1
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: lbu a1, 1(a0)
-; RV32-NEXT: lbu a2, 0(a0)
-; RV32-NEXT: lbu a3, 3(a0)
-; RV32-NEXT: lbu a0, 2(a0)
-; RV32-NEXT: slli a1, a1, 8
-; RV32-NEXT: or a1, a1, a2
-; RV32-NEXT: slli a2, a3, 8
-; RV32-NEXT: or a0, a2, a0
-; RV32-NEXT: slli a0, a0, 16
-; RV32-NEXT: or a0, a0, a1
-; RV32-NEXT: sw a0, 16(sp)
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; RV32-NEXT: vle8.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: load_v4i32_align1:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -32
-; RV64-NEXT: .cfi_def_cfa_offset 32
-; RV64-NEXT: lbu a1, 9(a0)
-; RV64-NEXT: lbu a2, 8(a0)
-; RV64-NEXT: lbu a3, 11(a0)
-; RV64-NEXT: lbu a4, 10(a0)
-; RV64-NEXT: slli a1, a1, 8
-; RV64-NEXT: or a1, a1, a2
-; RV64-NEXT: slli a2, a3, 8
-; RV64-NEXT: or a2, a2, a4
-; RV64-NEXT: slli a2, a2, 16
-; RV64-NEXT: or a1, a2, a1
-; RV64-NEXT: lbu a2, 13(a0)
-; RV64-NEXT: lbu a3, 12(a0)
-; RV64-NEXT: lbu a4, 15(a0)
-; RV64-NEXT: lbu a5, 14(a0)
-; RV64-NEXT: slli a2, a2, 8
-; RV64-NEXT: or a2, a2, a3
-; RV64-NEXT: slli a3, a4, 8
-; RV64-NEXT: or a3, a3, a5
-; RV64-NEXT: slli a3, a3, 16
-; RV64-NEXT: or a2, a3, a2
-; RV64-NEXT: slli a2, a2, 32
-; RV64-NEXT: or a1, a2, a1
-; RV64-NEXT: sd a1, 24(sp)
-; RV64-NEXT: lbu a1, 1(a0)
-; RV64-NEXT: lbu a2, 0(a0)
-; RV64-NEXT: lbu a3, 3(a0)
-; RV64-NEXT: lbu a4, 2(a0)
-; RV64-NEXT: slli a1, a1, 8
-; RV64-NEXT: or a1, a1, a2
-; RV64-NEXT: slli a2, a3, 8
-; RV64-NEXT: or a2, a2, a4
-; RV64-NEXT: slli a2, a2, 16
-; RV64-NEXT: or a1, a2, a1
-; RV64-NEXT: lbu a2, 5(a0)
-; RV64-NEXT: lbu a3, 4(a0)
-; RV64-NEXT: lbu a4, 7(a0)
-; RV64-NEXT: lbu a0, 6(a0)
-; RV64-NEXT: slli a2, a2, 8
-; RV64-NEXT: or a2, a2, a3
-; RV64-NEXT: slli a3, a4, 8
-; RV64-NEXT: or a0, a3, a0
-; RV64-NEXT: slli a0, a0, 16
-; RV64-NEXT: or a0, a0, a2
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: or a0, a0, a1
-; RV64-NEXT: sd a0, 16(sp)
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; RV64-NEXT: addi a0, sp, 16
-; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: addi sp, sp, 32
+; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; RV64-NEXT: vle8.v v8, (a0)
; RV64-NEXT: ret
%z = load <4 x i32>, <4 x i32>* %ptr, align 1
ret <4 x i32> %z
@@ -121,64 +23,14 @@ define <4 x i32> @load_v4i32_align1(<4 x i32>* %ptr) {
define <4 x i32> @load_v4i32_align2(<4 x i32>* %ptr) {
; RV32-LABEL: load_v4i32_align2:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: lhu a1, 14(a0)
-; RV32-NEXT: lhu a2, 12(a0)
-; RV32-NEXT: slli a1, a1, 16
-; RV32-NEXT: or a1, a1, a2
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: lhu a1, 10(a0)
-; RV32-NEXT: lhu a2, 8(a0)
-; RV32-NEXT: slli a1, a1, 16
-; RV32-NEXT: or a1, a1, a2
-; RV32-NEXT: sw a1, 24(sp)
-; RV32-NEXT: lhu a1, 6(a0)
-; RV32-NEXT: lhu a2, 4(a0)
-; RV32-NEXT: slli a1, a1, 16
-; RV32-NEXT: or a1, a1, a2
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: lhu a1, 2(a0)
-; RV32-NEXT: lhu a0, 0(a0)
-; RV32-NEXT: slli a1, a1, 16
-; RV32-NEXT: or a0, a1, a0
-; RV32-NEXT: sw a0, 16(sp)
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; RV32-NEXT: vle8.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: load_v4i32_align2:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -32
-; RV64-NEXT: .cfi_def_cfa_offset 32
-; RV64-NEXT: lhu a1, 10(a0)
-; RV64-NEXT: lhu a2, 8(a0)
-; RV64-NEXT: lhu a3, 14(a0)
-; RV64-NEXT: lhu a4, 12(a0)
-; RV64-NEXT: slli a1, a1, 16
-; RV64-NEXT: or a1, a1, a2
-; RV64-NEXT: slli a2, a3, 16
-; RV64-NEXT: or a2, a2, a4
-; RV64-NEXT: slli a2, a2, 32
-; RV64-NEXT: or a1, a2, a1
-; RV64-NEXT: sd a1, 24(sp)
-; RV64-NEXT: lhu a1, 2(a0)
-; RV64-NEXT: lhu a2, 0(a0)
-; RV64-NEXT: lhu a3, 6(a0)
-; RV64-NEXT: lhu a0, 4(a0)
-; RV64-NEXT: slli a1, a1, 16
-; RV64-NEXT: or a1, a1, a2
-; RV64-NEXT: slli a2, a3, 16
-; RV64-NEXT: or a0, a2, a0
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: or a0, a0, a1
-; RV64-NEXT: sd a0, 16(sp)
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; RV64-NEXT: addi a0, sp, 16
-; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: addi sp, sp, 32
+; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; RV64-NEXT: vle8.v v8, (a0)
; RV64-NEXT: ret
%z = load <4 x i32>, <4 x i32>* %ptr, align 2
ret <4 x i32> %z
@@ -187,86 +39,14 @@ define <4 x i32> @load_v4i32_align2(<4 x i32>* %ptr) {
define void @store_v4i32_align1(<4 x i32> %x, <4 x i32>* %ptr) {
; RV32-LABEL: store_v4i32_align1:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vse32.v v8, (a1)
-; RV32-NEXT: lw a1, 28(sp)
-; RV32-NEXT: sb a1, 12(a0)
-; RV32-NEXT: lw a2, 24(sp)
-; RV32-NEXT: sb a2, 8(a0)
-; RV32-NEXT: lw a3, 20(sp)
-; RV32-NEXT: sb a3, 4(a0)
-; RV32-NEXT: lw a4, 16(sp)
-; RV32-NEXT: sb a4, 0(a0)
-; RV32-NEXT: srli a5, a1, 24
-; RV32-NEXT: sb a5, 15(a0)
-; RV32-NEXT: srli a5, a1, 16
-; RV32-NEXT: sb a5, 14(a0)
-; RV32-NEXT: srli a1, a1, 8
-; RV32-NEXT: sb a1, 13(a0)
-; RV32-NEXT: srli a1, a2, 24
-; RV32-NEXT: sb a1, 11(a0)
-; RV32-NEXT: srli a1, a2, 16
-; RV32-NEXT: sb a1, 10(a0)
-; RV32-NEXT: srli a1, a2, 8
-; RV32-NEXT: sb a1, 9(a0)
-; RV32-NEXT: srli a1, a3, 24
-; RV32-NEXT: sb a1, 7(a0)
-; RV32-NEXT: srli a1, a3, 16
-; RV32-NEXT: sb a1, 6(a0)
-; RV32-NEXT: srli a1, a3, 8
-; RV32-NEXT: sb a1, 5(a0)
-; RV32-NEXT: srli a1, a4, 24
-; RV32-NEXT: sb a1, 3(a0)
-; RV32-NEXT: srli a1, a4, 16
-; RV32-NEXT: sb a1, 2(a0)
-; RV32-NEXT: srli a1, a4, 8
-; RV32-NEXT: sb a1, 1(a0)
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; RV32-NEXT: vse8.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: store_v4i32_align1:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -32
-; RV64-NEXT: .cfi_def_cfa_offset 32
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vse32.v v8, (a1)
-; RV64-NEXT: ld a1, 24(sp)
-; RV64-NEXT: sb a1, 8(a0)
-; RV64-NEXT: ld a2, 16(sp)
-; RV64-NEXT: sb a2, 0(a0)
-; RV64-NEXT: srli a3, a1, 56
-; RV64-NEXT: sb a3, 15(a0)
-; RV64-NEXT: srli a3, a1, 48
-; RV64-NEXT: sb a3, 14(a0)
-; RV64-NEXT: srli a3, a1, 40
-; RV64-NEXT: sb a3, 13(a0)
-; RV64-NEXT: srli a3, a1, 32
-; RV64-NEXT: sb a3, 12(a0)
-; RV64-NEXT: srli a3, a1, 24
-; RV64-NEXT: sb a3, 11(a0)
-; RV64-NEXT: srli a3, a1, 16
-; RV64-NEXT: sb a3, 10(a0)
-; RV64-NEXT: srli a1, a1, 8
-; RV64-NEXT: sb a1, 9(a0)
-; RV64-NEXT: srli a1, a2, 40
-; RV64-NEXT: sb a1, 5(a0)
-; RV64-NEXT: srli a1, a2, 32
-; RV64-NEXT: sb a1, 4(a0)
-; RV64-NEXT: srli a1, a2, 56
-; RV64-NEXT: sb a1, 7(a0)
-; RV64-NEXT: srli a1, a2, 48
-; RV64-NEXT: sb a1, 6(a0)
-; RV64-NEXT: srli a1, a2, 24
-; RV64-NEXT: sb a1, 3(a0)
-; RV64-NEXT: srli a1, a2, 16
-; RV64-NEXT: sb a1, 2(a0)
-; RV64-NEXT: srli a1, a2, 8
-; RV64-NEXT: sb a1, 1(a0)
-; RV64-NEXT: addi sp, sp, 32
+; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; RV64-NEXT: vse8.v v8, (a0)
; RV64-NEXT: ret
store <4 x i32> %x, <4 x i32>* %ptr, align 1
ret void
@@ -275,54 +55,14 @@ define void @store_v4i32_align1(<4 x i32> %x, <4 x i32>* %ptr) {
define void @store_v4i32_align2(<4 x i32> %x, <4 x i32>* %ptr) {
; RV32-LABEL: store_v4i32_align2:
; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vse32.v v8, (a1)
-; RV32-NEXT: lw a1, 28(sp)
-; RV32-NEXT: sh a1, 12(a0)
-; RV32-NEXT: lw a2, 24(sp)
-; RV32-NEXT: sh a2, 8(a0)
-; RV32-NEXT: lw a3, 20(sp)
-; RV32-NEXT: sh a3, 4(a0)
-; RV32-NEXT: lw a4, 16(sp)
-; RV32-NEXT: sh a4, 0(a0)
-; RV32-NEXT: srli a1, a1, 16
-; RV32-NEXT: sh a1, 14(a0)
-; RV32-NEXT: srli a1, a2, 16
-; RV32-NEXT: sh a1, 10(a0)
-; RV32-NEXT: srli a1, a3, 16
-; RV32-NEXT: sh a1, 6(a0)
-; RV32-NEXT: srli a1, a4, 16
-; RV32-NEXT: sh a1, 2(a0)
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; RV32-NEXT: vse8.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: store_v4i32_align2:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -32
-; RV64-NEXT: .cfi_def_cfa_offset 32
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; RV64-NEXT: addi a1, sp, 16
-; RV64-NEXT: vse32.v v8, (a1)
-; RV64-NEXT: ld a1, 24(sp)
-; RV64-NEXT: sh a1, 8(a0)
-; RV64-NEXT: ld a2, 16(sp)
-; RV64-NEXT: sh a2, 0(a0)
-; RV64-NEXT: srli a3, a1, 48
-; RV64-NEXT: sh a3, 14(a0)
-; RV64-NEXT: srli a3, a1, 32
-; RV64-NEXT: sh a3, 12(a0)
-; RV64-NEXT: srli a1, a1, 16
-; RV64-NEXT: sh a1, 10(a0)
-; RV64-NEXT: srli a1, a2, 48
-; RV64-NEXT: sh a1, 6(a0)
-; RV64-NEXT: srli a1, a2, 32
-; RV64-NEXT: sh a1, 4(a0)
-; RV64-NEXT: srli a1, a2, 16
-; RV64-NEXT: sh a1, 2(a0)
-; RV64-NEXT: addi sp, sp, 32
+; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; RV64-NEXT: vse8.v v8, (a0)
; RV64-NEXT: ret
store <4 x i32> %x, <4 x i32>* %ptr, align 2
ret void
diff --git a/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll b/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll
new file mode 100644
index 000000000000..e7f2e26bac96
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll
@@ -0,0 +1,192 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv32 -mattr=+d,+experimental-zfh,+experimental-v < %s \
+; RUN: -verify-machineinstrs | FileCheck %s
+; RUN: llc -mtriple riscv64 -mattr=+d,+experimental-zfh,+experimental-v < %s \
+; RUN: -verify-machineinstrs | FileCheck %s
+
+define <vscale x 1 x i32> @unaligned_load_nxv1i32_a1(<vscale x 1 x i32>* %ptr) {
+; CHECK-LABEL: unaligned_load_nxv1i32_a1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: ret
+ %v = load <vscale x 1 x i32>, <vscale x 1 x i32>* %ptr, align 1
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @unaligned_load_nxv1i32_a2(<vscale x 1 x i32>* %ptr) {
+; CHECK-LABEL: unaligned_load_nxv1i32_a2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, mu
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: ret
+ %v = load <vscale x 1 x i32>, <vscale x 1 x i32>* %ptr, align 2
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @aligned_load_nxv1i32_a4(<vscale x 1 x i32>* %ptr) {
+; CHECK-LABEL: aligned_load_nxv1i32_a4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, mu
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: ret
+ %v = load <vscale x 1 x i32>, <vscale x 1 x i32>* %ptr, align 4
+ ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i64> @unaligned_load_nxv1i64_a1(<vscale x 1 x i64>* %ptr) {
+; CHECK-LABEL: unaligned_load_nxv1i64_a1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl1r.v v8, (a0)
+; CHECK-NEXT: ret
+ %v = load <vscale x 1 x i64>, <vscale x 1 x i64>* %ptr, align 1
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @unaligned_load_nxv1i64_a4(<vscale x 1 x i64>* %ptr) {
+; CHECK-LABEL: unaligned_load_nxv1i64_a4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl1r.v v8, (a0)
+; CHECK-NEXT: ret
+ %v = load <vscale x 1 x i64>, <vscale x 1 x i64>* %ptr, align 4
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @aligned_load_nxv1i64_a8(<vscale x 1 x i64>* %ptr) {
+; CHECK-LABEL: aligned_load_nxv1i64_a8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl1re64.v v8, (a0)
+; CHECK-NEXT: ret
+ %v = load <vscale x 1 x i64>, <vscale x 1 x i64>* %ptr, align 8
+ ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 2 x i64> @unaligned_load_nxv2i64_a1(<vscale x 2 x i64>* %ptr) {
+; CHECK-LABEL: unaligned_load_nxv2i64_a1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl2r.v v8, (a0)
+; CHECK-NEXT: ret
+ %v = load <vscale x 2 x i64>, <vscale x 2 x i64>* %ptr, align 1
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @unaligned_load_nxv2i64_a4(<vscale x 2 x i64>* %ptr) {
+; CHECK-LABEL: unaligned_load_nxv2i64_a4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl2r.v v8, (a0)
+; CHECK-NEXT: ret
+ %v = load <vscale x 2 x i64>, <vscale x 2 x i64>* %ptr, align 4
+ ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @aligned_load_nxv2i64_a8(<vscale x 2 x i64>* %ptr) {
+; CHECK-LABEL: aligned_load_nxv2i64_a8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl2re64.v v8, (a0)
+; CHECK-NEXT: ret
+ %v = load <vscale x 2 x i64>, <vscale x 2 x i64>* %ptr, align 8
+ ret <vscale x 2 x i64> %v
+}
+
+; Masks should always be aligned
+define <vscale x 1 x i1> @unaligned_load_nxv1i1_a1(<vscale x 1 x i1>* %ptr) {
+; CHECK-LABEL: unaligned_load_nxv1i1_a1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, mu
+; CHECK-NEXT: vle1.v v0, (a0)
+; CHECK-NEXT: ret
+ %v = load <vscale x 1 x i1>, <vscale x 1 x i1>* %ptr, align 1
+ ret <vscale x 1 x i1> %v
+}
+
+define <vscale x 4 x float> @unaligned_load_nxv4f32_a1(<vscale x 4 x float>* %ptr) {
+; CHECK-LABEL: unaligned_load_nxv4f32_a1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl2r.v v8, (a0)
+; CHECK-NEXT: ret
+ %v = load <vscale x 4 x float>, <vscale x 4 x float>* %ptr, align 1
+ ret <vscale x 4 x float> %v
+}
+
+define <vscale x 4 x float> @unaligned_load_nxv4f32_a2(<vscale x 4 x float>* %ptr) {
+; CHECK-LABEL: unaligned_load_nxv4f32_a2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl2r.v v8, (a0)
+; CHECK-NEXT: ret
+ %v = load <vscale x 4 x float>, <vscale x 4 x float>* %ptr, align 2
+ ret <vscale x 4 x float> %v
+}
+
+define <vscale x 4 x float> @aligned_load_nxv4f32_a4(<vscale x 4 x float>* %ptr) {
+; CHECK-LABEL: aligned_load_nxv4f32_a4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl2re32.v v8, (a0)
+; CHECK-NEXT: ret
+ %v = load <vscale x 4 x float>, <vscale x 4 x float>* %ptr, align 4
+ ret <vscale x 4 x float> %v
+}
+
+define <vscale x 8 x half> @unaligned_load_nxv8f16_a1(<vscale x 8 x half>* %ptr) {
+; CHECK-LABEL: unaligned_load_nxv8f16_a1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl2r.v v8, (a0)
+; CHECK-NEXT: ret
+ %v = load <vscale x 8 x half>, <vscale x 8 x half>* %ptr, align 1
+ ret <vscale x 8 x half> %v
+}
+
+define <vscale x 8 x half> @aligned_load_nxv8f16_a2(<vscale x 8 x half>* %ptr) {
+; CHECK-LABEL: aligned_load_nxv8f16_a2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl2re16.v v8, (a0)
+; CHECK-NEXT: ret
+ %v = load <vscale x 8 x half>, <vscale x 8 x half>* %ptr, align 2
+ ret <vscale x 8 x half> %v
+}
+
+define void @unaligned_store_nxv4i32_a1(<vscale x 4 x i32> %x, <vscale x 4 x i32>* %ptr) {
+; CHECK-LABEL: unaligned_store_nxv4i32_a1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vs2r.v v8, (a0)
+; CHECK-NEXT: ret
+ store <vscale x 4 x i32> %x, <vscale x 4 x i32>* %ptr, align 1
+ ret void
+}
+
+define void @unaligned_store_nxv4i32_a2(<vscale x 4 x i32> %x, <vscale x 4 x i32>* %ptr) {
+; CHECK-LABEL: unaligned_store_nxv4i32_a2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vs2r.v v8, (a0)
+; CHECK-NEXT: ret
+ store <vscale x 4 x i32> %x, <vscale x 4 x i32>* %ptr, align 2
+ ret void
+}
+
+define void @aligned_store_nxv4i32_a4(<vscale x 4 x i32> %x, <vscale x 4 x i32>* %ptr) {
+; CHECK-LABEL: aligned_store_nxv4i32_a4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vs2r.v v8, (a0)
+; CHECK-NEXT: ret
+ store <vscale x 4 x i32> %x, <vscale x 4 x i32>* %ptr, align 4
+ ret void
+}
+
+define void @unaligned_store_nxv1i16_a1(<vscale x 1 x i16> %x, <vscale x 1 x i16>* %ptr) {
+; CHECK-LABEL: unaligned_store_nxv1i16_a1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
+ store <vscale x 1 x i16> %x, <vscale x 1 x i16>* %ptr, align 1
+ ret void
+}
+
+define void @aligned_store_nxv1i16_a2(<vscale x 1 x i16> %x, <vscale x 1 x i16>* %ptr) {
+; CHECK-LABEL: aligned_store_nxv1i16_a2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, mu
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+ store <vscale x 1 x i16> %x, <vscale x 1 x i16>* %ptr, align 2
+ ret void
+}
More information about the llvm-commits
mailing list