[llvm] d065b03 - [RISCV] Optimize vp.load with an all-ones mask
Fraser Cormack via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 2 10:32:21 PDT 2021
Author: Fraser Cormack
Date: 2021-11-02T17:23:39Z
New Revision: d065b03801d93551dcb4cbc6d74e804775c7ed03
URL: https://github.com/llvm/llvm-project/commit/d065b03801d93551dcb4cbc6d74e804775c7ed03
DIFF: https://github.com/llvm/llvm-project/commit/d065b03801d93551dcb4cbc6d74e804775c7ed03.diff
LOG: [RISCV] Optimize vp.load with an all-ones mask
Similar to D110206, this patch optimizes unmasked vp.load intrinsics to
avoid the need for a vmset instruction to set the mask. It does so by
selecting a riscv_vle intrinsic rather than a riscv_vle_mask intrinsic.
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D113022
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
llvm/test/CodeGen/RISCV/rvv/vpload.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5e1acc295078..e37c86475281 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4871,24 +4871,38 @@ SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
PassThru = MLoad->getPassThru();
}
+ bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
+
MVT XLenVT = Subtarget.getXLenVT();
MVT ContainerVT = VT;
if (VT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VT);
- MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
-
- Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
+ if (!IsUnmasked) {
+ MVT MaskVT =
+ MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+ Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+ }
}
if (!VL)
VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
+ unsigned IntID =
+ IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
+ SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
+ if (!IsUnmasked)
+ Ops.push_back(PassThru);
+ Ops.push_back(BasePtr);
+ if (!IsUnmasked)
+ Ops.push_back(Mask);
+ Ops.push_back(VL);
+ if (!IsUnmasked)
+ Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
+
SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
- SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vle_mask, DL, XLenVT);
- SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
- SDValue Ops[] = {Chain, IntID, PassThru, BasePtr, Mask, VL, Policy};
+
SDValue Result =
DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
Chain = Result.getValue(1);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
index a8348fde944e..67a5900ba69e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
@@ -31,10 +31,8 @@ define <4 x i8> @vpload_v4i8(<4 x i8>* %ptr, <4 x i1> %m, i32 zeroext %evl) {
define <4 x i8> @vpload_v4i8_allones_mask(<4 x i8>* %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v4i8_allones_mask:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
-; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
-; CHECK-NEXT: vle8.v v8, (a0), v0.t
+; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: ret
%a = insertelement <4 x i1> undef, i1 true, i32 0
%b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
@@ -93,10 +91,8 @@ define <8 x i16> @vpload_v8i16(<8 x i16>* %ptr, <8 x i1> %m, i32 zeroext %evl) {
define <8 x i16> @vpload_v8i16_allones_mask(<8 x i16>* %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v8i16_allones_mask:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT: vle16.v v8, (a0), v0.t
+; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: ret
%a = insertelement <8 x i1> undef, i1 true, i32 0
%b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -143,10 +139,8 @@ define <8 x i32> @vpload_v8i32(<8 x i32>* %ptr, <8 x i1> %m, i32 zeroext %evl) {
define <8 x i32> @vpload_v8i32_allones_mask(<8 x i32>* %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v8i32_allones_mask:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT: vle32.v v8, (a0), v0.t
+; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret
%a = insertelement <8 x i1> undef, i1 true, i32 0
%b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -181,10 +175,8 @@ define <4 x i64> @vpload_v4i64(<4 x i64>* %ptr, <4 x i1> %m, i32 zeroext %evl) {
define <4 x i64> @vpload_v4i64_allones_mask(<4 x i64>* %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v4i64_allones_mask:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
-; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT: vle64.v v8, (a0), v0.t
+; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: ret
%a = insertelement <4 x i1> undef, i1 true, i32 0
%b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
@@ -219,10 +211,8 @@ define <2 x half> @vpload_v2f16(<2 x half>* %ptr, <2 x i1> %m, i32 zeroext %evl)
define <2 x half> @vpload_v2f16_allones_mask(<2 x half>* %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v2f16_allones_mask:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
-; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT: vle16.v v8, (a0), v0.t
+; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: ret
%a = insertelement <2 x i1> undef, i1 true, i32 0
%b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
@@ -293,10 +283,8 @@ define <8 x float> @vpload_v8f32(<8 x float>* %ptr, <8 x i1> %m, i32 zeroext %ev
define <8 x float> @vpload_v8f32_allones_mask(<8 x float>* %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v8f32_allones_mask:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT: vle32.v v8, (a0), v0.t
+; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret
%a = insertelement <8 x i1> undef, i1 true, i32 0
%b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -331,10 +319,8 @@ define <4 x double> @vpload_v4f64(<4 x double>* %ptr, <4 x i1> %m, i32 zeroext %
define <4 x double> @vpload_v4f64_allones_mask(<4 x double>* %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v4f64_allones_mask:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
-; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT: vle64.v v8, (a0), v0.t
+; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: ret
%a = insertelement <4 x i1> undef, i1 true, i32 0
%b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll
index 3fef4f9ac32e..b652654c1bbf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll
@@ -19,10 +19,8 @@ define <vscale x 1 x i8> @vpload_nxv1i8(<vscale x 1 x i8>* %ptr, <vscale x 1 x i
define <vscale x 1 x i8> @vpload_nxv1i8_allones_mask(<vscale x 1 x i8>* %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv1i8_allones_mask:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, mu
-; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
-; CHECK-NEXT: vle8.v v8, (a0), v0.t
+; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: ret
%a = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
%b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
@@ -69,10 +67,8 @@ define <vscale x 8 x i8> @vpload_nxv8i8(<vscale x 8 x i8>* %ptr, <vscale x 8 x i
define <vscale x 8 x i8> @vpload_nxv8i8_allones_mask(<vscale x 8 x i8>* %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv8i8_allones_mask:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, mu
-; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
-; CHECK-NEXT: vle8.v v8, (a0), v0.t
+; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: ret
%a = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
%b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
@@ -107,10 +103,8 @@ define <vscale x 2 x i16> @vpload_nxv2i16(<vscale x 2 x i16>* %ptr, <vscale x 2
define <vscale x 2 x i16> @vpload_nxv2i16_allones_mask(<vscale x 2 x i16>* %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv2i16_allones_mask:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, mu
-; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT: vle16.v v8, (a0), v0.t
+; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: ret
%a = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
%b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
@@ -181,10 +175,8 @@ define <vscale x 4 x i32> @vpload_nxv4i32(<vscale x 4 x i32>* %ptr, <vscale x 4
define <vscale x 4 x i32> @vpload_nxv4i32_allones_mask(<vscale x 4 x i32>* %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv4i32_allones_mask:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, mu
-; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT: vle32.v v8, (a0), v0.t
+; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret
%a = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
%b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
@@ -219,10 +211,8 @@ define <vscale x 1 x i64> @vpload_nxv1i64(<vscale x 1 x i64>* %ptr, <vscale x 1
define <vscale x 1 x i64> @vpload_nxv1i64_allones_mask(<vscale x 1 x i64>* %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv1i64_allones_mask:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, mu
-; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT: vle64.v v8, (a0), v0.t
+; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: ret
%a = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
%b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
@@ -293,10 +283,8 @@ define <vscale x 2 x half> @vpload_nxv2f16(<vscale x 2 x half>* %ptr, <vscale x
define <vscale x 2 x half> @vpload_nxv2f16_allones_mask(<vscale x 2 x half>* %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv2f16_allones_mask:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, mu
-; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT: vle16.v v8, (a0), v0.t
+; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: ret
%a = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
%b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
@@ -379,10 +367,8 @@ define <vscale x 8 x float> @vpload_nxv8f32(<vscale x 8 x float>* %ptr, <vscale
define <vscale x 8 x float> @vpload_nxv8f32_allones_mask(<vscale x 8 x float>* %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv8f32_allones_mask:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, mu
-; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT: vle32.v v8, (a0), v0.t
+; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret
%a = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
%b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
@@ -429,10 +415,8 @@ define <vscale x 4 x double> @vpload_nxv4f64(<vscale x 4 x double>* %ptr, <vscal
define <vscale x 4 x double> @vpload_nxv4f64_allones_mask(<vscale x 4 x double>* %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv4f64_allones_mask:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, mu
-; CHECK-NEXT: vmset.m v0
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT: vle64.v v8, (a0), v0.t
+; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: ret
%a = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
%b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
More information about the llvm-commits
mailing list