[llvm] [RISCV] Expand vp.stride.load to splat of a scalar load. (PR #98140)
Yeting Kuo via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 9 20:33:02 PDT 2024
https://github.com/yetingk updated https://github.com/llvm/llvm-project/pull/98140
>From 26ac382feb0030266da42ed2e4dbb6614cecace1 Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Mon, 1 Jul 2024 21:08:26 -0700
Subject: [PATCH 1/2] [RISCV] Expand vp.stride.load to splat of a scalar load.
This patch is similar to a214c521f8763b36dd400b89017f74ad5ae4b6c7, but handles vp.strided.load.
Some targets prefer the pattern (vmv.v.x (load)) over a vlse with zero stride.
---
llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp | 46 +++++++++++++++++++
.../RISCV/rvv/fixed-vectors-strided-vpload.ll | 38 +++++++++++++--
llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll | 46 ++++++++++++++++++-
3 files changed, 124 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
index 6e0f429c34b2f..60ae0d49bd020 100644
--- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -18,9 +18,11 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -35,6 +37,7 @@ namespace {
class RISCVCodeGenPrepare : public FunctionPass,
public InstVisitor<RISCVCodeGenPrepare, bool> {
const DataLayout *DL;
+ const DominatorTree *DT;
const RISCVSubtarget *ST;
public:
@@ -48,12 +51,14 @@ class RISCVCodeGenPrepare : public FunctionPass,
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetPassConfig>();
}
bool visitInstruction(Instruction &I) { return false; }
bool visitAnd(BinaryOperator &BO);
bool visitIntrinsicInst(IntrinsicInst &I);
+ bool expandVPStrideLoad(IntrinsicInst &I);
};
} // end anonymous namespace
@@ -128,6 +133,9 @@ bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) {
// Which eliminates the scalar -> vector -> scalar crossing during instruction
// selection.
bool RISCVCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
+ if (expandVPStrideLoad(I))
+ return true;
+
if (I.getIntrinsicID() != Intrinsic::vector_reduce_fadd)
return false;
@@ -155,6 +163,43 @@ bool RISCVCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
return true;
}
+bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
+ if (ST->hasOptimizedZeroStrideLoad())
+ return false;
+
+ Value *BasePtr, *VL;
+ using namespace PatternMatch;
+ if (!match(&II, m_Intrinsic<Intrinsic::experimental_vp_strided_load>(
+ m_Value(BasePtr), m_Zero(), m_AllOnes(), m_Value(VL))))
+ return false;
+
+ if (!isKnownNonZero(VL, {*DL, DT, nullptr, &II}))
+ return false;
+
+ auto *VTy = cast<VectorType>(II.getType());
+
+ // FIXME: Support fixed vector types.
+ if (!isa<ScalableVectorType>(VTy))
+ return false;
+
+ IRBuilder<> Builder(&II);
+
+ // Extend VL from i32 to XLen if needed.
+ if (ST->is64Bit())
+ VL = Builder.CreateZExt(VL, Builder.getInt64Ty());
+
+ Type *STy = VTy->getElementType();
+ Value *Val = Builder.CreateLoad(STy, BasePtr);
+ unsigned VMVOp = STy->isFloatingPointTy() ? Intrinsic::riscv_vfmv_v_f
+ : Intrinsic::riscv_vmv_v_x;
+ Value *Res = Builder.CreateIntrinsic(VMVOp, {VTy, VL->getType()},
+ {PoisonValue::get(VTy), Val, VL});
+
+ II.replaceAllUsesWith(Res);
+ II.eraseFromParent();
+ return true;
+}
+
bool RISCVCodeGenPrepare::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
@@ -164,6 +209,7 @@ bool RISCVCodeGenPrepare::runOnFunction(Function &F) {
ST = &TM.getSubtarget<RISCVSubtarget>(F);
DL = &F.getDataLayout();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
bool MadeChange = false;
for (auto &BB : F)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
index 5e64e9fbc1a2f..50e8e34cee2a1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
@@ -1,10 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh \
-; RUN: -verify-machineinstrs < %s \
-; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV32
+; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; RUN: -check-prefixes=CHECK,CHECK-RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh \
-; RUN: -verify-machineinstrs < %s \
-; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64
+; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; RUN: -check-prefixes=CHECK,CHECK-RV64,CHECK-OPT
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh,+no-optimized-zero-stride-load \
+; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; RUN: -check-prefixes=CHECK,CHECK-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+no-optimized-zero-stride-load \
+; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; RUN: -check-prefixes=CHECK,CHECK-RV64
declare <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i8(ptr, i8, <2 x i1>, i32)
@@ -626,3 +632,27 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask
}
declare <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr, i64, <33 x i1>, i32)
+
+; Test unmasked integer zero strided
+define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) {
+; CHECK-LABEL: zero_strided_unmasked_vpload_4i8_i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vlse8.v v8, (a0), zero
+; CHECK-NEXT: ret
+ %load = call <4 x i8> @llvm.experimental.vp.strided.load.4i8.p0.i8(ptr %ptr, i8 0, <4 x i1> splat (i1 true), i32 4)
+ ret <4 x i8> %load
+}
+
+; Test unmasked float zero strided
+define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) {
+; CHECK-LABEL: zero_strided_unmasked_vpload_4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vlse16.v v8, (a0), zero
+; CHECK-NEXT: ret
+ %load = call <4 x half> @llvm.experimental.vp.strided.load.4f16.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 4)
+ ret <4 x half> %load
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-OPT: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
index 4d3bced0bcb50..d422ed5dcfc22 100644
--- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll
@@ -1,10 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh \
; RUN: -verify-machineinstrs < %s | FileCheck %s \
-; RUN: -check-prefixes=CHECK,CHECK-RV32
+; RUN: -check-prefixes=CHECK,CHECK-RV32,CHECK-OPT
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh \
; RUN: -verify-machineinstrs < %s | FileCheck %s \
-; RUN: -check-prefixes=CHECK,CHECK-RV64
+; RUN: -check-prefixes=CHECK,CHECK-RV64,CHECK-OPT
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh,+no-optimized-zero-stride-load \
+; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; RUN: -check-prefixes=CHECK,CHECK-RV32,CHECK-NOOPT
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+no-optimized-zero-stride-load \
+; RUN: -verify-machineinstrs < %s | FileCheck %s \
+; RUN: -check-prefixes=CHECK,CHECK-RV64,CHECK-NOOPT
declare <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i8(ptr, i8, <vscale x 1 x i1>, i32)
@@ -780,3 +786,39 @@ define <vscale x 16 x double> @strided_load_nxv17f64(ptr %ptr, i64 %stride, <vsc
declare <vscale x 17 x double> @llvm.experimental.vp.strided.load.nxv17f64.p0.i64(ptr, i64, <vscale x 17 x i1>, i32)
declare <vscale x 1 x double> @llvm.experimental.vector.extract.nxv1f64(<vscale x 17 x double> %vec, i64 %idx)
declare <vscale x 16 x double> @llvm.experimental.vector.extract.nxv16f64(<vscale x 17 x double> %vec, i64 %idx)
+
+; Test unmasked integer zero strided
+define <vscale x 1 x i8> @zero_strided_unmasked_vpload_nxv1i8_i8(ptr %ptr) {
+; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_nxv1i8_i8:
+; CHECK-OPT: # %bb.0:
+; CHECK-OPT-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
+; CHECK-OPT-NEXT: vlse8.v v8, (a0), zero
+; CHECK-OPT-NEXT: ret
+;
+; CHECK-NOOPT-LABEL: zero_strided_unmasked_vpload_nxv1i8_i8:
+; CHECK-NOOPT: # %bb.0:
+; CHECK-NOOPT-NEXT: lbu a0, 0(a0)
+; CHECK-NOOPT-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
+; CHECK-NOOPT-NEXT: vmv.v.x v8, a0
+; CHECK-NOOPT-NEXT: ret
+ %load = call <vscale x 1 x i8> @llvm.experimental.vp.strided.load.nxv1i8.p0.i8(ptr %ptr, i8 0, <vscale x 1 x i1> splat (i1 true), i32 4)
+ ret <vscale x 1 x i8> %load
+}
+
+; Test unmasked float zero strided
+define <vscale x 1 x half> @zero_strided_unmasked_vpload_nxv1f16(ptr %ptr) {
+; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_nxv1f16:
+; CHECK-OPT: # %bb.0:
+; CHECK-OPT-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; CHECK-OPT-NEXT: vlse16.v v8, (a0), zero
+; CHECK-OPT-NEXT: ret
+;
+; CHECK-NOOPT-LABEL: zero_strided_unmasked_vpload_nxv1f16:
+; CHECK-NOOPT: # %bb.0:
+; CHECK-NOOPT-NEXT: flh fa5, 0(a0)
+; CHECK-NOOPT-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; CHECK-NOOPT-NEXT: vfmv.v.f v8, fa5
+; CHECK-NOOPT-NEXT: ret
+ %load = call <vscale x 1 x half> @llvm.experimental.vp.strided.load.nxv1f16.p0.i32(ptr %ptr, i32 0, <vscale x 1 x i1> splat (i1 true), i32 4)
+ ret <vscale x 1 x half> %load
+}
>From 8fdd86bece66ebe8bc5abb7df921dfb0152a652f Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Tue, 9 Jul 2024 20:31:39 -0700
Subject: [PATCH 2/2] Use CreateVectorSplat for fixed vector.
---
llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp | 18 +++++----
.../RISCV/rvv/fixed-vectors-strided-vpload.ll | 40 ++++++++++++-------
2 files changed, 36 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
index 60ae0d49bd020..a2200e2d8642d 100644
--- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -178,10 +178,6 @@ bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
auto *VTy = cast<VectorType>(II.getType());
- // FIXME: Support fixed vector types.
- if (!isa<ScalableVectorType>(VTy))
- return false;
-
IRBuilder<> Builder(&II);
// Extend VL from i32 to XLen if needed.
@@ -190,10 +186,16 @@ bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
Type *STy = VTy->getElementType();
Value *Val = Builder.CreateLoad(STy, BasePtr);
- unsigned VMVOp = STy->isFloatingPointTy() ? Intrinsic::riscv_vfmv_v_f
- : Intrinsic::riscv_vmv_v_x;
- Value *Res = Builder.CreateIntrinsic(VMVOp, {VTy, VL->getType()},
- {PoisonValue::get(VTy), Val, VL});
+ const auto &TLI = *ST->getTargetLowering();
+ Value *Res = Builder.CreateVectorSplat(VTy->getElementCount(), Val);
+
+ // TODO: Also support fixed/illegal vector types to splat with evl = vl.
+ if (isa<ScalableVectorType>(VTy) && TLI.isTypeLegal(EVT::getEVT(VTy))) {
+ unsigned VMVOp = STy->isFloatingPointTy() ? Intrinsic::riscv_vfmv_v_f
+ : Intrinsic::riscv_vmv_v_x;
+ Res = Builder.CreateIntrinsic(VMVOp, {VTy, VL->getType()},
+ {PoisonValue::get(VTy), Val, VL});
+ }
II.replaceAllUsesWith(Res);
II.eraseFromParent();
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
index 50e8e34cee2a1..86359043a90d9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll
@@ -7,10 +7,10 @@
; RUN: -check-prefixes=CHECK,CHECK-RV64,CHECK-OPT
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v,+zvfh,+no-optimized-zero-stride-load \
; RUN: -verify-machineinstrs < %s | FileCheck %s \
-; RUN: -check-prefixes=CHECK,CHECK-RV32
+; RUN: -check-prefixes=CHECK,CHECK-RV32,CHECK-NOOPT
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v,+zvfh,+no-optimized-zero-stride-load \
; RUN: -verify-machineinstrs < %s | FileCheck %s \
-; RUN: -check-prefixes=CHECK,CHECK-RV64
+; RUN: -check-prefixes=CHECK,CHECK-RV64,CHECK-NOOPT
declare <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i8(ptr, i8, <2 x i1>, i32)
@@ -635,24 +635,36 @@ declare <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr, i64,
; Test unmasked integer zero strided
define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) {
-; CHECK-LABEL: zero_strided_unmasked_vpload_4i8_i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vlse8.v v8, (a0), zero
-; CHECK-NEXT: ret
+; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8:
+; CHECK-OPT: # %bb.0:
+; CHECK-OPT-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-OPT-NEXT: vlse8.v v8, (a0), zero
+; CHECK-OPT-NEXT: ret
+;
+; CHECK-NOOPT-LABEL: zero_strided_unmasked_vpload_4i8_i8:
+; CHECK-NOOPT: # %bb.0:
+; CHECK-NOOPT-NEXT: lbu a0, 0(a0)
+; CHECK-NOOPT-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NOOPT-NEXT: vmv.v.x v8, a0
+; CHECK-NOOPT-NEXT: ret
%load = call <4 x i8> @llvm.experimental.vp.strided.load.4i8.p0.i8(ptr %ptr, i8 0, <4 x i1> splat (i1 true), i32 4)
ret <4 x i8> %load
}
; Test unmasked float zero strided
define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) {
-; CHECK-LABEL: zero_strided_unmasked_vpload_4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vlse16.v v8, (a0), zero
-; CHECK-NEXT: ret
+; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4f16:
+; CHECK-OPT: # %bb.0:
+; CHECK-OPT-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-OPT-NEXT: vlse16.v v8, (a0), zero
+; CHECK-OPT-NEXT: ret
+;
+; CHECK-NOOPT-LABEL: zero_strided_unmasked_vpload_4f16:
+; CHECK-NOOPT: # %bb.0:
+; CHECK-NOOPT-NEXT: flh fa5, 0(a0)
+; CHECK-NOOPT-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NOOPT-NEXT: vfmv.v.f v8, fa5
+; CHECK-NOOPT-NEXT: ret
%load = call <4 x half> @llvm.experimental.vp.strided.load.4f16.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 4)
ret <4 x half> %load
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-OPT: {{.*}}
More information about the llvm-commits
mailing list