[llvm] [RISCV] Account for zvfhmin and zvfbfmin promotion in register usage (PR #108370)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 12 04:38:39 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Luke Lau (lukel97)
<details>
<summary>Changes</summary>
A half vector with only zvfhmin, or a bfloat vector, will end up getting promoted to f32 for most instructions.
Unless the loop consists only of memory ops and permutation instructions, which don't need to be promoted (is this common?), we'll end up using double the LMUL of what's currently being returned by getRegUsageForType.
Since this is used by the loop vectorizer, it seems better to be conservative and assume that any usage of a zvfhmin half/bfloat will end up being widened to an f32.
---
Full diff: https://github.com/llvm/llvm-project/pull/108370.diff
3 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp (+8-1)
- (added) llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll (+31)
- (added) llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll (+37)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 2b5e7c47279284..3303534ecb4968 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2030,8 +2030,15 @@ void RISCVTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
}
unsigned RISCVTTIImpl::getRegUsageForType(Type *Ty) {
- TypeSize Size = DL.getTypeSizeInBits(Ty);
if (Ty->isVectorTy()) {
+ // f16 w/ zvfhmin and bf16 types will be promoted to f32
+ Type *EltTy = cast<VectorType>(Ty)->getElementType();
+ if ((EltTy->isHalfTy() && !ST->hasVInstructionsF16()) ||
+ EltTy->isBFloatTy())
+ Ty = VectorType::get(Type::getFloatTy(Ty->getContext()),
+ cast<VectorType>(Ty));
+
+ TypeSize Size = DL.getTypeSizeInBits(Ty);
if (Size.isScalable() && ST->hasVInstructions())
return divideCeil(Size.getKnownMinValue(), RISCV::RVVBitsPerBlock);
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll
new file mode 100644
index 00000000000000..89514431278a74
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll
@@ -0,0 +1,31 @@
+; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfbfmin -debug-only=loop-vectorize -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s
+
+define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
+; CHECK-LABEL: add
+; CHECK: LV(REG): Found max usage: 2 item
+; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
+; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
+; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
+; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
+
+entry:
+ %conv = zext i32 %size to i64
+ %cmp10.not = icmp eq i32 %size, 0
+ br i1 %cmp10.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %i.011 = phi i64 [ %add4, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds bfloat, ptr %src1, i64 %i.011
+ %0 = load bfloat, ptr %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds bfloat, ptr %src2, i64 %i.011
+ %1 = load bfloat, ptr %arrayidx2, align 4
+ %add = fadd bfloat %0, %1
+ %arrayidx3 = getelementptr inbounds bfloat, ptr %result, i64 %i.011
+ store bfloat %add, ptr %arrayidx3, align 4
+ %add4 = add nuw nsw i64 %i.011, 1
+ %exitcond.not = icmp eq i64 %add4, %conv
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll
new file mode 100644
index 00000000000000..ceedcfba4691e1
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll
@@ -0,0 +1,37 @@
+; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfh -debug-only=loop-vectorize -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFH
+; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfhmin -debug-only=loop-vectorize -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFHMIN
+
+define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
+; CHECK-LABEL: add
+; ZVFH: LV(REG): Found max usage: 2 item
+; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
+; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
+; ZVFH-NEXT: LV(REG): Found invariant usage: 1 item
+; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
+; ZVFHMIN: LV(REG): Found max usage: 2 item
+; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
+; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
+; ZVFHMIN-NEXT: LV(REG): Found invariant usage: 1 item
+; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
+
+entry:
+ %conv = zext i32 %size to i64
+ %cmp10.not = icmp eq i32 %size, 0
+ br i1 %cmp10.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %i.011 = phi i64 [ %add4, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds half, ptr %src1, i64 %i.011
+ %0 = load half, ptr %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds half, ptr %src2, i64 %i.011
+ %1 = load half, ptr %arrayidx2, align 4
+ %add = fadd half %0, %1
+ %arrayidx3 = getelementptr inbounds half, ptr %result, i64 %i.011
+ store half %add, ptr %arrayidx3, align 4
+ %add4 = add nuw nsw i64 %i.011, 1
+ %exitcond.not = icmp eq i64 %add4, %conv
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/108370
More information about the llvm-commits
mailing list