[llvm] 41f1b46 - [RISCV] Account for zvfhmin and zvfbfmin promotion in register usage (#108370)

via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 16 22:50:22 PDT 2024


Author: Luke Lau
Date: 2024-09-17T13:50:19+08:00
New Revision: 41f1b467a29d2ca4e35df37c3aa79a0a8c04bc4f

URL: https://github.com/llvm/llvm-project/commit/41f1b467a29d2ca4e35df37c3aa79a0a8c04bc4f
DIFF: https://github.com/llvm/llvm-project/commit/41f1b467a29d2ca4e35df37c3aa79a0a8c04bc4f.diff

LOG: [RISCV] Account for zvfhmin and zvfbfmin promotion in register usage (#108370)

A half with only zvfhmin or bfloat will end up getting promoted to a f32
for most instructions.

Unless the loop consists only of memory ops and permutation instructions
which don't need promoted (is this common?), we'll end up using double
the LMUL than what's currently being returned by getRegUsageForType.

Since this is used by the loop vectorizer, it seems better to be
conservative and assume that any usage of a zvfhmin half/bfloat will end
up being widened to a f32

Added: 
    llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll
    llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll

Modified: 
    llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 2b5e7c47279284..0e065111385299 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2030,8 +2030,15 @@ void RISCVTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
 }
 
 unsigned RISCVTTIImpl::getRegUsageForType(Type *Ty) {
-  TypeSize Size = DL.getTypeSizeInBits(Ty);
   if (Ty->isVectorTy()) {
+    // f16 with only zvfhmin and bf16 will be promoted to f32
+    Type *EltTy = cast<VectorType>(Ty)->getElementType();
+    if ((EltTy->isHalfTy() && !ST->hasVInstructionsF16()) ||
+        EltTy->isBFloatTy())
+      Ty = VectorType::get(Type::getFloatTy(Ty->getContext()),
+                           cast<VectorType>(Ty));
+
+    TypeSize Size = DL.getTypeSizeInBits(Ty);
     if (Size.isScalable() && ST->hasVInstructions())
       return divideCeil(Size.getKnownMinValue(), RISCV::RVVBitsPerBlock);
 

diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll
new file mode 100644
index 00000000000000..89514431278a74
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll
@@ -0,0 +1,31 @@
+; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfbfmin -debug-only=loop-vectorize -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s
+
+define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
+; CHECK-LABEL: add
+; CHECK:       LV(REG): Found max usage: 2 item
+; CHECK-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
+; CHECK-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 4 registers
+; CHECK-NEXT:  LV(REG): Found invariant usage: 1 item
+; CHECK-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
+
+entry:
+  %conv = zext i32 %size to i64
+  %cmp10.not = icmp eq i32 %size, 0
+  br i1 %cmp10.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %i.011 = phi i64 [ %add4, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds bfloat, ptr %src1, i64 %i.011
+  %0 = load bfloat, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds bfloat, ptr %src2, i64 %i.011
+  %1 = load bfloat, ptr %arrayidx2, align 4
+  %add = fadd bfloat %0, %1
+  %arrayidx3 = getelementptr inbounds bfloat, ptr %result, i64 %i.011
+  store bfloat %add, ptr %arrayidx3, align 4
+  %add4 = add nuw nsw i64 %i.011, 1
+  %exitcond.not = icmp eq i64 %add4, %conv
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}

diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll
new file mode 100644
index 00000000000000..ceedcfba4691e1
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll
@@ -0,0 +1,37 @@
+; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfh -debug-only=loop-vectorize -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFH
+; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfhmin -debug-only=loop-vectorize -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFHMIN
+
+define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
+; CHECK-LABEL: add
+; ZVFH:       LV(REG): Found max usage: 2 item
+; ZVFH-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
+; ZVFH-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 2 registers
+; ZVFH-NEXT:  LV(REG): Found invariant usage: 1 item
+; ZVFH-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
+; ZVFHMIN:       LV(REG): Found max usage: 2 item
+; ZVFHMIN-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
+; ZVFHMIN-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 4 registers
+; ZVFHMIN-NEXT:  LV(REG): Found invariant usage: 1 item
+; ZVFHMIN-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
+
+entry:
+  %conv = zext i32 %size to i64
+  %cmp10.not = icmp eq i32 %size, 0
+  br i1 %cmp10.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %i.011 = phi i64 [ %add4, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds half, ptr %src1, i64 %i.011
+  %0 = load half, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds half, ptr %src2, i64 %i.011
+  %1 = load half, ptr %arrayidx2, align 4
+  %add = fadd half %0, %1
+  %arrayidx3 = getelementptr inbounds half, ptr %result, i64 %i.011
+  store half %add, ptr %arrayidx3, align 4
+  %add4 = add nuw nsw i64 %i.011, 1
+  %exitcond.not = icmp eq i64 %add4, %conv
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}


        


More information about the llvm-commits mailing list