[llvm] 6c09a4e - [RISCV][VP] expand vp intrinsics if no +zve32x feature
Yingchi Long via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 12 22:48:12 PST 2023
Author: Yingchi Long
Date: 2023-01-13T14:46:42+08:00
New Revision: 6c09a4e5ba2e4197ec6acdb448cd405002ce08f5
URL: https://github.com/llvm/llvm-project/commit/6c09a4e5ba2e4197ec6acdb448cd405002ce08f5
DIFF: https://github.com/llvm/llvm-project/commit/6c09a4e5ba2e4197ec6acdb448cd405002ce08f5.diff
LOG: [RISCV][VP] expand vp intrinsics if no +zve32x feature
If the subtarget does not support VInstructions, expand vp intrinsics to scalar instructions.
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D139706
Added:
llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll
Modified:
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index badb840cd0f86..c4cc7984df872 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -242,10 +242,11 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
TargetTransformInfo::VPLegalization
getVPLegalizationStrategy(const VPIntrinsic &PI) const {
using VPLegalization = TargetTransformInfo::VPLegalization;
- if (PI.getIntrinsicID() == Intrinsic::vp_reduce_mul &&
- cast<VectorType>(PI.getArgOperand(1)->getType())
- ->getElementType()
- ->getIntegerBitWidth() != 1)
+ if (!ST->hasVInstructions() ||
+ (PI.getIntrinsicID() == Intrinsic::vp_reduce_mul &&
+ cast<VectorType>(PI.getArgOperand(1)->getType())
+ ->getElementType()
+ ->getIntegerBitWidth() != 1))
return VPLegalization(VPLegalization::Discard, VPLegalization::Convert);
return VPLegalization(VPLegalization::Legal, VPLegalization::Legal);
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll b/llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll
new file mode 100644
index 0000000000000..b1d52a6154f99
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
+; Should expand to scalar instructions and not crash
+
+declare i32 @llvm.vp.reduce.add.v4i32(i32, <4 x i32>, <4 x i1>, i32)
+
+define i32 @vpreduce_add_v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) {
+; RV32-LABEL: vpreduce_add_v4i32:
+; RV32: # %bb.0:
+; RV32-NEXT: lw a4, 4(a1)
+; RV32-NEXT: lw a5, 12(a1)
+; RV32-NEXT: lw a6, 8(a1)
+; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: lw a7, 0(a2)
+; RV32-NEXT: lw t0, 8(a2)
+; RV32-NEXT: lw t1, 12(a2)
+; RV32-NEXT: lw a2, 4(a2)
+; RV32-NEXT: snez t2, a3
+; RV32-NEXT: sltiu t3, a3, 3
+; RV32-NEXT: xori t3, t3, 1
+; RV32-NEXT: sltiu t4, a3, 4
+; RV32-NEXT: xori t4, t4, 1
+; RV32-NEXT: sltiu a3, a3, 2
+; RV32-NEXT: xori a3, a3, 1
+; RV32-NEXT: and a2, a3, a2
+; RV32-NEXT: and a3, t4, t1
+; RV32-NEXT: and t0, t3, t0
+; RV32-NEXT: and a7, t2, a7
+; RV32-NEXT: neg a7, a7
+; RV32-NEXT: and a1, a7, a1
+; RV32-NEXT: neg a7, t0
+; RV32-NEXT: and a6, a7, a6
+; RV32-NEXT: neg a3, a3
+; RV32-NEXT: and a3, a3, a5
+; RV32-NEXT: neg a2, a2
+; RV32-NEXT: and a2, a2, a4
+; RV32-NEXT: add a2, a2, a3
+; RV32-NEXT: add a2, a6, a2
+; RV32-NEXT: add a0, a2, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vpreduce_add_v4i32:
+; RV64: # %bb.0:
+; RV64-NEXT: lw a4, 8(a1)
+; RV64-NEXT: lw a5, 24(a1)
+; RV64-NEXT: lw a6, 16(a1)
+; RV64-NEXT: lw a1, 0(a1)
+; RV64-NEXT: ld a7, 0(a2)
+; RV64-NEXT: ld t0, 16(a2)
+; RV64-NEXT: ld t1, 24(a2)
+; RV64-NEXT: ld a2, 8(a2)
+; RV64-NEXT: sext.w a3, a3
+; RV64-NEXT: snez t2, a3
+; RV64-NEXT: sltiu t3, a3, 3
+; RV64-NEXT: xori t3, t3, 1
+; RV64-NEXT: sltiu t4, a3, 4
+; RV64-NEXT: xori t4, t4, 1
+; RV64-NEXT: sltiu a3, a3, 2
+; RV64-NEXT: xori a3, a3, 1
+; RV64-NEXT: and a2, a3, a2
+; RV64-NEXT: and a3, t4, t1
+; RV64-NEXT: and t0, t3, t0
+; RV64-NEXT: and a7, t2, a7
+; RV64-NEXT: negw a7, a7
+; RV64-NEXT: and a1, a7, a1
+; RV64-NEXT: negw a7, t0
+; RV64-NEXT: and a6, a7, a6
+; RV64-NEXT: negw a3, a3
+; RV64-NEXT: and a3, a3, a5
+; RV64-NEXT: negw a2, a2
+; RV64-NEXT: and a2, a2, a4
+; RV64-NEXT: add a2, a2, a3
+; RV64-NEXT: add a2, a6, a2
+; RV64-NEXT: add a0, a2, a0
+; RV64-NEXT: addw a0, a1, a0
+; RV64-NEXT: ret
+ %r = call i32 @llvm.vp.reduce.add.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
+ ret i32 %r
+}
More information about the llvm-commits
mailing list