[llvm] 6c09a4e - [RISCV][VP] expand vp intrinsics if no +zve32x feature
Yingchi Long via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 12 22:48:12 PST 2023
Author: Yingchi Long
Date: 2023-01-13T14:46:42+08:00
New Revision: 6c09a4e5ba2e4197ec6acdb448cd405002ce08f5
URL: https://github.com/llvm/llvm-project/commit/6c09a4e5ba2e4197ec6acdb448cd405002ce08f5
DIFF: https://github.com/llvm/llvm-project/commit/6c09a4e5ba2e4197ec6acdb448cd405002ce08f5.diff
LOG: [RISCV][VP] expand vp intrinsics if no +zve32x feature
If the subtarget does not support VInstructions, expand vp intrinsics to scalar instructions.
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D139706
Added:
llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll
Modified:
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index badb840cd0f86..c4cc7984df872 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -242,10 +242,11 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
TargetTransformInfo::VPLegalization
getVPLegalizationStrategy(const VPIntrinsic &PI) const {
using VPLegalization = TargetTransformInfo::VPLegalization;
- if (PI.getIntrinsicID() == Intrinsic::vp_reduce_mul &&
- cast<VectorType>(PI.getArgOperand(1)->getType())
- ->getElementType()
- ->getIntegerBitWidth() != 1)
+ if (!ST->hasVInstructions() ||
+ (PI.getIntrinsicID() == Intrinsic::vp_reduce_mul &&
+ cast<VectorType>(PI.getArgOperand(1)->getType())
+ ->getElementType()
+ ->getIntegerBitWidth() != 1))
return VPLegalization(VPLegalization::Discard, VPLegalization::Convert);
return VPLegalization(VPLegalization::Legal, VPLegalization::Legal);
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll b/llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll
new file mode 100644
index 0000000000000..b1d52a6154f99
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/expand-no-v.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
+; Should expand to scalar instructions and not crash
+
+declare i32 @llvm.vp.reduce.add.v4i32(i32, <4 x i32>, <4 x i1>, i32)
+
+define i32 @vpreduce_add_v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl) {
+; RV32-LABEL: vpreduce_add_v4i32:
+; RV32: # %bb.0:
+; RV32-NEXT: lw a4, 4(a1)
+; RV32-NEXT: lw a5, 12(a1)
+; RV32-NEXT: lw a6, 8(a1)
+; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: lw a7, 0(a2)
+; RV32-NEXT: lw t0, 8(a2)
+; RV32-NEXT: lw t1, 12(a2)
+; RV32-NEXT: lw a2, 4(a2)
+; RV32-NEXT: snez t2, a3
+; RV32-NEXT: sltiu t3, a3, 3
+; RV32-NEXT: xori t3, t3, 1
+; RV32-NEXT: sltiu t4, a3, 4
+; RV32-NEXT: xori t4, t4, 1
+; RV32-NEXT: sltiu a3, a3, 2
+; RV32-NEXT: xori a3, a3, 1
+; RV32-NEXT: and a2, a3, a2
+; RV32-NEXT: and a3, t4, t1
+; RV32-NEXT: and t0, t3, t0
+; RV32-NEXT: and a7, t2, a7
+; RV32-NEXT: neg a7, a7
+; RV32-NEXT: and a1, a7, a1
+; RV32-NEXT: neg a7, t0
+; RV32-NEXT: and a6, a7, a6
+; RV32-NEXT: neg a3, a3
+; RV32-NEXT: and a3, a3, a5
+; RV32-NEXT: neg a2, a2
+; RV32-NEXT: and a2, a2, a4
+; RV32-NEXT: add a2, a2, a3
+; RV32-NEXT: add a2, a6, a2
+; RV32-NEXT: add a0, a2, a0
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vpreduce_add_v4i32:
+; RV64: # %bb.0:
+; RV64-NEXT: lw a4, 8(a1)
+; RV64-NEXT: lw a5, 24(a1)
+; RV64-NEXT: lw a6, 16(a1)
+; RV64-NEXT: lw a1, 0(a1)
+; RV64-NEXT: ld a7, 0(a2)
+; RV64-NEXT: ld t0, 16(a2)
+; RV64-NEXT: ld t1, 24(a2)
+; RV64-NEXT: ld a2, 8(a2)
+; RV64-NEXT: sext.w a3, a3
+; RV64-NEXT: snez t2, a3
+; RV64-NEXT: sltiu t3, a3, 3
+; RV64-NEXT: xori t3, t3, 1
+; RV64-NEXT: sltiu t4, a3, 4
+; RV64-NEXT: xori t4, t4, 1
+; RV64-NEXT: sltiu a3, a3, 2
+; RV64-NEXT: xori a3, a3, 1
+; RV64-NEXT: and a2, a3, a2
+; RV64-NEXT: and a3, t4, t1
+; RV64-NEXT: and t0, t3, t0
+; RV64-NEXT: and a7, t2, a7
+; RV64-NEXT: negw a7, a7
+; RV64-NEXT: and a1, a7, a1
+; RV64-NEXT: negw a7, t0
+; RV64-NEXT: and a6, a7, a6
+; RV64-NEXT: negw a3, a3
+; RV64-NEXT: and a3, a3, a5
+; RV64-NEXT: negw a2, a2
+; RV64-NEXT: and a2, a2, a4
+; RV64-NEXT: add a2, a2, a3
+; RV64-NEXT: add a2, a6, a2
+; RV64-NEXT: add a0, a2, a0
+; RV64-NEXT: addw a0, a1, a0
+; RV64-NEXT: ret
+ %r = call i32 @llvm.vp.reduce.add.v4i32(i32 %s, <4 x i32> %v, <4 x i1> %m, i32 %evl)
+ ret i32 %r
+}
More information about the llvm-commits
mailing list