[llvm] d8800ea - [RISCV] Scalarize binop followed by extractelement.
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 25 02:23:49 PDT 2022
Author: jacquesguan
Date: 2022-07-25T17:23:31+08:00
New Revision: d8800ead629d403a298d32e3b1a7ac4f7aac554a
URL: https://github.com/llvm/llvm-project/commit/d8800ead629d403a298d32e3b1a7ac4f7aac554a
DIFF: https://github.com/llvm/llvm-project/commit/d8800ead629d403a298d32e3b1a7ac4f7aac554a.diff
LOG: [RISCV] Scalarize binop followed by extractelement.
This patch adds a shouldScalarizeBinop hook to the RISCV target in order to convert an extractelement of a vector binary operation into an extractelement followed by a scalar binary operation.
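In IR terms, the transform rewrites (a conceptual sketch; the combine itself is performed on SelectionDAG nodes once this hook returns true):

  %bo = add <4 x i32> %x, <i32 11, i32 12, i32 13, i32 14>
  %ext = extractelement <4 x i32> %bo, i32 2

into the scalar equivalent of:

  ; extract the element first, then apply the operation to the scalar
  %e = extractelement <4 x i32> %x, i32 2
  %ext = add i32 %e, 13

as can be seen in the updated RV32 check lines below, where the vadd.vi is replaced by a vslidedown.vi/vmv.x.s pair followed by a scalar addi.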
Differential Revision: https://reviews.llvm.org/D129545
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.h
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 1702546b58a6..146605782e9a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1313,6 +1313,25 @@ bool RISCVTargetLowering::shouldSinkOperands(
return true;
}
+bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
+ unsigned Opc = VecOp.getOpcode();
+
+ // Assume target opcodes can't be scalarized.
+ // TODO - do we have any exceptions?
+ if (Opc >= ISD::BUILTIN_OP_END)
+ return false;
+
+ // If the vector op is not supported, try to convert to scalar.
+ EVT VecVT = VecOp.getValueType();
+ if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
+ return true;
+
+ // If the vector op is supported, but the scalar op is not, the transform may
+ // not be worthwhile.
+ EVT ScalarVT = VecVT.getScalarType();
+ return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
+}
+
bool RISCVTargetLowering::isOffsetFoldingLegal(
const GlobalAddressSDNode *GA) const {
// In order to maximise the opportunity for common subexpression elimination,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 5e15176de59c..6ecf8b8324d4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -376,6 +376,7 @@ class RISCVTargetLowering : public TargetLowering {
SelectionDAG &DAG) const override;
bool shouldSinkOperands(Instruction *I,
SmallVectorImpl<Use *> &Ops) const override;
+ bool shouldScalarizeBinop(SDValue VecOp) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
index 18df0ef51306..d9e2a1c409eb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32NOM
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+experimental-zvfh,+f,+d,+m -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32M
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+experimental-zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+experimental-zvfh,+f,+d,+m -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
define i8 @extractelt_v16i8(<16 x i8>* %x) nounwind {
; CHECK-LABEL: extractelt_v16i8:
@@ -613,74 +615,123 @@ define void @store_extractelt_v2f64(<2 x double>* %x, double* %p) nounwind {
}
define i32 @extractelt_add_v4i32(<4 x i32> %x) {
-; CHECK-LABEL: extractelt_add_v4i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vadd.vi v8, v8, 13
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
-; CHECK-NEXT: vslidedown.vi v8, v8, 2
-; CHECK-NEXT: vmv.x.s a0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: extractelt_add_v4i32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV32-NEXT: vslidedown.vi v8, v8, 2
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: addi a0, a0, 13
+; RV32-NEXT: ret
+;
+; RV64-LABEL: extractelt_add_v4i32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; RV64-NEXT: vadd.vi v8, v8, 13
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV64-NEXT: vslidedown.vi v8, v8, 2
+; RV64-NEXT: vmv.x.s a0, v8
+; RV64-NEXT: ret
%bo = add <4 x i32> %x, <i32 11, i32 12, i32 13, i32 14>
%ext = extractelement <4 x i32> %bo, i32 2
ret i32 %ext
}
define i32 @extractelt_sub_v4i32(<4 x i32> %x) {
-; CHECK-LABEL: extractelt_sub_v4i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vrsub.vi v8, v8, 13
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
-; CHECK-NEXT: vslidedown.vi v8, v8, 2
-; CHECK-NEXT: vmv.x.s a0, v8
-; CHECK-NEXT: ret
+; RV32-LABEL: extractelt_sub_v4i32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV32-NEXT: vslidedown.vi v8, v8, 2
+; RV32-NEXT: vmv.x.s a0, v8
+; RV32-NEXT: li a1, 13
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: extractelt_sub_v4i32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; RV64-NEXT: vrsub.vi v8, v8, 13
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV64-NEXT: vslidedown.vi v8, v8, 2
+; RV64-NEXT: vmv.x.s a0, v8
+; RV64-NEXT: ret
%bo = sub <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %x
%ext = extractelement <4 x i32> %bo, i32 2
ret i32 %ext
}
define i32 @extractelt_mul_v4i32(<4 x i32> %x) {
-; CHECK-LABEL: extractelt_mul_v4i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 13
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vmul.vx v8, v8, a0
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
-; CHECK-NEXT: vslidedown.vi v8, v8, 2
-; CHECK-NEXT: vmv.x.s a0, v8
-; CHECK-NEXT: ret
+; RV32NOM-LABEL: extractelt_mul_v4i32:
+; RV32NOM: # %bb.0:
+; RV32NOM-NEXT: li a0, 13
+; RV32NOM-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; RV32NOM-NEXT: vmul.vx v8, v8, a0
+; RV32NOM-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV32NOM-NEXT: vslidedown.vi v8, v8, 2
+; RV32NOM-NEXT: vmv.x.s a0, v8
+; RV32NOM-NEXT: ret
+;
+; RV32M-LABEL: extractelt_mul_v4i32:
+; RV32M: # %bb.0:
+; RV32M-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV32M-NEXT: vslidedown.vi v8, v8, 2
+; RV32M-NEXT: vmv.x.s a0, v8
+; RV32M-NEXT: li a1, 13
+; RV32M-NEXT: mul a0, a0, a1
+; RV32M-NEXT: ret
+;
+; RV64-LABEL: extractelt_mul_v4i32:
+; RV64: # %bb.0:
+; RV64-NEXT: li a0, 13
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; RV64-NEXT: vmul.vx v8, v8, a0
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV64-NEXT: vslidedown.vi v8, v8, 2
+; RV64-NEXT: vmv.x.s a0, v8
+; RV64-NEXT: ret
%bo = mul <4 x i32> %x, <i32 11, i32 12, i32 13, i32 14>
%ext = extractelement <4 x i32> %bo, i32 2
ret i32 %ext
}
define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
-; RV32-LABEL: extractelt_sdiv_v4i32:
-; RV32: # %bb.0:
-; RV32-NEXT: li a0, -1
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vmv.v.i v10, 0
-; RV32-NEXT: vsetvli zero, zero, e32, m1, tu, mu
-; RV32-NEXT: vslideup.vi v10, v9, 3
-; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; RV32-NEXT: lui a0, %hi(.LCPI38_0)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI38_0)
-; RV32-NEXT: vle32.v v9, (a0)
-; RV32-NEXT: lui a0, %hi(.LCPI38_1)
-; RV32-NEXT: addi a0, a0, %lo(.LCPI38_1)
-; RV32-NEXT: vle32.v v11, (a0)
-; RV32-NEXT: vand.vv v10, v8, v10
-; RV32-NEXT: vmulh.vv v8, v8, v9
-; RV32-NEXT: vadd.vv v8, v8, v10
-; RV32-NEXT: vsra.vv v9, v8, v11
-; RV32-NEXT: vsrl.vi v8, v8, 31
-; RV32-NEXT: vadd.vv v8, v9, v8
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, mu
-; RV32-NEXT: vslidedown.vi v8, v8, 2
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: ret
+; RV32NOM-LABEL: extractelt_sdiv_v4i32:
+; RV32NOM: # %bb.0:
+; RV32NOM-NEXT: li a0, -1
+; RV32NOM-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; RV32NOM-NEXT: vmv.s.x v9, a0
+; RV32NOM-NEXT: vmv.v.i v10, 0
+; RV32NOM-NEXT: vsetvli zero, zero, e32, m1, tu, mu
+; RV32NOM-NEXT: vslideup.vi v10, v9, 3
+; RV32NOM-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; RV32NOM-NEXT: lui a0, %hi(.LCPI38_0)
+; RV32NOM-NEXT: addi a0, a0, %lo(.LCPI38_0)
+; RV32NOM-NEXT: vle32.v v9, (a0)
+; RV32NOM-NEXT: lui a0, %hi(.LCPI38_1)
+; RV32NOM-NEXT: addi a0, a0, %lo(.LCPI38_1)
+; RV32NOM-NEXT: vle32.v v11, (a0)
+; RV32NOM-NEXT: vand.vv v10, v8, v10
+; RV32NOM-NEXT: vmulh.vv v8, v8, v9
+; RV32NOM-NEXT: vadd.vv v8, v8, v10
+; RV32NOM-NEXT: vsra.vv v9, v8, v11
+; RV32NOM-NEXT: vsrl.vi v8, v8, 31
+; RV32NOM-NEXT: vadd.vv v8, v9, v8
+; RV32NOM-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV32NOM-NEXT: vslidedown.vi v8, v8, 2
+; RV32NOM-NEXT: vmv.x.s a0, v8
+; RV32NOM-NEXT: ret
+;
+; RV32M-LABEL: extractelt_sdiv_v4i32:
+; RV32M: # %bb.0:
+; RV32M-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV32M-NEXT: vslidedown.vi v8, v8, 2
+; RV32M-NEXT: vmv.x.s a0, v8
+; RV32M-NEXT: lui a1, 322639
+; RV32M-NEXT: addi a1, a1, -945
+; RV32M-NEXT: mulh a0, a0, a1
+; RV32M-NEXT: srli a1, a0, 31
+; RV32M-NEXT: srai a0, a0, 2
+; RV32M-NEXT: add a0, a0, a1
+; RV32M-NEXT: ret
;
; RV64-LABEL: extractelt_sdiv_v4i32:
; RV64: # %bb.0:
@@ -713,25 +764,56 @@ define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
}
define i32 @extractelt_udiv_v4i32(<4 x i32> %x) {
-; CHECK-LABEL: extractelt_udiv_v4i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 1
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vmv.s.x v9, a0
-; CHECK-NEXT: vmv.v.i v10, 0
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu
-; CHECK-NEXT: vslideup.vi v10, v9, 3
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
-; CHECK-NEXT: lui a0, %hi(.LCPI39_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI39_0)
-; CHECK-NEXT: vle32.v v9, (a0)
-; CHECK-NEXT: vsrl.vv v8, v8, v10
-; CHECK-NEXT: vmulhu.vv v8, v8, v9
-; CHECK-NEXT: vsrl.vi v8, v8, 2
-; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
-; CHECK-NEXT: vslidedown.vi v8, v8, 2
-; CHECK-NEXT: vmv.x.s a0, v8
-; CHECK-NEXT: ret
+; RV32NOM-LABEL: extractelt_udiv_v4i32:
+; RV32NOM: # %bb.0:
+; RV32NOM-NEXT: li a0, 1
+; RV32NOM-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; RV32NOM-NEXT: vmv.s.x v9, a0
+; RV32NOM-NEXT: vmv.v.i v10, 0
+; RV32NOM-NEXT: vsetvli zero, zero, e32, m1, tu, mu
+; RV32NOM-NEXT: vslideup.vi v10, v9, 3
+; RV32NOM-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; RV32NOM-NEXT: lui a0, %hi(.LCPI39_0)
+; RV32NOM-NEXT: addi a0, a0, %lo(.LCPI39_0)
+; RV32NOM-NEXT: vle32.v v9, (a0)
+; RV32NOM-NEXT: vsrl.vv v8, v8, v10
+; RV32NOM-NEXT: vmulhu.vv v8, v8, v9
+; RV32NOM-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV32NOM-NEXT: vslidedown.vi v8, v8, 2
+; RV32NOM-NEXT: vmv.x.s a0, v8
+; RV32NOM-NEXT: srli a0, a0, 2
+; RV32NOM-NEXT: ret
+;
+; RV32M-LABEL: extractelt_udiv_v4i32:
+; RV32M: # %bb.0:
+; RV32M-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV32M-NEXT: vslidedown.vi v8, v8, 2
+; RV32M-NEXT: vmv.x.s a0, v8
+; RV32M-NEXT: lui a1, 322639
+; RV32M-NEXT: addi a1, a1, -945
+; RV32M-NEXT: mulhu a0, a0, a1
+; RV32M-NEXT: srli a0, a0, 2
+; RV32M-NEXT: ret
+;
+; RV64-LABEL: extractelt_udiv_v4i32:
+; RV64: # %bb.0:
+; RV64-NEXT: li a0, 1
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; RV64-NEXT: vmv.s.x v9, a0
+; RV64-NEXT: vmv.v.i v10, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m1, tu, mu
+; RV64-NEXT: vslideup.vi v10, v9, 3
+; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu
+; RV64-NEXT: lui a0, %hi(.LCPI39_0)
+; RV64-NEXT: addi a0, a0, %lo(.LCPI39_0)
+; RV64-NEXT: vle32.v v9, (a0)
+; RV64-NEXT: vsrl.vv v8, v8, v10
+; RV64-NEXT: vmulhu.vv v8, v8, v9
+; RV64-NEXT: vsrl.vi v8, v8, 2
+; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, mu
+; RV64-NEXT: vslidedown.vi v8, v8, 2
+; RV64-NEXT: vmv.x.s a0, v8
+; RV64-NEXT: ret
%bo = udiv <4 x i32> %x, <i32 11, i32 12, i32 13, i32 14>
%ext = extractelement <4 x i32> %bo, i32 2
ret i32 %ext
@@ -742,11 +824,10 @@ define float @extractelt_fadd_v4f32(<4 x float> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI40_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI40_0)(a0)
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vfadd.vf v8, v8, ft0
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
; CHECK-NEXT: vslidedown.vi v8, v8, 2
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vfmv.f.s ft1, v8
+; CHECK-NEXT: fadd.s fa0, ft1, ft0
; CHECK-NEXT: ret
%bo = fadd <4 x float> %x, <float 11.0, float 12.0, float 13.0, float 14.0>
%ext = extractelement <4 x float> %bo, i32 2
@@ -758,11 +839,10 @@ define float @extractelt_fsub_v4f32(<4 x float> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI41_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI41_0)(a0)
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vfrsub.vf v8, v8, ft0
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
; CHECK-NEXT: vslidedown.vi v8, v8, 2
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vfmv.f.s ft1, v8
+; CHECK-NEXT: fsub.s fa0, ft0, ft1
; CHECK-NEXT: ret
%bo = fsub <4 x float> <float 11.0, float 12.0, float 13.0, float 14.0>, %x
%ext = extractelement <4 x float> %bo, i32 2
@@ -774,11 +854,10 @@ define float @extractelt_fmul_v4f32(<4 x float> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI42_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI42_0)(a0)
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vfmul.vf v8, v8, ft0
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
; CHECK-NEXT: vslidedown.vi v8, v8, 2
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vfmv.f.s ft1, v8
+; CHECK-NEXT: fmul.s fa0, ft1, ft0
; CHECK-NEXT: ret
%bo = fmul <4 x float> %x, <float 11.0, float 12.0, float 13.0, float 14.0>
%ext = extractelement <4 x float> %bo, i32 2
@@ -790,11 +869,10 @@ define float @extractelt_fdiv_v4f32(<4 x float> %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: lui a0, %hi(.LCPI43_0)
; CHECK-NEXT: flw ft0, %lo(.LCPI43_0)(a0)
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT: vfdiv.vf v8, v8, ft0
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu
; CHECK-NEXT: vslidedown.vi v8, v8, 2
-; CHECK-NEXT: vfmv.f.s fa0, v8
+; CHECK-NEXT: vfmv.f.s ft1, v8
+; CHECK-NEXT: fdiv.s fa0, ft1, ft0
; CHECK-NEXT: ret
%bo = fdiv <4 x float> %x, <float 11.0, float 12.0, float 13.0, float 14.0>
%ext = extractelement <4 x float> %bo, i32 2