[llvm] dc00cbb - [RISCV] Match trunc_vector_vl+sra_vl/srl_vl with splat shift amount to vnsra/vnsrl.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue May 11 09:29:44 PDT 2021
Author: Craig Topper
Date: 2021-05-11T09:29:31-07:00
New Revision: dc00cbb5053895356955a6dc03632d4fa05048e3
URL: https://github.com/llvm/llvm-project/commit/dc00cbb5053895356955a6dc03632d4fa05048e3
DIFF: https://github.com/llvm/llvm-project/commit/dc00cbb5053895356955a6dc03632d4fa05048e3.diff
LOG: [RISCV] Match trunc_vector_vl+sra_vl/srl_vl with splat shift amount to vnsra/vnsrl.
This is limited to splats because we would otherwise need to truncate the
shift amount vector.
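For reference, the splat case that the new patterns cover looks roughly
like the IR below, while a non-splat shift amount is left alone (a minimal
sketch with illustrative function names; the added test file has the real
coverage):

  define <8 x i8> @narrow_ashr_splat(<8 x i16> %x, i16 %y) {
    ; Splat of a scalar shift amount: now selected as vnsra.wx.
    %head = insertelement <8 x i16> undef, i16 %y, i32 0
    %splat = shufflevector <8 x i16> %head, <8 x i16> undef, <8 x i32> zeroinitializer
    %sh = ashr <8 x i16> %x, %splat
    %tr = trunc <8 x i16> %sh to <8 x i8>
    ret <8 x i8> %tr
  }

  define <8 x i8> @narrow_ashr_nonsplat(<8 x i16> %x, <8 x i16> %y) {
    ; Non-splat shift amount vector: not matched, since the i16 shift
    ; amounts would first have to be truncated to the narrower element
    ; type.
    %sh = ashr <8 x i16> %x, %y
    %tr = trunc <8 x i16> %sh to <8 x i8>
    ret <8 x i8> %tr
  }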
I tried to do this with new ISD nodes and a DAG combine to
avoid such a large pattern, but we don't form the splat until
LegalizeDAG, and we need a DAG combine to remove a
scalable->fixed->scalable cast before the splat becomes visible to the
shift node. By the time that happens we've already visited the truncate
node and won't revisit it.
I think I have an idea for improving i64 on RV32 that I'll save for a
follow-up.
Reviewed By: frasercrmck
Differential Revision: https://reviews.llvm.org/D102019
Added:
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll
Modified:
llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 4ae46a18a6f41..f5b7f5c3a4211 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -636,15 +636,47 @@ defm : VPatBinaryVL_VV_VX_VI<riscv_shl_vl, "PseudoVSLL", uimm5>;
defm : VPatBinaryVL_VV_VX_VI<riscv_srl_vl, "PseudoVSRL", uimm5>;
defm : VPatBinaryVL_VV_VX_VI<riscv_sra_vl, "PseudoVSRA", uimm5>;
+
+
// 12.7. Vector Narrowing Integer Right Shift Instructions
-foreach vtiTofti = AllFractionableVF2IntVectors in {
- defvar vti = vtiTofti.Vti;
- defvar fti = vtiTofti.Fti;
- def : Pat<(fti.Vector (riscv_trunc_vector_vl (vti.Vector vti.RegClass:$rs1),
+foreach vtiTowti = AllWidenableIntVectors in {
+ defvar vti = vtiTowti.Vti;
+ defvar wti = vtiTowti.Wti;
+ def : Pat<(vti.Vector (riscv_trunc_vector_vl (wti.Vector wti.RegClass:$rs1),
(vti.Mask true_mask),
VLOpFrag)),
- (!cast<Instruction>("PseudoVNSRL_WI_"#fti.LMul.MX)
- vti.RegClass:$rs1, 0, GPR:$vl, fti.Log2SEW)>;
+ (!cast<Instruction>("PseudoVNSRL_WI_"#vti.LMul.MX)
+ wti.RegClass:$rs1, 0, GPR:$vl, vti.Log2SEW)>;
+
+ def : Pat<(vti.Vector
+ (riscv_trunc_vector_vl
+ (wti.Vector
+ (riscv_sra_vl wti.RegClass:$rs1, (SplatPat XLenVT:$rs2),
+ true_mask, VLOpFrag)), true_mask, VLOpFrag)),
+ (!cast<Instruction>("PseudoVNSRA_WX_"#vti.LMul.MX)
+ wti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>;
+ def : Pat<(vti.Vector
+ (riscv_trunc_vector_vl
+ (wti.Vector
+ (riscv_sra_vl wti.RegClass:$rs1, (SplatPat_uimm5 uimm5:$rs2),
+ true_mask, VLOpFrag)), true_mask, VLOpFrag)),
+ (!cast<Instruction>("PseudoVNSRA_WI_"#vti.LMul.MX)
+ wti.RegClass:$rs1, uimm5:$rs2, GPR:$vl, vti.Log2SEW)>;
+
+ def : Pat<(vti.Vector
+ (riscv_trunc_vector_vl
+ (wti.Vector
+ (riscv_srl_vl wti.RegClass:$rs1, (SplatPat XLenVT:$rs2),
+ true_mask, VLOpFrag)), true_mask, VLOpFrag)),
+ (!cast<Instruction>("PseudoVNSRL_WX_"#vti.LMul.MX)
+ wti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>;
+ def : Pat<(vti.Vector
+ (riscv_trunc_vector_vl
+ (wti.Vector
+ (riscv_srl_vl wti.RegClass:$rs1, (SplatPat_uimm5 uimm5:$rs2),
+ true_mask, VLOpFrag)), true_mask, VLOpFrag)),
+ (!cast<Instruction>("PseudoVNSRL_WI_"#vti.LMul.MX)
+ wti.RegClass:$rs1, uimm5:$rs2, GPR:$vl, vti.Log2SEW)>;
}
// 12.8. Vector Integer Comparison Instructions
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll
new file mode 100644
index 0000000000000..40249fc76eab5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll
@@ -0,0 +1,189 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+
+define <8 x i8> @vnsra_v8i16_v8i8_scalar(<8 x i16> %x, i16 %y) {
+; CHECK-LABEL: vnsra_v8i16_v8i8_scalar:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 8, e8,mf2,ta,mu
+; CHECK-NEXT: vnsra.wx v25, v8, a0
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+ %insert = insertelement <8 x i16> undef, i16 %y, i16 0
+ %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+ %a = ashr <8 x i16> %x, %splat
+ %b = trunc <8 x i16> %a to <8 x i8>
+ ret <8 x i8> %b
+}
+
+define <4 x i16> @vnsra_v4i32_v4i16_scalar(<4 x i32> %x, i32 %y) {
+; CHECK-LABEL: vnsra_v4i32_v4i16_scalar:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 4, e16,mf2,ta,mu
+; CHECK-NEXT: vnsra.wx v25, v8, a0
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+ %insert = insertelement <4 x i32> undef, i32 %y, i32 0
+ %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+ %a = ashr <4 x i32> %x, %splat
+ %b = trunc <4 x i32> %a to <4 x i16>
+ ret <4 x i16> %b
+}
+
+define <2 x i32> @vnsra_v2i64_v2i32_scalar(<2 x i64> %x, i64 %y) {
+; RV32-LABEL: vnsra_v2i64_v2i32_scalar:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
+; RV32-NEXT: vsra.vv v25, v8, v25
+; RV32-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
+; RV32-NEXT: vnsrl.wi v8, v25, 0
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vnsra_v2i64_v2i32_scalar:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; RV64-NEXT: vnsra.wx v25, v8, a0
+; RV64-NEXT: vmv1r.v v8, v25
+; RV64-NEXT: ret
+ %insert = insertelement <2 x i64> undef, i64 %y, i32 0
+ %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
+ %a = ashr <2 x i64> %x, %splat
+ %b = trunc <2 x i64> %a to <2 x i32>
+ ret <2 x i32> %b
+}
+
+define <8 x i8> @vnsra_v8i16_v8i8_imm(<8 x i16> %x) {
+; CHECK-LABEL: vnsra_v8i16_v8i8_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+ %a = ashr <8 x i16> %x, <i16 8, i16 8, i16 8, i16 8,i16 8, i16 8, i16 8, i16 8>
+ %b = trunc <8 x i16> %a to <8 x i8>
+ ret <8 x i8> %b
+}
+
+define <4 x i16> @vnsra_v4i32_v4i16_imm(<4 x i32> %x) {
+; CHECK-LABEL: vnsra_v4i32_v4i16_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 16
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+ %a = ashr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
+ %b = trunc <4 x i32> %a to <4 x i16>
+ ret <4 x i16> %b
+}
+
+define <2 x i32> @vnsra_v2i64_v2i32_imm(<2 x i64> %x) {
+; CHECK-LABEL: vnsra_v2i64_v2i32_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 31
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+ %a = ashr <2 x i64> %x, <i64 31, i64 31>
+ %b = trunc <2 x i64> %a to <2 x i32>
+ ret <2 x i32> %b
+}
+
+define <8 x i8> @vnsrl_v8i16_v8i8_scalar(<8 x i16> %x, i16 %y) {
+; CHECK-LABEL: vnsrl_v8i16_v8i8_scalar:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 8, e8,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wx v25, v8, a0
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+ %insert = insertelement <8 x i16> undef, i16 %y, i16 0
+ %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
+ %a = lshr <8 x i16> %x, %splat
+ %b = trunc <8 x i16> %a to <8 x i8>
+ ret <8 x i8> %b
+}
+
+define <4 x i16> @vnsrl_v4i32_v4i16_scalar(<4 x i32> %x, i32 %y) {
+; CHECK-LABEL: vnsrl_v4i32_v4i16_scalar:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a1, 4, e16,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wx v25, v8, a0
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+ %insert = insertelement <4 x i32> undef, i32 %y, i32 0
+ %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
+ %a = lshr <4 x i32> %x, %splat
+ %b = trunc <4 x i32> %a to <4 x i16>
+ ret <4 x i16> %b
+}
+
+define <2 x i32> @vnsrl_v2i64_v2i32_scalar(<2 x i64> %x, i64 %y) {
+; RV32-LABEL: vnsrl_v2i64_v2i32_scalar:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw a1, 12(sp)
+; RV32-NEXT: sw a0, 8(sp)
+; RV32-NEXT: vsetivli a0, 2, e64,m1,ta,mu
+; RV32-NEXT: addi a0, sp, 8
+; RV32-NEXT: vlse64.v v25, (a0), zero
+; RV32-NEXT: vsrl.vv v25, v8, v25
+; RV32-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
+; RV32-NEXT: vnsrl.wi v8, v25, 0
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vnsrl_v2i64_v2i32_scalar:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli a1, 2, e32,mf2,ta,mu
+; RV64-NEXT: vnsrl.wx v25, v8, a0
+; RV64-NEXT: vmv1r.v v8, v25
+; RV64-NEXT: ret
+ %insert = insertelement <2 x i64> undef, i64 %y, i32 0
+ %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
+ %a = lshr <2 x i64> %x, %splat
+ %b = trunc <2 x i64> %a to <2 x i32>
+ ret <2 x i32> %b
+}
+
+define <8 x i8> @vnsrl_v8i16_v8i8_imm(<8 x i16> %x) {
+; CHECK-LABEL: vnsrl_v8i16_v8i8_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+ %a = lshr <8 x i16> %x, <i16 8, i16 8, i16 8, i16 8,i16 8, i16 8, i16 8, i16 8>
+ %b = trunc <8 x i16> %a to <8 x i8>
+ ret <8 x i8> %b
+}
+
+define <4 x i16> @vnsrl_v4i32_v4i16_imm(<4 x i32> %x) {
+; CHECK-LABEL: vnsrl_v4i32_v4i16_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a0, 4, e16,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 16
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+ %a = lshr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
+ %b = trunc <4 x i32> %a to <4 x i16>
+ ret <4 x i16> %b
+}
+
+define <2 x i32> @vnsrl_v2i64_v2i32_imm(<2 x i64> %x) {
+; CHECK-LABEL: vnsrl_v2i64_v2i32_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli a0, 2, e32,mf2,ta,mu
+; CHECK-NEXT: vnsrl.wi v25, v8, 31
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+ %a = lshr <2 x i64> %x, <i64 31, i64 31>
+ %b = trunc <2 x i64> %a to <2 x i32>
+ ret <2 x i32> %b
+}