[llvm] eb5fe55 - [RISCV] Expand codegen test coverage for extract/insert element
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 30 10:56:47 PDT 2023
Author: Philip Reames
Date: 2023-08-30T10:10:40-07:00
New Revision: eb5fe55b810cc186d2ff31b64294b0d666d623da
URL: https://github.com/llvm/llvm-project/commit/eb5fe55b810cc186d2ff31b64294b0d666d623da
DIFF: https://github.com/llvm/llvm-project/commit/eb5fe55b810cc186d2ff31b64294b0d666d623da.diff
LOG: [RISCV] Expand codegen test coverage for extract/insert element
In particular, at mixed LMULs, high LMULs, and types which require splitting.
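For reference, the added coverage follows the pattern already used in these
files: load a fixed-length vector, extract or insert a single element, and let
utils/update_llc_test_checks.py regenerate the CHECK lines from llc output.
Below is a minimal sketch of such a test, mirroring the added extractelt_v32i32
case (the _sketch function name is illustrative and not part of this commit):

; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s

; At the minimum VLEN of 128 required by +v, <32 x i32> occupies a full
; LMUL8 register group, so extracting lane 31 needs an m8 vslidedown
; followed by a scalar move (vmv.x.s), as the CHECK lines below show.
define i32 @extractelt_v32i32_sketch(ptr %x) nounwind {
  %a = load <32 x i32>, ptr %x              ; load the whole 1024-bit vector
  %b = extractelement <32 x i32> %a, i32 31 ; take the last lane
  ret i32 %b
}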
Added:
Modified:
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
index cd806ed8e12572..8ea9c15e86208e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32NOM
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d,+m -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32M
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64NOM
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d,+m -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64M
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32NOM
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32M
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64NOM
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64M
define i8 @extractelt_v16i8(ptr %x) nounwind {
; CHECK-LABEL: extractelt_v16i8:
@@ -242,6 +242,39 @@ define i64 @extractelt_v3i64(ptr %x) nounwind {
ret i64 %b
}
+; An LMUL8 type
+define i32 @extractelt_v32i32(ptr %x) nounwind {
+; CHECK-LABEL: extractelt_v32i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 32
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma
+; CHECK-NEXT: vslidedown.vi v8, v8, 31
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
+ %a = load <32 x i32>, ptr %x
+ %b = extractelement <32 x i32> %a, i32 31
+ ret i32 %b
+}
+
+; Exercise type legalization for a type beyond LMUL8
+define i32 @extractelt_v64i32(ptr %x) nounwind {
+; CHECK-LABEL: extractelt_v64i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, a0, 128
+; CHECK-NEXT: li a1, 32
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma
+; CHECK-NEXT: vslidedown.vi v8, v8, 31
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
+ %a = load <64 x i32>, ptr %x
+ %b = extractelement <64 x i32> %a, i32 63
+ ret i32 %b
+}
+
define i8 @extractelt_v16i8_idx(ptr %x, i32 zeroext %idx) nounwind {
; CHECK-LABEL: extractelt_v16i8_idx:
; CHECK: # %bb.0:
@@ -514,6 +547,85 @@ define i64 @extractelt_v3i64_idx(ptr %x, i32 zeroext %idx) nounwind {
ret i64 %c
}
+define i32 @extractelt_v32i32_idx(ptr %x, i32 zeroext %idx) nounwind {
+; CHECK-LABEL: extractelt_v32i32_idx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vsetivli zero, 1, e32, m8, ta, ma
+; CHECK-NEXT: vslidedown.vx v8, v8, a1
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: ret
+ %a = load <32 x i32>, ptr %x
+ %b = add <32 x i32> %a, %a
+ %c = extractelement <32 x i32> %b, i32 %idx
+ ret i32 %c
+}
+
+define i32 @extractelt_v64i32_idx(ptr %x, i32 zeroext %idx) nounwind {
+; RV32-LABEL: extractelt_v64i32_idx:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -384
+; RV32-NEXT: sw ra, 380(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 376(sp) # 4-byte Folded Spill
+; RV32-NEXT: addi s0, sp, 384
+; RV32-NEXT: andi sp, sp, -128
+; RV32-NEXT: andi a1, a1, 63
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: addi a2, a0, 128
+; RV32-NEXT: vle32.v v8, (a2)
+; RV32-NEXT: vle32.v v16, (a0)
+; RV32-NEXT: mv a0, sp
+; RV32-NEXT: add a1, a0, a1
+; RV32-NEXT: vadd.vv v8, v8, v8
+; RV32-NEXT: vadd.vv v16, v16, v16
+; RV32-NEXT: vse32.v v16, (a0)
+; RV32-NEXT: addi a0, sp, 128
+; RV32-NEXT: vse32.v v8, (a0)
+; RV32-NEXT: lw a0, 0(a1)
+; RV32-NEXT: addi sp, s0, -384
+; RV32-NEXT: lw ra, 380(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 376(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 384
+; RV32-NEXT: ret
+;
+; RV64-LABEL: extractelt_v64i32_idx:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -384
+; RV64-NEXT: sd ra, 376(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
+; RV64-NEXT: addi s0, sp, 384
+; RV64-NEXT: andi sp, sp, -128
+; RV64-NEXT: andi a1, a1, 63
+; RV64-NEXT: slli a1, a1, 2
+; RV64-NEXT: li a2, 32
+; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV64-NEXT: addi a2, a0, 128
+; RV64-NEXT: vle32.v v8, (a2)
+; RV64-NEXT: vle32.v v16, (a0)
+; RV64-NEXT: mv a0, sp
+; RV64-NEXT: add a1, a0, a1
+; RV64-NEXT: vadd.vv v8, v8, v8
+; RV64-NEXT: vadd.vv v16, v16, v16
+; RV64-NEXT: vse32.v v16, (a0)
+; RV64-NEXT: addi a0, sp, 128
+; RV64-NEXT: vse32.v v8, (a0)
+; RV64-NEXT: lw a0, 0(a1)
+; RV64-NEXT: addi sp, s0, -384
+; RV64-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 368(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 384
+; RV64-NEXT: ret
+ %a = load <64 x i32>, ptr %x
+ %b = add <64 x i32> %a, %a
+ %c = extractelement <64 x i32> %b, i32 %idx
+ ret i32 %c
+}
+
define void @store_extractelt_v16i8(ptr %x, ptr %p) nounwind {
; CHECK-LABEL: store_extractelt_v16i8:
; CHECK: # %bb.0:
@@ -696,11 +808,11 @@ define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
; RV32NOM-NEXT: vmv.v.i v9, 0
; RV32NOM-NEXT: li a0, -1
; RV32NOM-NEXT: vslide1down.vx v9, v9, a0
-; RV32NOM-NEXT: lui a0, %hi(.LCPI38_0)
-; RV32NOM-NEXT: addi a0, a0, %lo(.LCPI38_0)
+; RV32NOM-NEXT: lui a0, %hi(.LCPI42_0)
+; RV32NOM-NEXT: addi a0, a0, %lo(.LCPI42_0)
; RV32NOM-NEXT: vle32.v v10, (a0)
-; RV32NOM-NEXT: lui a0, %hi(.LCPI38_1)
-; RV32NOM-NEXT: addi a0, a0, %lo(.LCPI38_1)
+; RV32NOM-NEXT: lui a0, %hi(.LCPI42_1)
+; RV32NOM-NEXT: addi a0, a0, %lo(.LCPI42_1)
; RV32NOM-NEXT: vle32.v v11, (a0)
; RV32NOM-NEXT: vand.vv v9, v8, v9
; RV32NOM-NEXT: vmulh.vv v8, v8, v10
@@ -731,11 +843,11 @@ define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
; RV64NOM-NEXT: vmv.v.i v9, 0
; RV64NOM-NEXT: li a0, -1
; RV64NOM-NEXT: vslide1down.vx v9, v9, a0
-; RV64NOM-NEXT: lui a0, %hi(.LCPI38_0)
-; RV64NOM-NEXT: addi a0, a0, %lo(.LCPI38_0)
+; RV64NOM-NEXT: lui a0, %hi(.LCPI42_0)
+; RV64NOM-NEXT: addi a0, a0, %lo(.LCPI42_0)
; RV64NOM-NEXT: vle32.v v10, (a0)
-; RV64NOM-NEXT: lui a0, %hi(.LCPI38_1)
-; RV64NOM-NEXT: addi a0, a0, %lo(.LCPI38_1)
+; RV64NOM-NEXT: lui a0, %hi(.LCPI42_1)
+; RV64NOM-NEXT: addi a0, a0, %lo(.LCPI42_1)
; RV64NOM-NEXT: vle32.v v11, (a0)
; RV64NOM-NEXT: vand.vv v9, v8, v9
; RV64NOM-NEXT: vmulh.vv v8, v8, v10
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
index 1fff6b0e82f1e4..ea02b0280ead8e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -2,6 +2,178 @@
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+define <4 x i32> @insertelt_v4i32_0(<4 x i32> %a, i32 %y) {
+; CHECK-LABEL: insertelt_v4i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: ret
+ %b = insertelement <4 x i32> %a, i32 %y, i32 0
+ ret <4 x i32> %b
+}
+
+define <4 x i32> @insertelt_v4i32_3(<4 x i32> %a, i32 %y) {
+; CHECK-LABEL: insertelt_v4i32_3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, a0
+; CHECK-NEXT: vslideup.vi v8, v9, 3
+; CHECK-NEXT: ret
+ %b = insertelement <4 x i32> %a, i32 %y, i32 3
+ ret <4 x i32> %b
+}
+
+define <4 x i32> @insertelt_v4i32_idx(<4 x i32> %a, i32 %y, i32 zeroext %idx) {
+; CHECK-LABEL: insertelt_v4i32_idx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a1, 1
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v9, a0
+; CHECK-NEXT: vsetvli zero, a2, e32, m1, tu, ma
+; CHECK-NEXT: vslideup.vx v8, v9, a1
+; CHECK-NEXT: ret
+ %b = insertelement <4 x i32> %a, i32 %y, i32 %idx
+ ret <4 x i32> %b
+}
+
+define <32 x i32> @insertelt_v32i32_0(<32 x i32> %a, i32 %y) {
+; CHECK-LABEL: insertelt_v32i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 32
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: ret
+ %b = insertelement <32 x i32> %a, i32 %y, i32 0
+ ret <32 x i32> %b
+}
+
+; FIXME: Should only require an m2 slideup
+define <32 x i32> @insertelt_v32i32_4(<32 x i32> %a, i32 %y) {
+; CHECK-LABEL: insertelt_v32i32_4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 32
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v16, a0
+; CHECK-NEXT: vsetivli zero, 5, e32, m8, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v16, 4
+; CHECK-NEXT: ret
+ %b = insertelement <32 x i32> %a, i32 %y, i32 4
+ ret <32 x i32> %b
+}
+
+define <32 x i32> @insertelt_v32i32_31(<32 x i32> %a, i32 %y) {
+; CHECK-LABEL: insertelt_v32i32_31:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 32
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v16, a0
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v16, 31
+; CHECK-NEXT: ret
+ %b = insertelement <32 x i32> %a, i32 %y, i32 31
+ ret <32 x i32> %b
+}
+
+define <32 x i32> @insertelt_v32i32_idx(<32 x i32> %a, i32 %y, i32 zeroext %idx) {
+; CHECK-LABEL: insertelt_v32i32_idx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a2, 32
+; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v16, a0
+; CHECK-NEXT: addi a0, a1, 1
+; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma
+; CHECK-NEXT: vslideup.vx v8, v16, a1
+; CHECK-NEXT: ret
+ %b = insertelement <32 x i32> %a, i32 %y, i32 %idx
+ ret <32 x i32> %b
+}
+
+define <64 x i32> @insertelt_v64i32_0(<64 x i32> %a, i32 %y) {
+; CHECK-LABEL: insertelt_v64i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 32
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma
+; CHECK-NEXT: vmv.s.x v8, a0
+; CHECK-NEXT: ret
+ %b = insertelement <64 x i32> %a, i32 %y, i32 0
+ ret <64 x i32> %b
+}
+
+define <64 x i32> @insertelt_v64i32_63(<64 x i32> %a, i32 %y) {
+; CHECK-LABEL: insertelt_v64i32_63:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 32
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vmv.s.x v24, a0
+; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vslideup.vi v16, v24, 31
+; CHECK-NEXT: ret
+ %b = insertelement <64 x i32> %a, i32 %y, i32 63
+ ret <64 x i32> %b
+}
+
+define <64 x i32> @insertelt_v64i32_idx(<64 x i32> %a, i32 %y, i32 zeroext %idx) {
+; RV32-LABEL: insertelt_v64i32_idx:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -384
+; RV32-NEXT: .cfi_def_cfa_offset 384
+; RV32-NEXT: sw ra, 380(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 376(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 384
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: andi sp, sp, -128
+; RV32-NEXT: andi a1, a1, 63
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: mv a2, sp
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: addi a3, sp, 128
+; RV32-NEXT: li a4, 32
+; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma
+; RV32-NEXT: vse32.v v16, (a3)
+; RV32-NEXT: vse32.v v8, (a2)
+; RV32-NEXT: sw a0, 0(a1)
+; RV32-NEXT: vle32.v v8, (a2)
+; RV32-NEXT: vle32.v v16, (a3)
+; RV32-NEXT: addi sp, s0, -384
+; RV32-NEXT: lw ra, 380(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 376(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 384
+; RV32-NEXT: ret
+;
+; RV64-LABEL: insertelt_v64i32_idx:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -384
+; RV64-NEXT: .cfi_def_cfa_offset 384
+; RV64-NEXT: sd ra, 376(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 368(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 384
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: andi sp, sp, -128
+; RV64-NEXT: andi a1, a1, 63
+; RV64-NEXT: slli a1, a1, 2
+; RV64-NEXT: mv a2, sp
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: addi a3, sp, 128
+; RV64-NEXT: li a4, 32
+; RV64-NEXT: vsetvli zero, a4, e32, m8, ta, ma
+; RV64-NEXT: vse32.v v16, (a3)
+; RV64-NEXT: vse32.v v8, (a2)
+; RV64-NEXT: sw a0, 0(a1)
+; RV64-NEXT: vle32.v v8, (a2)
+; RV64-NEXT: vle32.v v16, (a3)
+; RV64-NEXT: addi sp, s0, -384
+; RV64-NEXT: ld ra, 376(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 368(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 384
+; RV64-NEXT: ret
+ %b = insertelement <64 x i32> %a, i32 %y, i32 %idx
+ ret <64 x i32> %b
+}
+
; FIXME: This codegen needs to be improved. These tests previously asserted
; type legalizing the i64 type on RV32.