[llvm] ae6549a - [RISCV] Add build vector coverage for rva22u + V
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 1 13:10:16 PDT 2024
Author: Philip Reames
Date: 2024-07-01T13:10:04-07:00
New Revision: ae6549a7d98f848f8b17d709471665c84e5108d5
URL: https://github.com/llvm/llvm-project/commit/ae6549a7d98f848f8b17d709471665c84e5108d5
DIFF: https://github.com/llvm/llvm-project/commit/ae6549a7d98f848f8b17d709471665c84e5108d5.diff
LOG: [RISCV] Add build vector coverage for rva22u + V
Added:
Modified:
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index 379a51f4eee30..d7ffed3b01ddb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1,7 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+rva22u64 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RVA22U64
; Tests that a floating-point build_vector doesn't try and generate a VID
; instruction
@@ -206,19 +207,47 @@ define <8 x float> @splat_idx_v8f32(<8 x float> %v, i64 %idx) {
; Test that we pull the vlse of the constant pool out of the loop.
define dso_local void @splat_load_licm(ptr %0) {
-; CHECK-LABEL: splat_load_licm:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, 1
-; CHECK-NEXT: add a1, a0, a1
-; CHECK-NEXT: lui a2, 263168
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v8, a2
-; CHECK-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: bne a0, a1, .LBB12_1
-; CHECK-NEXT: # %bb.2:
-; CHECK-NEXT: ret
+; RV32-LABEL: splat_load_licm:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a1, 1
+; RV32-NEXT: add a1, a0, a1
+; RV32-NEXT: lui a2, 263168
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.x v8, a2
+; RV32-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV32-NEXT: vse32.v v8, (a0)
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: bne a0, a1, .LBB12_1
+; RV32-NEXT: # %bb.2:
+; RV32-NEXT: ret
+;
+; RV64V-LABEL: splat_load_licm:
+; RV64V: # %bb.0:
+; RV64V-NEXT: lui a1, 1
+; RV64V-NEXT: add a1, a0, a1
+; RV64V-NEXT: lui a2, 263168
+; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64V-NEXT: vmv.v.x v8, a2
+; RV64V-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV64V-NEXT: vse32.v v8, (a0)
+; RV64V-NEXT: addi a0, a0, 16
+; RV64V-NEXT: bne a0, a1, .LBB12_1
+; RV64V-NEXT: # %bb.2:
+; RV64V-NEXT: ret
+;
+; RVA22U64-LABEL: splat_load_licm:
+; RVA22U64: # %bb.0:
+; RVA22U64-NEXT: lui a1, 1
+; RVA22U64-NEXT: add a1, a1, a0
+; RVA22U64-NEXT: lui a2, 263168
+; RVA22U64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RVA22U64-NEXT: vmv.v.x v8, a2
+; RVA22U64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RVA22U64-NEXT: vse32.v v8, (a0)
+; RVA22U64-NEXT: addi a0, a0, 16
+; RVA22U64-NEXT: bne a0, a1, .LBB12_1
+; RVA22U64-NEXT: # %bb.2:
+; RVA22U64-NEXT: ret
br label %2
2: ; preds = %2, %1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index 4a5d37b2a85a2..6cd69bac46e3c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RV64V-ONLY
+; RUN: llc -mtriple=riscv64 -mattr=+v,+rva22u64 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V,RVA22U64
; RUN: llc -mtriple=riscv64 -mattr=+zve32x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32
define void @buildvec_vid_v16i8(ptr %x) {
@@ -1182,46 +1183,169 @@ define <8 x i64> @v8xi64_exact_undef_prefix(i64 %a, i64 %b, i64 %c, i64 %d) vsca
define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) {
-; CHECK-LABEL: buildvec_v16i8_loads_contigous:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a1, a0, 8
-; CHECK-NEXT: lbu a2, 1(a0)
-; CHECK-NEXT: lbu a3, 2(a0)
-; CHECK-NEXT: lbu a4, 3(a0)
-; CHECK-NEXT: lbu a5, 4(a0)
-; CHECK-NEXT: lbu a6, 5(a0)
-; CHECK-NEXT: lbu a7, 6(a0)
-; CHECK-NEXT: lbu t0, 7(a0)
-; CHECK-NEXT: lbu t1, 9(a0)
-; CHECK-NEXT: lbu t2, 10(a0)
-; CHECK-NEXT: lbu t3, 11(a0)
-; CHECK-NEXT: lbu t4, 12(a0)
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vlse8.v v8, (a0), zero
-; CHECK-NEXT: lbu t5, 13(a0)
-; CHECK-NEXT: lbu t6, 14(a0)
-; CHECK-NEXT: lbu a0, 15(a0)
-; CHECK-NEXT: vslide1down.vx v8, v8, a2
-; CHECK-NEXT: vslide1down.vx v8, v8, a3
-; CHECK-NEXT: vslide1down.vx v8, v8, a4
-; CHECK-NEXT: vslide1down.vx v8, v8, a5
-; CHECK-NEXT: vlse8.v v9, (a1), zero
-; CHECK-NEXT: vslide1down.vx v8, v8, a6
-; CHECK-NEXT: vslide1down.vx v8, v8, a7
-; CHECK-NEXT: vslide1down.vx v10, v8, t0
-; CHECK-NEXT: vslide1down.vx v8, v9, t1
-; CHECK-NEXT: vslide1down.vx v8, v8, t2
-; CHECK-NEXT: vslide1down.vx v8, v8, t3
-; CHECK-NEXT: vslide1down.vx v8, v8, t4
-; CHECK-NEXT: vslide1down.vx v8, v8, t5
-; CHECK-NEXT: vslide1down.vx v8, v8, t6
-; CHECK-NEXT: li a1, 255
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a1
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
-; CHECK-NEXT: vslidedown.vi v8, v10, 8, v0.t
-; CHECK-NEXT: ret
+; RV32-LABEL: buildvec_v16i8_loads_contigous:
+; RV32: # %bb.0:
+; RV32-NEXT: addi a1, a0, 8
+; RV32-NEXT: lbu a2, 1(a0)
+; RV32-NEXT: lbu a3, 2(a0)
+; RV32-NEXT: lbu a4, 3(a0)
+; RV32-NEXT: lbu a5, 4(a0)
+; RV32-NEXT: lbu a6, 5(a0)
+; RV32-NEXT: lbu a7, 6(a0)
+; RV32-NEXT: lbu t0, 7(a0)
+; RV32-NEXT: lbu t1, 9(a0)
+; RV32-NEXT: lbu t2, 10(a0)
+; RV32-NEXT: lbu t3, 11(a0)
+; RV32-NEXT: lbu t4, 12(a0)
+; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT: vlse8.v v8, (a0), zero
+; RV32-NEXT: lbu t5, 13(a0)
+; RV32-NEXT: lbu t6, 14(a0)
+; RV32-NEXT: lbu a0, 15(a0)
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: vslide1down.vx v8, v8, a4
+; RV32-NEXT: vslide1down.vx v8, v8, a5
+; RV32-NEXT: vlse8.v v9, (a1), zero
+; RV32-NEXT: vslide1down.vx v8, v8, a6
+; RV32-NEXT: vslide1down.vx v8, v8, a7
+; RV32-NEXT: vslide1down.vx v10, v8, t0
+; RV32-NEXT: vslide1down.vx v8, v9, t1
+; RV32-NEXT: vslide1down.vx v8, v8, t2
+; RV32-NEXT: vslide1down.vx v8, v8, t3
+; RV32-NEXT: vslide1down.vx v8, v8, t4
+; RV32-NEXT: vslide1down.vx v8, v8, t5
+; RV32-NEXT: vslide1down.vx v8, v8, t6
+; RV32-NEXT: li a1, 255
+; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RV32-NEXT: ret
+;
+; RV64V-ONLY-LABEL: buildvec_v16i8_loads_contigous:
+; RV64V-ONLY: # %bb.0:
+; RV64V-ONLY-NEXT: addi a1, a0, 8
+; RV64V-ONLY-NEXT: lbu a2, 1(a0)
+; RV64V-ONLY-NEXT: lbu a3, 2(a0)
+; RV64V-ONLY-NEXT: lbu a4, 3(a0)
+; RV64V-ONLY-NEXT: lbu a5, 4(a0)
+; RV64V-ONLY-NEXT: lbu a6, 5(a0)
+; RV64V-ONLY-NEXT: lbu a7, 6(a0)
+; RV64V-ONLY-NEXT: lbu t0, 7(a0)
+; RV64V-ONLY-NEXT: lbu t1, 9(a0)
+; RV64V-ONLY-NEXT: lbu t2, 10(a0)
+; RV64V-ONLY-NEXT: lbu t3, 11(a0)
+; RV64V-ONLY-NEXT: lbu t4, 12(a0)
+; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64V-ONLY-NEXT: vlse8.v v8, (a0), zero
+; RV64V-ONLY-NEXT: lbu t5, 13(a0)
+; RV64V-ONLY-NEXT: lbu t6, 14(a0)
+; RV64V-ONLY-NEXT: lbu a0, 15(a0)
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
+; RV64V-ONLY-NEXT: vlse8.v v9, (a1), zero
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
+; RV64V-ONLY-NEXT: vslide1down.vx v10, v8, t0
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v9, t1
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t2
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t3
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t4
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t5
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t6
+; RV64V-ONLY-NEXT: li a1, 255
+; RV64V-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64V-ONLY-NEXT: vmv.s.x v0, a1
+; RV64V-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
+; RV64V-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RV64V-ONLY-NEXT: ret
+;
+; RVA22U64-LABEL: buildvec_v16i8_loads_contigous:
+; RVA22U64: # %bb.0:
+; RVA22U64-NEXT: addi a6, a0, 8
+; RVA22U64-NEXT: lbu t6, 1(a0)
+; RVA22U64-NEXT: lbu a3, 2(a0)
+; RVA22U64-NEXT: lbu a4, 3(a0)
+; RVA22U64-NEXT: lbu a5, 4(a0)
+; RVA22U64-NEXT: lbu t5, 5(a0)
+; RVA22U64-NEXT: lbu a7, 6(a0)
+; RVA22U64-NEXT: lbu t0, 7(a0)
+; RVA22U64-NEXT: lbu t1, 9(a0)
+; RVA22U64-NEXT: lbu t2, 10(a0)
+; RVA22U64-NEXT: lbu t3, 11(a0)
+; RVA22U64-NEXT: lbu t4, 12(a0)
+; RVA22U64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RVA22U64-NEXT: vlse8.v v8, (a0), zero
+; RVA22U64-NEXT: lbu a1, 13(a0)
+; RVA22U64-NEXT: lbu a2, 14(a0)
+; RVA22U64-NEXT: lbu a0, 15(a0)
+; RVA22U64-NEXT: vslide1down.vx v8, v8, t6
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a3
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a4
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a5
+; RVA22U64-NEXT: vlse8.v v9, (a6), zero
+; RVA22U64-NEXT: vslide1down.vx v8, v8, t5
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a7
+; RVA22U64-NEXT: vslide1down.vx v10, v8, t0
+; RVA22U64-NEXT: vslide1down.vx v8, v9, t1
+; RVA22U64-NEXT: vslide1down.vx v8, v8, t2
+; RVA22U64-NEXT: vslide1down.vx v8, v8, t3
+; RVA22U64-NEXT: vslide1down.vx v8, v8, t4
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a1
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a2
+; RVA22U64-NEXT: li a1, 255
+; RVA22U64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RVA22U64-NEXT: vmv.s.x v0, a1
+; RVA22U64-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a0
+; RVA22U64-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RVA22U64-NEXT: ret
+;
+; RV64ZVE32-LABEL: buildvec_v16i8_loads_contigous:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: addi a1, a0, 8
+; RV64ZVE32-NEXT: lbu a2, 1(a0)
+; RV64ZVE32-NEXT: lbu a3, 2(a0)
+; RV64ZVE32-NEXT: lbu a4, 3(a0)
+; RV64ZVE32-NEXT: lbu a5, 4(a0)
+; RV64ZVE32-NEXT: lbu a6, 5(a0)
+; RV64ZVE32-NEXT: lbu a7, 6(a0)
+; RV64ZVE32-NEXT: lbu t0, 7(a0)
+; RV64ZVE32-NEXT: lbu t1, 9(a0)
+; RV64ZVE32-NEXT: lbu t2, 10(a0)
+; RV64ZVE32-NEXT: lbu t3, 11(a0)
+; RV64ZVE32-NEXT: lbu t4, 12(a0)
+; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64ZVE32-NEXT: vlse8.v v8, (a0), zero
+; RV64ZVE32-NEXT: lbu t5, 13(a0)
+; RV64ZVE32-NEXT: lbu t6, 14(a0)
+; RV64ZVE32-NEXT: lbu a0, 15(a0)
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
+; RV64ZVE32-NEXT: vlse8.v v9, (a1), zero
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
+; RV64ZVE32-NEXT: vslide1down.vx v10, v8, t0
+; RV64ZVE32-NEXT: vslide1down.vx v8, v9, t1
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t2
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t3
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t4
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t5
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t6
+; RV64ZVE32-NEXT: li a1, 255
+; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32-NEXT: vmv.s.x v0, a1
+; RV64ZVE32-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RV64ZVE32-NEXT: ret
%p2 = getelementptr i8, ptr %p, i32 1
%p3 = getelementptr i8, ptr %p, i32 2
%p4 = getelementptr i8, ptr %p, i32 3
@@ -1276,46 +1400,169 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) {
define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) {
-; CHECK-LABEL: buildvec_v16i8_loads_gather:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a1, a0, 82
-; CHECK-NEXT: lbu a2, 1(a0)
-; CHECK-NEXT: lbu a3, 22(a0)
-; CHECK-NEXT: lbu a4, 31(a0)
-; CHECK-NEXT: lbu a5, 44(a0)
-; CHECK-NEXT: lbu a6, 55(a0)
-; CHECK-NEXT: lbu a7, 623(a0)
-; CHECK-NEXT: lbu t0, 75(a0)
-; CHECK-NEXT: lbu t1, 93(a0)
-; CHECK-NEXT: lbu t2, 105(a0)
-; CHECK-NEXT: lbu t3, 161(a0)
-; CHECK-NEXT: lbu t4, 124(a0)
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vlse8.v v8, (a0), zero
-; CHECK-NEXT: lbu t5, 163(a0)
-; CHECK-NEXT: lbu t6, 144(a0)
-; CHECK-NEXT: lbu a0, 154(a0)
-; CHECK-NEXT: vslide1down.vx v8, v8, a2
-; CHECK-NEXT: vslide1down.vx v8, v8, a3
-; CHECK-NEXT: vslide1down.vx v8, v8, a4
-; CHECK-NEXT: vslide1down.vx v8, v8, a5
-; CHECK-NEXT: vlse8.v v9, (a1), zero
-; CHECK-NEXT: vslide1down.vx v8, v8, a6
-; CHECK-NEXT: vslide1down.vx v8, v8, a7
-; CHECK-NEXT: vslide1down.vx v10, v8, t0
-; CHECK-NEXT: vslide1down.vx v8, v9, t1
-; CHECK-NEXT: vslide1down.vx v8, v8, t2
-; CHECK-NEXT: vslide1down.vx v8, v8, t3
-; CHECK-NEXT: vslide1down.vx v8, v8, t4
-; CHECK-NEXT: vslide1down.vx v8, v8, t5
-; CHECK-NEXT: vslide1down.vx v8, v8, t6
-; CHECK-NEXT: li a1, 255
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a1
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
-; CHECK-NEXT: vslidedown.vi v8, v10, 8, v0.t
-; CHECK-NEXT: ret
+; RV32-LABEL: buildvec_v16i8_loads_gather:
+; RV32: # %bb.0:
+; RV32-NEXT: addi a1, a0, 82
+; RV32-NEXT: lbu a2, 1(a0)
+; RV32-NEXT: lbu a3, 22(a0)
+; RV32-NEXT: lbu a4, 31(a0)
+; RV32-NEXT: lbu a5, 44(a0)
+; RV32-NEXT: lbu a6, 55(a0)
+; RV32-NEXT: lbu a7, 623(a0)
+; RV32-NEXT: lbu t0, 75(a0)
+; RV32-NEXT: lbu t1, 93(a0)
+; RV32-NEXT: lbu t2, 105(a0)
+; RV32-NEXT: lbu t3, 161(a0)
+; RV32-NEXT: lbu t4, 124(a0)
+; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT: vlse8.v v8, (a0), zero
+; RV32-NEXT: lbu t5, 163(a0)
+; RV32-NEXT: lbu t6, 144(a0)
+; RV32-NEXT: lbu a0, 154(a0)
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: vslide1down.vx v8, v8, a4
+; RV32-NEXT: vslide1down.vx v8, v8, a5
+; RV32-NEXT: vlse8.v v9, (a1), zero
+; RV32-NEXT: vslide1down.vx v8, v8, a6
+; RV32-NEXT: vslide1down.vx v8, v8, a7
+; RV32-NEXT: vslide1down.vx v10, v8, t0
+; RV32-NEXT: vslide1down.vx v8, v9, t1
+; RV32-NEXT: vslide1down.vx v8, v8, t2
+; RV32-NEXT: vslide1down.vx v8, v8, t3
+; RV32-NEXT: vslide1down.vx v8, v8, t4
+; RV32-NEXT: vslide1down.vx v8, v8, t5
+; RV32-NEXT: vslide1down.vx v8, v8, t6
+; RV32-NEXT: li a1, 255
+; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RV32-NEXT: ret
+;
+; RV64V-ONLY-LABEL: buildvec_v16i8_loads_gather:
+; RV64V-ONLY: # %bb.0:
+; RV64V-ONLY-NEXT: addi a1, a0, 82
+; RV64V-ONLY-NEXT: lbu a2, 1(a0)
+; RV64V-ONLY-NEXT: lbu a3, 22(a0)
+; RV64V-ONLY-NEXT: lbu a4, 31(a0)
+; RV64V-ONLY-NEXT: lbu a5, 44(a0)
+; RV64V-ONLY-NEXT: lbu a6, 55(a0)
+; RV64V-ONLY-NEXT: lbu a7, 623(a0)
+; RV64V-ONLY-NEXT: lbu t0, 75(a0)
+; RV64V-ONLY-NEXT: lbu t1, 93(a0)
+; RV64V-ONLY-NEXT: lbu t2, 105(a0)
+; RV64V-ONLY-NEXT: lbu t3, 161(a0)
+; RV64V-ONLY-NEXT: lbu t4, 124(a0)
+; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64V-ONLY-NEXT: vlse8.v v8, (a0), zero
+; RV64V-ONLY-NEXT: lbu t5, 163(a0)
+; RV64V-ONLY-NEXT: lbu t6, 144(a0)
+; RV64V-ONLY-NEXT: lbu a0, 154(a0)
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
+; RV64V-ONLY-NEXT: vlse8.v v9, (a1), zero
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
+; RV64V-ONLY-NEXT: vslide1down.vx v10, v8, t0
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v9, t1
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t2
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t3
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t4
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t5
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t6
+; RV64V-ONLY-NEXT: li a1, 255
+; RV64V-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64V-ONLY-NEXT: vmv.s.x v0, a1
+; RV64V-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
+; RV64V-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RV64V-ONLY-NEXT: ret
+;
+; RVA22U64-LABEL: buildvec_v16i8_loads_gather:
+; RVA22U64: # %bb.0:
+; RVA22U64-NEXT: addi a6, a0, 82
+; RVA22U64-NEXT: lbu t6, 1(a0)
+; RVA22U64-NEXT: lbu a3, 22(a0)
+; RVA22U64-NEXT: lbu a4, 31(a0)
+; RVA22U64-NEXT: lbu a5, 44(a0)
+; RVA22U64-NEXT: lbu t5, 55(a0)
+; RVA22U64-NEXT: lbu a7, 623(a0)
+; RVA22U64-NEXT: lbu t0, 75(a0)
+; RVA22U64-NEXT: lbu t1, 93(a0)
+; RVA22U64-NEXT: lbu t2, 105(a0)
+; RVA22U64-NEXT: lbu t3, 161(a0)
+; RVA22U64-NEXT: lbu t4, 124(a0)
+; RVA22U64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RVA22U64-NEXT: vlse8.v v8, (a0), zero
+; RVA22U64-NEXT: lbu a1, 163(a0)
+; RVA22U64-NEXT: lbu a2, 144(a0)
+; RVA22U64-NEXT: lbu a0, 154(a0)
+; RVA22U64-NEXT: vslide1down.vx v8, v8, t6
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a3
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a4
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a5
+; RVA22U64-NEXT: vlse8.v v9, (a6), zero
+; RVA22U64-NEXT: vslide1down.vx v8, v8, t5
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a7
+; RVA22U64-NEXT: vslide1down.vx v10, v8, t0
+; RVA22U64-NEXT: vslide1down.vx v8, v9, t1
+; RVA22U64-NEXT: vslide1down.vx v8, v8, t2
+; RVA22U64-NEXT: vslide1down.vx v8, v8, t3
+; RVA22U64-NEXT: vslide1down.vx v8, v8, t4
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a1
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a2
+; RVA22U64-NEXT: li a1, 255
+; RVA22U64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RVA22U64-NEXT: vmv.s.x v0, a1
+; RVA22U64-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a0
+; RVA22U64-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RVA22U64-NEXT: ret
+;
+; RV64ZVE32-LABEL: buildvec_v16i8_loads_gather:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: addi a1, a0, 82
+; RV64ZVE32-NEXT: lbu a2, 1(a0)
+; RV64ZVE32-NEXT: lbu a3, 22(a0)
+; RV64ZVE32-NEXT: lbu a4, 31(a0)
+; RV64ZVE32-NEXT: lbu a5, 44(a0)
+; RV64ZVE32-NEXT: lbu a6, 55(a0)
+; RV64ZVE32-NEXT: lbu a7, 623(a0)
+; RV64ZVE32-NEXT: lbu t0, 75(a0)
+; RV64ZVE32-NEXT: lbu t1, 93(a0)
+; RV64ZVE32-NEXT: lbu t2, 105(a0)
+; RV64ZVE32-NEXT: lbu t3, 161(a0)
+; RV64ZVE32-NEXT: lbu t4, 124(a0)
+; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64ZVE32-NEXT: vlse8.v v8, (a0), zero
+; RV64ZVE32-NEXT: lbu t5, 163(a0)
+; RV64ZVE32-NEXT: lbu t6, 144(a0)
+; RV64ZVE32-NEXT: lbu a0, 154(a0)
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
+; RV64ZVE32-NEXT: vlse8.v v9, (a1), zero
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
+; RV64ZVE32-NEXT: vslide1down.vx v10, v8, t0
+; RV64ZVE32-NEXT: vslide1down.vx v8, v9, t1
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t2
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t3
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t4
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t5
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t6
+; RV64ZVE32-NEXT: li a1, 255
+; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32-NEXT: vmv.s.x v0, a1
+; RV64ZVE32-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RV64ZVE32-NEXT: ret
%p2 = getelementptr i8, ptr %p, i32 1
%p3 = getelementptr i8, ptr %p, i32 22
%p4 = getelementptr i8, ptr %p, i32 31
@@ -1369,26 +1616,89 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) {
}
define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) {
-; CHECK-LABEL: buildvec_v16i8_undef_low_half:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a1, a0, 82
-; CHECK-NEXT: lbu a2, 93(a0)
-; CHECK-NEXT: lbu a3, 105(a0)
-; CHECK-NEXT: lbu a4, 161(a0)
-; CHECK-NEXT: lbu a5, 124(a0)
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vlse8.v v8, (a1), zero
-; CHECK-NEXT: lbu a1, 163(a0)
-; CHECK-NEXT: lbu a6, 144(a0)
-; CHECK-NEXT: lbu a0, 154(a0)
-; CHECK-NEXT: vslide1down.vx v8, v8, a2
-; CHECK-NEXT: vslide1down.vx v8, v8, a3
-; CHECK-NEXT: vslide1down.vx v8, v8, a4
-; CHECK-NEXT: vslide1down.vx v8, v8, a5
-; CHECK-NEXT: vslide1down.vx v8, v8, a1
-; CHECK-NEXT: vslide1down.vx v8, v8, a6
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
-; CHECK-NEXT: ret
+; RV32-LABEL: buildvec_v16i8_undef_low_half:
+; RV32: # %bb.0:
+; RV32-NEXT: addi a1, a0, 82
+; RV32-NEXT: lbu a2, 93(a0)
+; RV32-NEXT: lbu a3, 105(a0)
+; RV32-NEXT: lbu a4, 161(a0)
+; RV32-NEXT: lbu a5, 124(a0)
+; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT: vlse8.v v8, (a1), zero
+; RV32-NEXT: lbu a1, 163(a0)
+; RV32-NEXT: lbu a6, 144(a0)
+; RV32-NEXT: lbu a0, 154(a0)
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: vslide1down.vx v8, v8, a4
+; RV32-NEXT: vslide1down.vx v8, v8, a5
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: vslide1down.vx v8, v8, a6
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: ret
+;
+; RV64V-ONLY-LABEL: buildvec_v16i8_undef_low_half:
+; RV64V-ONLY: # %bb.0:
+; RV64V-ONLY-NEXT: addi a1, a0, 82
+; RV64V-ONLY-NEXT: lbu a2, 93(a0)
+; RV64V-ONLY-NEXT: lbu a3, 105(a0)
+; RV64V-ONLY-NEXT: lbu a4, 161(a0)
+; RV64V-ONLY-NEXT: lbu a5, 124(a0)
+; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64V-ONLY-NEXT: vlse8.v v8, (a1), zero
+; RV64V-ONLY-NEXT: lbu a1, 163(a0)
+; RV64V-ONLY-NEXT: lbu a6, 144(a0)
+; RV64V-ONLY-NEXT: lbu a0, 154(a0)
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
+; RV64V-ONLY-NEXT: ret
+;
+; RVA22U64-LABEL: buildvec_v16i8_undef_low_half:
+; RVA22U64: # %bb.0:
+; RVA22U64-NEXT: addi a1, a0, 82
+; RVA22U64-NEXT: lbu a6, 93(a0)
+; RVA22U64-NEXT: lbu a3, 105(a0)
+; RVA22U64-NEXT: lbu a4, 161(a0)
+; RVA22U64-NEXT: lbu a5, 124(a0)
+; RVA22U64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RVA22U64-NEXT: vlse8.v v8, (a1), zero
+; RVA22U64-NEXT: lbu a1, 163(a0)
+; RVA22U64-NEXT: lbu a2, 144(a0)
+; RVA22U64-NEXT: lbu a0, 154(a0)
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a6
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a3
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a4
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a5
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a1
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a2
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a0
+; RVA22U64-NEXT: ret
+;
+; RV64ZVE32-LABEL: buildvec_v16i8_undef_low_half:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: addi a1, a0, 82
+; RV64ZVE32-NEXT: lbu a2, 93(a0)
+; RV64ZVE32-NEXT: lbu a3, 105(a0)
+; RV64ZVE32-NEXT: lbu a4, 161(a0)
+; RV64ZVE32-NEXT: lbu a5, 124(a0)
+; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64ZVE32-NEXT: vlse8.v v8, (a1), zero
+; RV64ZVE32-NEXT: lbu a1, 163(a0)
+; RV64ZVE32-NEXT: lbu a6, 144(a0)
+; RV64ZVE32-NEXT: lbu a0, 154(a0)
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32-NEXT: ret
%p9 = getelementptr i8, ptr %p, i32 82
%p10 = getelementptr i8, ptr %p, i32 93
%p11 = getelementptr i8, ptr %p, i32 105
@@ -1419,26 +1729,89 @@ define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) {
}
define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) {
-; CHECK-LABEL: buildvec_v16i8_undef_high_half:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lbu a1, 1(a0)
-; CHECK-NEXT: lbu a2, 22(a0)
-; CHECK-NEXT: lbu a3, 31(a0)
-; CHECK-NEXT: lbu a4, 44(a0)
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vlse8.v v8, (a0), zero
-; CHECK-NEXT: lbu a5, 55(a0)
-; CHECK-NEXT: lbu a6, 623(a0)
-; CHECK-NEXT: lbu a0, 75(a0)
-; CHECK-NEXT: vslide1down.vx v8, v8, a1
-; CHECK-NEXT: vslide1down.vx v8, v8, a2
-; CHECK-NEXT: vslide1down.vx v8, v8, a3
-; CHECK-NEXT: vslide1down.vx v8, v8, a4
-; CHECK-NEXT: vslide1down.vx v8, v8, a5
-; CHECK-NEXT: vslide1down.vx v8, v8, a6
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
-; CHECK-NEXT: vslidedown.vi v8, v8, 8
-; CHECK-NEXT: ret
+; RV32-LABEL: buildvec_v16i8_undef_high_half:
+; RV32: # %bb.0:
+; RV32-NEXT: lbu a1, 1(a0)
+; RV32-NEXT: lbu a2, 22(a0)
+; RV32-NEXT: lbu a3, 31(a0)
+; RV32-NEXT: lbu a4, 44(a0)
+; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT: vlse8.v v8, (a0), zero
+; RV32-NEXT: lbu a5, 55(a0)
+; RV32-NEXT: lbu a6, 623(a0)
+; RV32-NEXT: lbu a0, 75(a0)
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: vslide1down.vx v8, v8, a4
+; RV32-NEXT: vslide1down.vx v8, v8, a5
+; RV32-NEXT: vslide1down.vx v8, v8, a6
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslidedown.vi v8, v8, 8
+; RV32-NEXT: ret
+;
+; RV64V-ONLY-LABEL: buildvec_v16i8_undef_high_half:
+; RV64V-ONLY: # %bb.0:
+; RV64V-ONLY-NEXT: lbu a1, 1(a0)
+; RV64V-ONLY-NEXT: lbu a2, 22(a0)
+; RV64V-ONLY-NEXT: lbu a3, 31(a0)
+; RV64V-ONLY-NEXT: lbu a4, 44(a0)
+; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64V-ONLY-NEXT: vlse8.v v8, (a0), zero
+; RV64V-ONLY-NEXT: lbu a5, 55(a0)
+; RV64V-ONLY-NEXT: lbu a6, 623(a0)
+; RV64V-ONLY-NEXT: lbu a0, 75(a0)
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a6
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
+; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 8
+; RV64V-ONLY-NEXT: ret
+;
+; RVA22U64-LABEL: buildvec_v16i8_undef_high_half:
+; RVA22U64: # %bb.0:
+; RVA22U64-NEXT: lbu a6, 1(a0)
+; RVA22U64-NEXT: lbu a2, 22(a0)
+; RVA22U64-NEXT: lbu a3, 31(a0)
+; RVA22U64-NEXT: lbu a4, 44(a0)
+; RVA22U64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RVA22U64-NEXT: vlse8.v v8, (a0), zero
+; RVA22U64-NEXT: lbu a5, 55(a0)
+; RVA22U64-NEXT: lbu a1, 623(a0)
+; RVA22U64-NEXT: lbu a0, 75(a0)
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a6
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a2
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a3
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a4
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a5
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a1
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a0
+; RVA22U64-NEXT: vslidedown.vi v8, v8, 8
+; RVA22U64-NEXT: ret
+;
+; RV64ZVE32-LABEL: buildvec_v16i8_undef_high_half:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: lbu a1, 1(a0)
+; RV64ZVE32-NEXT: lbu a2, 22(a0)
+; RV64ZVE32-NEXT: lbu a3, 31(a0)
+; RV64ZVE32-NEXT: lbu a4, 44(a0)
+; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64ZVE32-NEXT: vlse8.v v8, (a0), zero
+; RV64ZVE32-NEXT: lbu a5, 55(a0)
+; RV64ZVE32-NEXT: lbu a6, 623(a0)
+; RV64ZVE32-NEXT: lbu a0, 75(a0)
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a6
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 8
+; RV64ZVE32-NEXT: ret
%p2 = getelementptr i8, ptr %p, i32 1
%p3 = getelementptr i8, ptr %p, i32 22
%p4 = getelementptr i8, ptr %p, i32 31
@@ -1468,34 +1841,121 @@ define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) {
}
define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) {
-; CHECK-LABEL: buildvec_v16i8_undef_edges:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a1, a0, 31
-; CHECK-NEXT: addi a2, a0, 82
-; CHECK-NEXT: lbu a3, 44(a0)
-; CHECK-NEXT: lbu a4, 55(a0)
-; CHECK-NEXT: lbu a5, 623(a0)
-; CHECK-NEXT: lbu a6, 75(a0)
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vlse8.v v8, (a1), zero
-; CHECK-NEXT: lbu a1, 93(a0)
-; CHECK-NEXT: lbu a7, 105(a0)
-; CHECK-NEXT: lbu a0, 161(a0)
-; CHECK-NEXT: vslide1down.vx v8, v8, a3
-; CHECK-NEXT: vlse8.v v9, (a2), zero
-; CHECK-NEXT: vslide1down.vx v8, v8, a4
-; CHECK-NEXT: vslide1down.vx v8, v8, a5
-; CHECK-NEXT: vslide1down.vx v10, v8, a6
-; CHECK-NEXT: vslide1down.vx v8, v9, a1
-; CHECK-NEXT: vslide1down.vx v8, v8, a7
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
-; CHECK-NEXT: li a0, 255
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a0
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
-; CHECK-NEXT: vslidedown.vi v8, v8, 4
-; CHECK-NEXT: vslidedown.vi v8, v10, 8, v0.t
-; CHECK-NEXT: ret
+; RV32-LABEL: buildvec_v16i8_undef_edges:
+; RV32: # %bb.0:
+; RV32-NEXT: addi a1, a0, 31
+; RV32-NEXT: addi a2, a0, 82
+; RV32-NEXT: lbu a3, 44(a0)
+; RV32-NEXT: lbu a4, 55(a0)
+; RV32-NEXT: lbu a5, 623(a0)
+; RV32-NEXT: lbu a6, 75(a0)
+; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT: vlse8.v v8, (a1), zero
+; RV32-NEXT: lbu a1, 93(a0)
+; RV32-NEXT: lbu a7, 105(a0)
+; RV32-NEXT: lbu a0, 161(a0)
+; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: vlse8.v v9, (a2), zero
+; RV32-NEXT: vslide1down.vx v8, v8, a4
+; RV32-NEXT: vslide1down.vx v8, v8, a5
+; RV32-NEXT: vslide1down.vx v10, v8, a6
+; RV32-NEXT: vslide1down.vx v8, v9, a1
+; RV32-NEXT: vslide1down.vx v8, v8, a7
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: li a0, 255
+; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT: vmv.s.x v0, a0
+; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; RV32-NEXT: vslidedown.vi v8, v8, 4
+; RV32-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RV32-NEXT: ret
+;
+; RV64V-ONLY-LABEL: buildvec_v16i8_undef_edges:
+; RV64V-ONLY: # %bb.0:
+; RV64V-ONLY-NEXT: addi a1, a0, 31
+; RV64V-ONLY-NEXT: addi a2, a0, 82
+; RV64V-ONLY-NEXT: lbu a3, 44(a0)
+; RV64V-ONLY-NEXT: lbu a4, 55(a0)
+; RV64V-ONLY-NEXT: lbu a5, 623(a0)
+; RV64V-ONLY-NEXT: lbu a6, 75(a0)
+; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64V-ONLY-NEXT: vlse8.v v8, (a1), zero
+; RV64V-ONLY-NEXT: lbu a1, 93(a0)
+; RV64V-ONLY-NEXT: lbu a7, 105(a0)
+; RV64V-ONLY-NEXT: lbu a0, 161(a0)
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
+; RV64V-ONLY-NEXT: vlse8.v v9, (a2), zero
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a5
+; RV64V-ONLY-NEXT: vslide1down.vx v10, v8, a6
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v9, a1
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
+; RV64V-ONLY-NEXT: li a0, 255
+; RV64V-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64V-ONLY-NEXT: vmv.s.x v0, a0
+; RV64V-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 4
+; RV64V-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RV64V-ONLY-NEXT: ret
+;
+; RVA22U64-LABEL: buildvec_v16i8_undef_edges:
+; RVA22U64: # %bb.0:
+; RVA22U64-NEXT: addi a1, a0, 31
+; RVA22U64-NEXT: addi a6, a0, 82
+; RVA22U64-NEXT: lbu a3, 44(a0)
+; RVA22U64-NEXT: lbu a4, 55(a0)
+; RVA22U64-NEXT: lbu a5, 623(a0)
+; RVA22U64-NEXT: lbu a7, 75(a0)
+; RVA22U64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RVA22U64-NEXT: vlse8.v v8, (a1), zero
+; RVA22U64-NEXT: lbu a1, 93(a0)
+; RVA22U64-NEXT: lbu a2, 105(a0)
+; RVA22U64-NEXT: lbu a0, 161(a0)
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a3
+; RVA22U64-NEXT: vlse8.v v9, (a6), zero
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a4
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a5
+; RVA22U64-NEXT: vslide1down.vx v10, v8, a7
+; RVA22U64-NEXT: vslide1down.vx v8, v9, a1
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a2
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a0
+; RVA22U64-NEXT: li a0, 255
+; RVA22U64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RVA22U64-NEXT: vmv.s.x v0, a0
+; RVA22U64-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; RVA22U64-NEXT: vslidedown.vi v8, v8, 4
+; RVA22U64-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RVA22U64-NEXT: ret
+;
+; RV64ZVE32-LABEL: buildvec_v16i8_undef_edges:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: addi a1, a0, 31
+; RV64ZVE32-NEXT: addi a2, a0, 82
+; RV64ZVE32-NEXT: lbu a3, 44(a0)
+; RV64ZVE32-NEXT: lbu a4, 55(a0)
+; RV64ZVE32-NEXT: lbu a5, 623(a0)
+; RV64ZVE32-NEXT: lbu a6, 75(a0)
+; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64ZVE32-NEXT: vlse8.v v8, (a1), zero
+; RV64ZVE32-NEXT: lbu a1, 93(a0)
+; RV64ZVE32-NEXT: lbu a7, 105(a0)
+; RV64ZVE32-NEXT: lbu a0, 161(a0)
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
+; RV64ZVE32-NEXT: vlse8.v v9, (a2), zero
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a5
+; RV64ZVE32-NEXT: vslide1down.vx v10, v8, a6
+; RV64ZVE32-NEXT: vslide1down.vx v8, v9, a1
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32-NEXT: li a0, 255
+; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32-NEXT: vmv.s.x v0, a0
+; RV64ZVE32-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 4
+; RV64ZVE32-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RV64ZVE32-NEXT: ret
%p4 = getelementptr i8, ptr %p, i32 31
%p5 = getelementptr i8, ptr %p, i32 44
%p6 = getelementptr i8, ptr %p, i32 55
@@ -1529,38 +1989,137 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) {
}
define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) {
-; CHECK-LABEL: buildvec_v16i8_loads_undef_scattered:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi a1, a0, 82
-; CHECK-NEXT: lbu a2, 1(a0)
-; CHECK-NEXT: lbu a3, 44(a0)
-; CHECK-NEXT: lbu a4, 55(a0)
-; CHECK-NEXT: lbu a5, 75(a0)
-; CHECK-NEXT: lbu a6, 93(a0)
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vlse8.v v8, (a0), zero
-; CHECK-NEXT: lbu a7, 124(a0)
-; CHECK-NEXT: lbu t0, 144(a0)
-; CHECK-NEXT: lbu a0, 154(a0)
-; CHECK-NEXT: vslide1down.vx v8, v8, a2
-; CHECK-NEXT: vslidedown.vi v8, v8, 2
-; CHECK-NEXT: vslide1down.vx v8, v8, a3
-; CHECK-NEXT: vlse8.v v9, (a1), zero
-; CHECK-NEXT: vslide1down.vx v8, v8, a4
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vslide1down.vx v10, v8, a5
-; CHECK-NEXT: vslide1down.vx v8, v9, a6
-; CHECK-NEXT: vslidedown.vi v8, v8, 2
-; CHECK-NEXT: vslide1down.vx v8, v8, a7
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vslide1down.vx v8, v8, t0
-; CHECK-NEXT: li a1, 255
-; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a1
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
-; CHECK-NEXT: vslidedown.vi v8, v10, 8, v0.t
-; CHECK-NEXT: ret
+; RV32-LABEL: buildvec_v16i8_loads_undef_scattered:
+; RV32: # %bb.0:
+; RV32-NEXT: addi a1, a0, 82
+; RV32-NEXT: lbu a2, 1(a0)
+; RV32-NEXT: lbu a3, 44(a0)
+; RV32-NEXT: lbu a4, 55(a0)
+; RV32-NEXT: lbu a5, 75(a0)
+; RV32-NEXT: lbu a6, 93(a0)
+; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT: vlse8.v v8, (a0), zero
+; RV32-NEXT: lbu a7, 124(a0)
+; RV32-NEXT: lbu t0, 144(a0)
+; RV32-NEXT: lbu a0, 154(a0)
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vslidedown.vi v8, v8, 2
+; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: vlse8.v v9, (a1), zero
+; RV32-NEXT: vslide1down.vx v8, v8, a4
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vslide1down.vx v10, v8, a5
+; RV32-NEXT: vslide1down.vx v8, v9, a6
+; RV32-NEXT: vslidedown.vi v8, v8, 2
+; RV32-NEXT: vslide1down.vx v8, v8, a7
+; RV32-NEXT: vslidedown.vi v8, v8, 1
+; RV32-NEXT: vslide1down.vx v8, v8, t0
+; RV32-NEXT: li a1, 255
+; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT: vmv.s.x v0, a1
+; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RV32-NEXT: ret
+;
+; RV64V-ONLY-LABEL: buildvec_v16i8_loads_undef_scattered:
+; RV64V-ONLY: # %bb.0:
+; RV64V-ONLY-NEXT: addi a1, a0, 82
+; RV64V-ONLY-NEXT: lbu a2, 1(a0)
+; RV64V-ONLY-NEXT: lbu a3, 44(a0)
+; RV64V-ONLY-NEXT: lbu a4, 55(a0)
+; RV64V-ONLY-NEXT: lbu a5, 75(a0)
+; RV64V-ONLY-NEXT: lbu a6, 93(a0)
+; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64V-ONLY-NEXT: vlse8.v v8, (a0), zero
+; RV64V-ONLY-NEXT: lbu a7, 124(a0)
+; RV64V-ONLY-NEXT: lbu t0, 144(a0)
+; RV64V-ONLY-NEXT: lbu a0, 154(a0)
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2
+; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 2
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3
+; RV64V-ONLY-NEXT: vlse8.v v9, (a1), zero
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a4
+; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 1
+; RV64V-ONLY-NEXT: vslide1down.vx v10, v8, a5
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v9, a6
+; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 2
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a7
+; RV64V-ONLY-NEXT: vslidedown.vi v8, v8, 1
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, t0
+; RV64V-ONLY-NEXT: li a1, 255
+; RV64V-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64V-ONLY-NEXT: vmv.s.x v0, a1
+; RV64V-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a0
+; RV64V-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RV64V-ONLY-NEXT: ret
+;
+; RVA22U64-LABEL: buildvec_v16i8_loads_undef_scattered:
+; RVA22U64: # %bb.0:
+; RVA22U64-NEXT: addi a6, a0, 82
+; RVA22U64-NEXT: lbu a2, 1(a0)
+; RVA22U64-NEXT: lbu a3, 44(a0)
+; RVA22U64-NEXT: lbu a4, 55(a0)
+; RVA22U64-NEXT: lbu t0, 75(a0)
+; RVA22U64-NEXT: lbu a7, 93(a0)
+; RVA22U64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RVA22U64-NEXT: vlse8.v v8, (a0), zero
+; RVA22U64-NEXT: lbu a1, 124(a0)
+; RVA22U64-NEXT: lbu a5, 144(a0)
+; RVA22U64-NEXT: lbu a0, 154(a0)
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a2
+; RVA22U64-NEXT: vslidedown.vi v8, v8, 2
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a3
+; RVA22U64-NEXT: vlse8.v v9, (a6), zero
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a4
+; RVA22U64-NEXT: vslidedown.vi v8, v8, 1
+; RVA22U64-NEXT: vslide1down.vx v10, v8, t0
+; RVA22U64-NEXT: vslide1down.vx v8, v9, a7
+; RVA22U64-NEXT: vslidedown.vi v8, v8, 2
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a1
+; RVA22U64-NEXT: vslidedown.vi v8, v8, 1
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a5
+; RVA22U64-NEXT: li a1, 255
+; RVA22U64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RVA22U64-NEXT: vmv.s.x v0, a1
+; RVA22U64-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; RVA22U64-NEXT: vslide1down.vx v8, v8, a0
+; RVA22U64-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RVA22U64-NEXT: ret
+;
+; RV64ZVE32-LABEL: buildvec_v16i8_loads_undef_scattered:
+; RV64ZVE32: # %bb.0:
+; RV64ZVE32-NEXT: addi a1, a0, 82
+; RV64ZVE32-NEXT: lbu a2, 1(a0)
+; RV64ZVE32-NEXT: lbu a3, 44(a0)
+; RV64ZVE32-NEXT: lbu a4, 55(a0)
+; RV64ZVE32-NEXT: lbu a5, 75(a0)
+; RV64ZVE32-NEXT: lbu a6, 93(a0)
+; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64ZVE32-NEXT: vlse8.v v8, (a0), zero
+; RV64ZVE32-NEXT: lbu a7, 124(a0)
+; RV64ZVE32-NEXT: lbu t0, 144(a0)
+; RV64ZVE32-NEXT: lbu a0, 154(a0)
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2
+; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3
+; RV64ZVE32-NEXT: vlse8.v v9, (a1), zero
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a4
+; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 1
+; RV64ZVE32-NEXT: vslide1down.vx v10, v8, a5
+; RV64ZVE32-NEXT: vslide1down.vx v8, v9, a6
+; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 2
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a7
+; RV64ZVE32-NEXT: vslidedown.vi v8, v8, 1
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, t0
+; RV64ZVE32-NEXT: li a1, 255
+; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64ZVE32-NEXT: vmv.s.x v0, a1
+; RV64ZVE32-NEXT: vsetvli zero, zero, e8, m1, ta, mu
+; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVE32-NEXT: vslidedown.vi v8, v10, 8, v0.t
+; RV64ZVE32-NEXT: ret
%p2 = getelementptr i8, ptr %p, i32 1
%p3 = getelementptr i8, ptr %p, i32 22
%p4 = getelementptr i8, ptr %p, i32 31