[llvm] 5948d4d - [RISCV] Add test coverage for buildvectors with long vslidedown sequences
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 9 11:27:13 PST 2024
Author: Philip Reames
Date: 2024-02-09T11:10:35-08:00
New Revision: 5948d4de1d965d88c8ca05cc84bd94a28fa53ba4
URL: https://github.com/llvm/llvm-project/commit/5948d4de1d965d88c8ca05cc84bd94a28fa53ba4
DIFF: https://github.com/llvm/llvm-project/commit/5948d4de1d965d88c8ca05cc84bd94a28fa53ba4.diff
LOG: [RISCV] Add test coverage for buildvectors with long vslidedown sequences
In advance of an upcoming change.
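(Editorial context, not part of the commit: each new test builds a fixed-length vector from scalar or loaded elements through a chain of insertelement operations, which the RISC-V backend currently lowers to a broadcast of element 0 followed by one vslide1down per remaining lane, with vslidedown.vi steps covering undef lanes, as the CHECK lines below show. A minimal reduced sketch of the pattern follows; the RUN line is an assumption modeled on the RV32/RV64/CHECK prefixes used in these files, whose real RUN lines sit above the diff context and are not shown here.)

; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
define <4 x i8> @buildvec_v4i8_sketch(i8 %e0, i8 %e1, i8 %e2, i8 %e3) {
  ; Each insertelement fills the next lane; per the checks in this diff, the
  ; backend currently emits a splat of %e0 and then one vslide1down.vx per
  ; remaining element.
  %v0 = insertelement <4 x i8> poison, i8 %e0, i64 0
  %v1 = insertelement <4 x i8> %v0, i8 %e1, i64 1
  %v2 = insertelement <4 x i8> %v1, i8 %e2, i64 2
  %v3 = insertelement <4 x i8> %v2, i8 %e3, i64 3
  ret <4 x i8> %v3
}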
Added:
Modified:
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index 57b219343c3e38..a2bd862e2ce14f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1394,3 +1394,77 @@ define <2 x double> @vid_step2_v2f64() {
; CHECK-NEXT: ret
ret <2 x double> <double 0.0, double 2.0>
}
+
+
+define <8 x float> @buildvec_v8f32_zvl256(float %e0, float %e1, float %e2, float %e3, float %e4, float %e5, float %e6, float %e7) vscale_range(4, 128) {
+; CHECK-LABEL: buildvec_v8f32_zvl256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa2
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa4
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
+; CHECK-NEXT: ret
+ %v0 = insertelement <8 x float> poison, float %e0, i64 0
+ %v1 = insertelement <8 x float> %v0, float %e1, i64 1
+ %v2 = insertelement <8 x float> %v1, float %e2, i64 2
+ %v3 = insertelement <8 x float> %v2, float %e3, i64 3
+ %v4 = insertelement <8 x float> %v3, float %e4, i64 4
+ %v5 = insertelement <8 x float> %v4, float %e5, i64 5
+ %v6 = insertelement <8 x float> %v5, float %e6, i64 6
+ %v7 = insertelement <8 x float> %v6, float %e7, i64 7
+ ret <8 x float> %v7
+}
+
+
+define <8 x double> @buildvec_v8f64_zvl256(double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e6, double %e7) vscale_range(4, 128) {
+; CHECK-LABEL: buildvec_v8f64_zvl256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e64, m2, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa2
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa4
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
+; CHECK-NEXT: ret
+ %v0 = insertelement <8 x double> poison, double %e0, i64 0
+ %v1 = insertelement <8 x double> %v0, double %e1, i64 1
+ %v2 = insertelement <8 x double> %v1, double %e2, i64 2
+ %v3 = insertelement <8 x double> %v2, double %e3, i64 3
+ %v4 = insertelement <8 x double> %v3, double %e4, i64 4
+ %v5 = insertelement <8 x double> %v4, double %e5, i64 5
+ %v6 = insertelement <8 x double> %v5, double %e6, i64 6
+ %v7 = insertelement <8 x double> %v6, double %e7, i64 7
+ ret <8 x double> %v7
+}
+
+define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e6, double %e7) vscale_range(8, 128) {
+; CHECK-LABEL: buildvec_v8f64_zvl512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa2
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa4
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa6
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa7
+; CHECK-NEXT: ret
+ %v0 = insertelement <8 x double> poison, double %e0, i64 0
+ %v1 = insertelement <8 x double> %v0, double %e1, i64 1
+ %v2 = insertelement <8 x double> %v1, double %e2, i64 2
+ %v3 = insertelement <8 x double> %v2, double %e3, i64 3
+ %v4 = insertelement <8 x double> %v3, double %e4, i64 4
+ %v5 = insertelement <8 x double> %v4, double %e5, i64 5
+ %v6 = insertelement <8 x double> %v5, double %e6, i64 6
+ %v7 = insertelement <8 x double> %v6, double %e7, i64 7
+ ret <8 x double> %v7
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index dfafbfb97284cd..e691e635811544 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -1178,3 +1178,512 @@ define <8 x i64> @v8xi64_exact_undef_prefix(i64 %a, i64 %b, i64 %c, i64 %d) vsca
%v4 = insertelement <8 x i64> %v3, i64 %d, i32 7
ret <8 x i64> %v4
}
+
+
+define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) {
+; RV32-LABEL: buildvec_v16i8_loads_contigous:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset s0, -4
+; RV32-NEXT: lbu a1, 1(a0)
+; RV32-NEXT: lbu a2, 2(a0)
+; RV32-NEXT: lbu a3, 3(a0)
+; RV32-NEXT: lbu a4, 4(a0)
+; RV32-NEXT: lbu a5, 5(a0)
+; RV32-NEXT: lbu a6, 6(a0)
+; RV32-NEXT: lbu a7, 7(a0)
+; RV32-NEXT: lbu t0, 8(a0)
+; RV32-NEXT: lbu t1, 9(a0)
+; RV32-NEXT: lbu t2, 10(a0)
+; RV32-NEXT: lbu t3, 11(a0)
+; RV32-NEXT: lbu t4, 12(a0)
+; RV32-NEXT: lbu t5, 13(a0)
+; RV32-NEXT: lbu t6, 14(a0)
+; RV32-NEXT: lbu s0, 15(a0)
+; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT: vlse8.v v8, (a0), zero
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: vslide1down.vx v8, v8, a4
+; RV32-NEXT: vslide1down.vx v8, v8, a5
+; RV32-NEXT: vslide1down.vx v8, v8, a6
+; RV32-NEXT: vslide1down.vx v8, v8, a7
+; RV32-NEXT: vslide1down.vx v8, v8, t0
+; RV32-NEXT: vslide1down.vx v8, v8, t1
+; RV32-NEXT: vslide1down.vx v8, v8, t2
+; RV32-NEXT: vslide1down.vx v8, v8, t3
+; RV32-NEXT: vslide1down.vx v8, v8, t4
+; RV32-NEXT: vslide1down.vx v8, v8, t5
+; RV32-NEXT: vslide1down.vx v8, v8, t6
+; RV32-NEXT: vslide1down.vx v8, v8, s0
+; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: buildvec_v16i8_loads_contigous:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset s0, -8
+; RV64-NEXT: lbu a1, 1(a0)
+; RV64-NEXT: lbu a2, 2(a0)
+; RV64-NEXT: lbu a3, 3(a0)
+; RV64-NEXT: lbu a4, 4(a0)
+; RV64-NEXT: lbu a5, 5(a0)
+; RV64-NEXT: lbu a6, 6(a0)
+; RV64-NEXT: lbu a7, 7(a0)
+; RV64-NEXT: lbu t0, 8(a0)
+; RV64-NEXT: lbu t1, 9(a0)
+; RV64-NEXT: lbu t2, 10(a0)
+; RV64-NEXT: lbu t3, 11(a0)
+; RV64-NEXT: lbu t4, 12(a0)
+; RV64-NEXT: lbu t5, 13(a0)
+; RV64-NEXT: lbu t6, 14(a0)
+; RV64-NEXT: lbu s0, 15(a0)
+; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT: vlse8.v v8, (a0), zero
+; RV64-NEXT: vslide1down.vx v8, v8, a1
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: vslide1down.vx v8, v8, a3
+; RV64-NEXT: vslide1down.vx v8, v8, a4
+; RV64-NEXT: vslide1down.vx v8, v8, a5
+; RV64-NEXT: vslide1down.vx v8, v8, a6
+; RV64-NEXT: vslide1down.vx v8, v8, a7
+; RV64-NEXT: vslide1down.vx v8, v8, t0
+; RV64-NEXT: vslide1down.vx v8, v8, t1
+; RV64-NEXT: vslide1down.vx v8, v8, t2
+; RV64-NEXT: vslide1down.vx v8, v8, t3
+; RV64-NEXT: vslide1down.vx v8, v8, t4
+; RV64-NEXT: vslide1down.vx v8, v8, t5
+; RV64-NEXT: vslide1down.vx v8, v8, t6
+; RV64-NEXT: vslide1down.vx v8, v8, s0
+; RV64-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %p2 = getelementptr i8, ptr %p, i32 1
+ %p3 = getelementptr i8, ptr %p, i32 2
+ %p4 = getelementptr i8, ptr %p, i32 3
+ %p5 = getelementptr i8, ptr %p, i32 4
+ %p6 = getelementptr i8, ptr %p, i32 5
+ %p7 = getelementptr i8, ptr %p, i32 6
+ %p8 = getelementptr i8, ptr %p, i32 7
+ %p9 = getelementptr i8, ptr %p, i32 8
+ %p10 = getelementptr i8, ptr %p, i32 9
+ %p11 = getelementptr i8, ptr %p, i32 10
+ %p12 = getelementptr i8, ptr %p, i32 11
+ %p13 = getelementptr i8, ptr %p, i32 12
+ %p14 = getelementptr i8, ptr %p, i32 13
+ %p15 = getelementptr i8, ptr %p, i32 14
+ %p16 = getelementptr i8, ptr %p, i32 15
+
+ %ld1 = load i8, ptr %p
+ %ld2 = load i8, ptr %p2
+ %ld3 = load i8, ptr %p3
+ %ld4 = load i8, ptr %p4
+ %ld5 = load i8, ptr %p5
+ %ld6 = load i8, ptr %p6
+ %ld7 = load i8, ptr %p7
+ %ld8 = load i8, ptr %p8
+ %ld9 = load i8, ptr %p9
+ %ld10 = load i8, ptr %p10
+ %ld11 = load i8, ptr %p11
+ %ld12 = load i8, ptr %p12
+ %ld13 = load i8, ptr %p13
+ %ld14 = load i8, ptr %p14
+ %ld15 = load i8, ptr %p15
+ %ld16 = load i8, ptr %p16
+
+ %v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0
+ %v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1
+ %v3 = insertelement <16 x i8> %v2, i8 %ld3, i32 2
+ %v4 = insertelement <16 x i8> %v3, i8 %ld4, i32 3
+ %v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
+ %v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
+ %v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6
+ %v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
+ %v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8
+ %v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
+ %v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10
+ %v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11
+ %v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12
+ %v14 = insertelement <16 x i8> %v13, i8 %ld14, i32 13
+ %v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14
+ %v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15
+ ret <16 x i8> %v16
+}
+
+
+define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) {
+; RV32-LABEL: buildvec_v16i8_loads_gather:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset s0, -4
+; RV32-NEXT: lbu a1, 1(a0)
+; RV32-NEXT: lbu a2, 22(a0)
+; RV32-NEXT: lbu a3, 31(a0)
+; RV32-NEXT: lbu a4, 44(a0)
+; RV32-NEXT: lbu a5, 55(a0)
+; RV32-NEXT: lbu a6, 623(a0)
+; RV32-NEXT: lbu a7, 75(a0)
+; RV32-NEXT: lbu t0, 82(a0)
+; RV32-NEXT: lbu t1, 93(a0)
+; RV32-NEXT: lbu t2, 105(a0)
+; RV32-NEXT: lbu t3, 161(a0)
+; RV32-NEXT: lbu t4, 124(a0)
+; RV32-NEXT: lbu t5, 163(a0)
+; RV32-NEXT: lbu t6, 144(a0)
+; RV32-NEXT: lbu s0, 154(a0)
+; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV32-NEXT: vlse8.v v8, (a0), zero
+; RV32-NEXT: vslide1down.vx v8, v8, a1
+; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a3
+; RV32-NEXT: vslide1down.vx v8, v8, a4
+; RV32-NEXT: vslide1down.vx v8, v8, a5
+; RV32-NEXT: vslide1down.vx v8, v8, a6
+; RV32-NEXT: vslide1down.vx v8, v8, a7
+; RV32-NEXT: vslide1down.vx v8, v8, t0
+; RV32-NEXT: vslide1down.vx v8, v8, t1
+; RV32-NEXT: vslide1down.vx v8, v8, t2
+; RV32-NEXT: vslide1down.vx v8, v8, t3
+; RV32-NEXT: vslide1down.vx v8, v8, t4
+; RV32-NEXT: vslide1down.vx v8, v8, t5
+; RV32-NEXT: vslide1down.vx v8, v8, t6
+; RV32-NEXT: vslide1down.vx v8, v8, s0
+; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: buildvec_v16i8_loads_gather:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: sd s0, 8(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset s0, -8
+; RV64-NEXT: lbu a1, 1(a0)
+; RV64-NEXT: lbu a2, 22(a0)
+; RV64-NEXT: lbu a3, 31(a0)
+; RV64-NEXT: lbu a4, 44(a0)
+; RV64-NEXT: lbu a5, 55(a0)
+; RV64-NEXT: lbu a6, 623(a0)
+; RV64-NEXT: lbu a7, 75(a0)
+; RV64-NEXT: lbu t0, 82(a0)
+; RV64-NEXT: lbu t1, 93(a0)
+; RV64-NEXT: lbu t2, 105(a0)
+; RV64-NEXT: lbu t3, 161(a0)
+; RV64-NEXT: lbu t4, 124(a0)
+; RV64-NEXT: lbu t5, 163(a0)
+; RV64-NEXT: lbu t6, 144(a0)
+; RV64-NEXT: lbu s0, 154(a0)
+; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; RV64-NEXT: vlse8.v v8, (a0), zero
+; RV64-NEXT: vslide1down.vx v8, v8, a1
+; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: vslide1down.vx v8, v8, a3
+; RV64-NEXT: vslide1down.vx v8, v8, a4
+; RV64-NEXT: vslide1down.vx v8, v8, a5
+; RV64-NEXT: vslide1down.vx v8, v8, a6
+; RV64-NEXT: vslide1down.vx v8, v8, a7
+; RV64-NEXT: vslide1down.vx v8, v8, t0
+; RV64-NEXT: vslide1down.vx v8, v8, t1
+; RV64-NEXT: vslide1down.vx v8, v8, t2
+; RV64-NEXT: vslide1down.vx v8, v8, t3
+; RV64-NEXT: vslide1down.vx v8, v8, t4
+; RV64-NEXT: vslide1down.vx v8, v8, t5
+; RV64-NEXT: vslide1down.vx v8, v8, t6
+; RV64-NEXT: vslide1down.vx v8, v8, s0
+; RV64-NEXT: ld s0, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: ret
+ %p2 = getelementptr i8, ptr %p, i32 1
+ %p3 = getelementptr i8, ptr %p, i32 22
+ %p4 = getelementptr i8, ptr %p, i32 31
+ %p5 = getelementptr i8, ptr %p, i32 44
+ %p6 = getelementptr i8, ptr %p, i32 55
+ %p7 = getelementptr i8, ptr %p, i32 623
+ %p8 = getelementptr i8, ptr %p, i32 75
+ %p9 = getelementptr i8, ptr %p, i32 82
+ %p10 = getelementptr i8, ptr %p, i32 93
+ %p11 = getelementptr i8, ptr %p, i32 105
+ %p12 = getelementptr i8, ptr %p, i32 161
+ %p13 = getelementptr i8, ptr %p, i32 124
+ %p14 = getelementptr i8, ptr %p, i32 163
+ %p15 = getelementptr i8, ptr %p, i32 144
+ %p16 = getelementptr i8, ptr %p, i32 154
+
+ %ld1 = load i8, ptr %p
+ %ld2 = load i8, ptr %p2
+ %ld3 = load i8, ptr %p3
+ %ld4 = load i8, ptr %p4
+ %ld5 = load i8, ptr %p5
+ %ld6 = load i8, ptr %p6
+ %ld7 = load i8, ptr %p7
+ %ld8 = load i8, ptr %p8
+ %ld9 = load i8, ptr %p9
+ %ld10 = load i8, ptr %p10
+ %ld11 = load i8, ptr %p11
+ %ld12 = load i8, ptr %p12
+ %ld13 = load i8, ptr %p13
+ %ld14 = load i8, ptr %p14
+ %ld15 = load i8, ptr %p15
+ %ld16 = load i8, ptr %p16
+
+ %v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0
+ %v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1
+ %v3 = insertelement <16 x i8> %v2, i8 %ld3, i32 2
+ %v4 = insertelement <16 x i8> %v3, i8 %ld4, i32 3
+ %v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
+ %v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
+ %v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6
+ %v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
+ %v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8
+ %v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
+ %v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10
+ %v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11
+ %v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12
+ %v14 = insertelement <16 x i8> %v13, i8 %ld14, i32 13
+ %v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14
+ %v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15
+ ret <16 x i8> %v16
+}
+
+define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) {
+; CHECK-LABEL: buildvec_v16i8_undef_low_half:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, 82
+; CHECK-NEXT: lbu a2, 93(a0)
+; CHECK-NEXT: lbu a3, 105(a0)
+; CHECK-NEXT: lbu a4, 161(a0)
+; CHECK-NEXT: lbu a5, 124(a0)
+; CHECK-NEXT: lbu a6, 163(a0)
+; CHECK-NEXT: lbu a7, 144(a0)
+; CHECK-NEXT: lbu a0, 154(a0)
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vlse8.v v8, (a1), zero
+; CHECK-NEXT: vslide1down.vx v8, v8, a2
+; CHECK-NEXT: vslide1down.vx v8, v8, a3
+; CHECK-NEXT: vslide1down.vx v8, v8, a4
+; CHECK-NEXT: vslide1down.vx v8, v8, a5
+; CHECK-NEXT: vslide1down.vx v8, v8, a6
+; CHECK-NEXT: vslide1down.vx v8, v8, a7
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %p9 = getelementptr i8, ptr %p, i32 82
+ %p10 = getelementptr i8, ptr %p, i32 93
+ %p11 = getelementptr i8, ptr %p, i32 105
+ %p12 = getelementptr i8, ptr %p, i32 161
+ %p13 = getelementptr i8, ptr %p, i32 124
+ %p14 = getelementptr i8, ptr %p, i32 163
+ %p15 = getelementptr i8, ptr %p, i32 144
+ %p16 = getelementptr i8, ptr %p, i32 154
+
+ %ld9 = load i8, ptr %p9
+ %ld10 = load i8, ptr %p10
+ %ld11 = load i8, ptr %p11
+ %ld12 = load i8, ptr %p12
+ %ld13 = load i8, ptr %p13
+ %ld14 = load i8, ptr %p14
+ %ld15 = load i8, ptr %p15
+ %ld16 = load i8, ptr %p16
+
+ %v9 = insertelement <16 x i8> poison, i8 %ld9, i32 8
+ %v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
+ %v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10
+ %v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11
+ %v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12
+ %v14 = insertelement <16 x i8> %v13, i8 %ld14, i32 13
+ %v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14
+ %v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15
+ ret <16 x i8> %v16
+}
+
+define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) {
+; CHECK-LABEL: buildvec_v16i8_undef_high_half:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lbu a1, 1(a0)
+; CHECK-NEXT: lbu a2, 22(a0)
+; CHECK-NEXT: lbu a3, 31(a0)
+; CHECK-NEXT: lbu a4, 44(a0)
+; CHECK-NEXT: lbu a5, 55(a0)
+; CHECK-NEXT: lbu a6, 623(a0)
+; CHECK-NEXT: lbu a7, 75(a0)
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vlse8.v v8, (a0), zero
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vslide1down.vx v8, v8, a2
+; CHECK-NEXT: vslide1down.vx v8, v8, a3
+; CHECK-NEXT: vslide1down.vx v8, v8, a4
+; CHECK-NEXT: vslide1down.vx v8, v8, a5
+; CHECK-NEXT: vslide1down.vx v8, v8, a6
+; CHECK-NEXT: vslide1down.vx v8, v8, a7
+; CHECK-NEXT: vslidedown.vi v8, v8, 8
+; CHECK-NEXT: ret
+ %p2 = getelementptr i8, ptr %p, i32 1
+ %p3 = getelementptr i8, ptr %p, i32 22
+ %p4 = getelementptr i8, ptr %p, i32 31
+ %p5 = getelementptr i8, ptr %p, i32 44
+ %p6 = getelementptr i8, ptr %p, i32 55
+ %p7 = getelementptr i8, ptr %p, i32 623
+ %p8 = getelementptr i8, ptr %p, i32 75
+
+ %ld1 = load i8, ptr %p
+ %ld2 = load i8, ptr %p2
+ %ld3 = load i8, ptr %p3
+ %ld4 = load i8, ptr %p4
+ %ld5 = load i8, ptr %p5
+ %ld6 = load i8, ptr %p6
+ %ld7 = load i8, ptr %p7
+ %ld8 = load i8, ptr %p8
+
+ %v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0
+ %v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1
+ %v3 = insertelement <16 x i8> %v2, i8 %ld3, i32 2
+ %v4 = insertelement <16 x i8> %v3, i8 %ld4, i32 3
+ %v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
+ %v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
+ %v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6
+ %v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
+ ret <16 x i8> %v8
+}
+
+define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) {
+; CHECK-LABEL: buildvec_v16i8_undef_edges:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a1, a0, 31
+; CHECK-NEXT: lbu a2, 44(a0)
+; CHECK-NEXT: lbu a3, 55(a0)
+; CHECK-NEXT: lbu a4, 623(a0)
+; CHECK-NEXT: lbu a5, 75(a0)
+; CHECK-NEXT: lbu a6, 82(a0)
+; CHECK-NEXT: lbu a7, 93(a0)
+; CHECK-NEXT: lbu t0, 105(a0)
+; CHECK-NEXT: lbu a0, 161(a0)
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vlse8.v v8, (a1), zero
+; CHECK-NEXT: vslide1down.vx v8, v8, a2
+; CHECK-NEXT: vslide1down.vx v8, v8, a3
+; CHECK-NEXT: vslide1down.vx v8, v8, a4
+; CHECK-NEXT: vslide1down.vx v8, v8, a5
+; CHECK-NEXT: vslide1down.vx v8, v8, a6
+; CHECK-NEXT: vslide1down.vx v8, v8, a7
+; CHECK-NEXT: vslide1down.vx v8, v8, t0
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslidedown.vi v8, v8, 4
+; CHECK-NEXT: ret
+ %p4 = getelementptr i8, ptr %p, i32 31
+ %p5 = getelementptr i8, ptr %p, i32 44
+ %p6 = getelementptr i8, ptr %p, i32 55
+ %p7 = getelementptr i8, ptr %p, i32 623
+ %p8 = getelementptr i8, ptr %p, i32 75
+ %p9 = getelementptr i8, ptr %p, i32 82
+ %p10 = getelementptr i8, ptr %p, i32 93
+ %p11 = getelementptr i8, ptr %p, i32 105
+ %p12 = getelementptr i8, ptr %p, i32 161
+
+ %ld4 = load i8, ptr %p4
+ %ld5 = load i8, ptr %p5
+ %ld6 = load i8, ptr %p6
+ %ld7 = load i8, ptr %p7
+ %ld8 = load i8, ptr %p8
+ %ld9 = load i8, ptr %p9
+ %ld10 = load i8, ptr %p10
+ %ld11 = load i8, ptr %p11
+ %ld12 = load i8, ptr %p12
+
+ %v4 = insertelement <16 x i8> poison, i8 %ld4, i32 3
+ %v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
+ %v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
+ %v7 = insertelement <16 x i8> %v6, i8 %ld7, i32 6
+ %v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
+ %v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8
+ %v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
+ %v11 = insertelement <16 x i8> %v10, i8 %ld11, i32 10
+ %v12 = insertelement <16 x i8> %v11, i8 %ld12, i32 11
+ ret <16 x i8> %v12
+}
+
+define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) {
+; CHECK-LABEL: buildvec_v16i8_loads_undef_scattered:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lbu a1, 1(a0)
+; CHECK-NEXT: lbu a2, 44(a0)
+; CHECK-NEXT: lbu a3, 55(a0)
+; CHECK-NEXT: lbu a4, 75(a0)
+; CHECK-NEXT: lbu a5, 82(a0)
+; CHECK-NEXT: lbu a6, 93(a0)
+; CHECK-NEXT: lbu a7, 124(a0)
+; CHECK-NEXT: lbu t0, 144(a0)
+; CHECK-NEXT: lbu t1, 154(a0)
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vlse8.v v8, (a0), zero
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vslidedown.vi v8, v8, 2
+; CHECK-NEXT: vslide1down.vx v8, v8, a2
+; CHECK-NEXT: vslide1down.vx v8, v8, a3
+; CHECK-NEXT: vslidedown.vi v8, v8, 1
+; CHECK-NEXT: vslide1down.vx v8, v8, a4
+; CHECK-NEXT: vslide1down.vx v8, v8, a5
+; CHECK-NEXT: vslide1down.vx v8, v8, a6
+; CHECK-NEXT: vslidedown.vi v8, v8, 2
+; CHECK-NEXT: vslide1down.vx v8, v8, a7
+; CHECK-NEXT: vslidedown.vi v8, v8, 1
+; CHECK-NEXT: vslide1down.vx v8, v8, t0
+; CHECK-NEXT: vslide1down.vx v8, v8, t1
+; CHECK-NEXT: ret
+ %p2 = getelementptr i8, ptr %p, i32 1
+ %p3 = getelementptr i8, ptr %p, i32 22
+ %p4 = getelementptr i8, ptr %p, i32 31
+ %p5 = getelementptr i8, ptr %p, i32 44
+ %p6 = getelementptr i8, ptr %p, i32 55
+ %p7 = getelementptr i8, ptr %p, i32 623
+ %p8 = getelementptr i8, ptr %p, i32 75
+ %p9 = getelementptr i8, ptr %p, i32 82
+ %p10 = getelementptr i8, ptr %p, i32 93
+ %p11 = getelementptr i8, ptr %p, i32 105
+ %p12 = getelementptr i8, ptr %p, i32 161
+ %p13 = getelementptr i8, ptr %p, i32 124
+ %p14 = getelementptr i8, ptr %p, i32 163
+ %p15 = getelementptr i8, ptr %p, i32 144
+ %p16 = getelementptr i8, ptr %p, i32 154
+
+ %ld1 = load i8, ptr %p
+ %ld2 = load i8, ptr %p2
+ %ld3 = load i8, ptr %p3
+ %ld4 = load i8, ptr %p4
+ %ld5 = load i8, ptr %p5
+ %ld6 = load i8, ptr %p6
+ %ld7 = load i8, ptr %p7
+ %ld8 = load i8, ptr %p8
+ %ld9 = load i8, ptr %p9
+ %ld10 = load i8, ptr %p10
+ %ld11 = load i8, ptr %p11
+ %ld12 = load i8, ptr %p12
+ %ld13 = load i8, ptr %p13
+ %ld14 = load i8, ptr %p14
+ %ld15 = load i8, ptr %p15
+ %ld16 = load i8, ptr %p16
+
+ %v1 = insertelement <16 x i8> poison, i8 %ld1, i32 0
+ %v2 = insertelement <16 x i8> %v1, i8 %ld2, i32 1
+ %v3 = insertelement <16 x i8> %v2, i8 undef, i32 2
+ %v4 = insertelement <16 x i8> %v3, i8 undef, i32 3
+ %v5 = insertelement <16 x i8> %v4, i8 %ld5, i32 4
+ %v6 = insertelement <16 x i8> %v5, i8 %ld6, i32 5
+ %v7 = insertelement <16 x i8> %v6, i8 undef, i32 6
+ %v8 = insertelement <16 x i8> %v7, i8 %ld8, i32 7
+ %v9 = insertelement <16 x i8> %v8, i8 %ld9, i32 8
+ %v10 = insertelement <16 x i8> %v9, i8 %ld10, i32 9
+ %v11 = insertelement <16 x i8> %v10, i8 undef, i32 10
+ %v12 = insertelement <16 x i8> %v11, i8 undef, i32 11
+ %v13 = insertelement <16 x i8> %v12, i8 %ld13, i32 12
+ %v14 = insertelement <16 x i8> %v13, i8 undef, i32 13
+ %v15 = insertelement <16 x i8> %v14, i8 %ld15, i32 14
+ %v16 = insertelement <16 x i8> %v15, i8 %ld16, i32 15
+ ret <16 x i8> %v16
+}