[llvm] 72ce9d1 - [RISCV][NFC] Add tests for SLP vectorization of smin/smax/umin/umax
Ben Shi via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 15 22:30:40 PDT 2023
Author: Ben Shi
Date: 2023-03-16T13:30:16+08:00
New Revision: 72ce9d1ccdd0d64addca47cdf3f9dd1551529685
URL: https://github.com/llvm/llvm-project/commit/72ce9d1ccdd0d64addca47cdf3f9dd1551529685
DIFF: https://github.com/llvm/llvm-project/commit/72ce9d1ccdd0d64addca47cdf3f9dd1551529685.diff
LOG: [RISCV][NFC] Add tests for SLP vectorization of smin/smax/umin/umax
Reviewed By: reames
Differential Revision: https://reviews.llvm.org/D146015
Added:
Modified:
    llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll
Removed:
################################################################################
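The functions added below index into a %struct.buf type that is defined earlier in reductions.ll, so its definition does not appear in this hunk. The getelementptr indices (i64 0, i32 0, i64 N) step through a byte array that is the struct's first member; a minimal layout compatible with these GEPs (illustrative only -- the authoritative definition sits near the top of the test file) is:

  ; Hypothetical sketch of the layout the GEPs assume; the real %struct.buf
  ; in reductions.ll may carry additional fields.
  %struct.buf = type { [8 x i8] }

  ; Element N of the array inside %a is then addressed as:
  ;   getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 N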
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll
index 5ea7d14e9e5b4..268e4f3189d5e 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll
@@ -400,6 +400,261 @@ entry:
ret i8 %add13.7
}
+declare i8 @llvm.smin.i8(i8, i8)
+
+define i8 @reduce_smin(ptr %a, ptr %b) {
+; CHECK-LABEL: @reduce_smin(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
+; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> [[TMP2]])
+; CHECK-NEXT: ret i8 [[TMP3]]
+;
+entry:
+ %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
+ %0 = load i8, ptr %arrayidx, align 1
+ %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
+ %1 = load i8, ptr %arrayidx3, align 1
+ %and12 = and i8 %1, %0
+ %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
+ %2 = load i8, ptr %arrayidx.1, align 1
+ %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
+ %3 = load i8, ptr %arrayidx3.1, align 1
+ %and12.1 = and i8 %3, %2
+ %4 = tail call i8 @llvm.smin.i8(i8 %and12, i8 %and12.1)
+ %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
+ %5 = load i8, ptr %arrayidx.2, align 1
+ %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
+ %6 = load i8, ptr %arrayidx3.2, align 1
+ %and12.2 = and i8 %6, %5
+ %7 = tail call i8 @llvm.smin.i8(i8 %4, i8 %and12.2)
+ %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
+ %8 = load i8, ptr %arrayidx.3, align 1
+ %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
+ %9 = load i8, ptr %arrayidx3.3, align 1
+ %and12.3 = and i8 %9, %8
+ %10 = tail call i8 @llvm.smin.i8(i8 %7, i8 %and12.3)
+ %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
+ %11 = load i8, ptr %arrayidx.4, align 1
+ %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
+ %12 = load i8, ptr %arrayidx3.4, align 1
+ %and12.4 = and i8 %12, %11
+ %13 = tail call i8 @llvm.smin.i8(i8 %10, i8 %and12.4)
+ %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
+ %14 = load i8, ptr %arrayidx.5, align 1
+ %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
+ %15 = load i8, ptr %arrayidx3.5, align 1
+ %and12.5 = and i8 %15, %14
+ %16 = tail call i8 @llvm.smin.i8(i8 %13, i8 %and12.5)
+ %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
+ %17 = load i8, ptr %arrayidx.6, align 1
+ %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
+ %18 = load i8, ptr %arrayidx3.6, align 1
+ %and12.6 = and i8 %18, %17
+ %19 = tail call i8 @llvm.smin.i8(i8 %16, i8 %and12.6)
+ %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
+ %20 = load i8, ptr %arrayidx.7, align 1
+ %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
+ %21 = load i8, ptr %arrayidx3.7, align 1
+ %and12.7 = and i8 %21, %20
+ %22 = tail call i8 @llvm.smin.i8(i8 %19, i8 %and12.7)
+ ret i8 %22
+}
+
+declare i8 @llvm.smax.i8(i8, i8)
+
+define i8 @reduce_smax(ptr %a, ptr %b) {
+; CHECK-LABEL: @reduce_smax(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
+; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> [[TMP2]])
+; CHECK-NEXT: ret i8 [[TMP3]]
+;
+entry:
+ %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
+ %0 = load i8, ptr %arrayidx, align 1
+ %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
+ %1 = load i8, ptr %arrayidx3, align 1
+ %and12 = and i8 %1, %0
+ %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
+ %2 = load i8, ptr %arrayidx.1, align 1
+ %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
+ %3 = load i8, ptr %arrayidx3.1, align 1
+ %and12.1 = and i8 %3, %2
+ %4 = tail call i8 @llvm.smax.i8(i8 %and12, i8 %and12.1)
+ %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
+ %5 = load i8, ptr %arrayidx.2, align 1
+ %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
+ %6 = load i8, ptr %arrayidx3.2, align 1
+ %and12.2 = and i8 %6, %5
+ %7 = tail call i8 @llvm.smax.i8(i8 %4, i8 %and12.2)
+ %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
+ %8 = load i8, ptr %arrayidx.3, align 1
+ %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
+ %9 = load i8, ptr %arrayidx3.3, align 1
+ %and12.3 = and i8 %9, %8
+ %10 = tail call i8 @llvm.smax.i8(i8 %7, i8 %and12.3)
+ %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
+ %11 = load i8, ptr %arrayidx.4, align 1
+ %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
+ %12 = load i8, ptr %arrayidx3.4, align 1
+ %and12.4 = and i8 %12, %11
+ %13 = tail call i8 @llvm.smax.i8(i8 %10, i8 %and12.4)
+ %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
+ %14 = load i8, ptr %arrayidx.5, align 1
+ %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
+ %15 = load i8, ptr %arrayidx3.5, align 1
+ %and12.5 = and i8 %15, %14
+ %16 = tail call i8 @llvm.smax.i8(i8 %13, i8 %and12.5)
+ %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
+ %17 = load i8, ptr %arrayidx.6, align 1
+ %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
+ %18 = load i8, ptr %arrayidx3.6, align 1
+ %and12.6 = and i8 %18, %17
+ %19 = tail call i8 @llvm.smax.i8(i8 %16, i8 %and12.6)
+ %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
+ %20 = load i8, ptr %arrayidx.7, align 1
+ %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
+ %21 = load i8, ptr %arrayidx3.7, align 1
+ %and12.7 = and i8 %21, %20
+ %22 = tail call i8 @llvm.smax.i8(i8 %19, i8 %and12.7)
+ ret i8 %22
+}
+
+declare i8 @llvm.umax.i8(i8, i8)
+
+define i8 @reduce_umax(ptr %a, ptr %b) {
+; CHECK-LABEL: @reduce_umax(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
+; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> [[TMP2]])
+; CHECK-NEXT: ret i8 [[TMP3]]
+;
+entry:
+ %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
+ %0 = load i8, ptr %arrayidx, align 1
+ %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
+ %1 = load i8, ptr %arrayidx3, align 1
+ %and12 = and i8 %1, %0
+ %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
+ %2 = load i8, ptr %arrayidx.1, align 1
+ %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
+ %3 = load i8, ptr %arrayidx3.1, align 1
+ %and12.1 = and i8 %3, %2
+ %4 = tail call i8 @llvm.umax.i8(i8 %and12, i8 %and12.1)
+ %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
+ %5 = load i8, ptr %arrayidx.2, align 1
+ %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
+ %6 = load i8, ptr %arrayidx3.2, align 1
+ %and12.2 = and i8 %6, %5
+ %7 = tail call i8 @llvm.umax.i8(i8 %4, i8 %and12.2)
+ %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
+ %8 = load i8, ptr %arrayidx.3, align 1
+ %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
+ %9 = load i8, ptr %arrayidx3.3, align 1
+ %and12.3 = and i8 %9, %8
+ %10 = tail call i8 @llvm.umax.i8(i8 %7, i8 %and12.3)
+ %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
+ %11 = load i8, ptr %arrayidx.4, align 1
+ %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
+ %12 = load i8, ptr %arrayidx3.4, align 1
+ %and12.4 = and i8 %12, %11
+ %13 = tail call i8 @llvm.umax.i8(i8 %10, i8 %and12.4)
+ %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
+ %14 = load i8, ptr %arrayidx.5, align 1
+ %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
+ %15 = load i8, ptr %arrayidx3.5, align 1
+ %and12.5 = and i8 %15, %14
+ %16 = tail call i8 @llvm.umax.i8(i8 %13, i8 %and12.5)
+ %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
+ %17 = load i8, ptr %arrayidx.6, align 1
+ %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
+ %18 = load i8, ptr %arrayidx3.6, align 1
+ %and12.6 = and i8 %18, %17
+ %19 = tail call i8 @llvm.umax.i8(i8 %16, i8 %and12.6)
+ %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
+ %20 = load i8, ptr %arrayidx.7, align 1
+ %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
+ %21 = load i8, ptr %arrayidx3.7, align 1
+ %and12.7 = and i8 %21, %20
+ %22 = tail call i8 @llvm.umax.i8(i8 %19, i8 %and12.7)
+ ret i8 %22
+}
+
+declare i8 @llvm.umin.i8(i8, i8)
+
+define i8 @reduce_umin(ptr %a, ptr %b) {
+; CHECK-LABEL: @reduce_umin(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
+; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
+; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> [[TMP2]])
+; CHECK-NEXT: ret i8 [[TMP3]]
+;
+entry:
+ %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
+ %0 = load i8, ptr %arrayidx, align 1
+ %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
+ %1 = load i8, ptr %arrayidx3, align 1
+ %and12 = and i8 %1, %0
+ %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
+ %2 = load i8, ptr %arrayidx.1, align 1
+ %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
+ %3 = load i8, ptr %arrayidx3.1, align 1
+ %and12.1 = and i8 %3, %2
+ %4 = tail call i8 @llvm.umin.i8(i8 %and12, i8 %and12.1)
+ %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
+ %5 = load i8, ptr %arrayidx.2, align 1
+ %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
+ %6 = load i8, ptr %arrayidx3.2, align 1
+ %and12.2 = and i8 %6, %5
+ %7 = tail call i8 @llvm.umin.i8(i8 %4, i8 %and12.2)
+ %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
+ %8 = load i8, ptr %arrayidx.3, align 1
+ %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
+ %9 = load i8, ptr %arrayidx3.3, align 1
+ %and12.3 = and i8 %9, %8
+ %10 = tail call i8 @llvm.umin.i8(i8 %7, i8 %and12.3)
+ %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
+ %11 = load i8, ptr %arrayidx.4, align 1
+ %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
+ %12 = load i8, ptr %arrayidx3.4, align 1
+ %and12.4 = and i8 %12, %11
+ %13 = tail call i8 @llvm.umin.i8(i8 %10, i8 %and12.4)
+ %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
+ %14 = load i8, ptr %arrayidx.5, align 1
+ %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
+ %15 = load i8, ptr %arrayidx3.5, align 1
+ %and12.5 = and i8 %15, %14
+ %16 = tail call i8 @llvm.umin.i8(i8 %13, i8 %and12.5)
+ %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
+ %17 = load i8, ptr %arrayidx.6, align 1
+ %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
+ %18 = load i8, ptr %arrayidx3.6, align 1
+ %and12.6 = and i8 %18, %17
+ %19 = tail call i8 @llvm.umin.i8(i8 %16, i8 %and12.6)
+ %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
+ %20 = load i8, ptr %arrayidx.7, align 1
+ %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
+ %21 = load i8, ptr %arrayidx3.7, align 1
+ %and12.7 = and i8 %21, %20
+ %22 = tail call i8 @llvm.umin.i8(i8 %19, i8 %and12.7)
+ ret i8 %22
+}
; Next batch exercises reductions involving zext of narrower loads
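Each of the four new functions feeds sixteen scalar i8 loads through pairwise 'and' operations and then folds the eight results with a chain of seven llvm.smin/smax/umin/umax calls; the CHECK lines expect SLP vectorization to replace that chain with two vector loads, one vector 'and', and a single horizontal reduction intrinsic. A standalone sketch of that vectorized form (function and value names are illustrative, not taken from the commit):

  declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>)

  define i8 @smin_reduction_sketch(ptr %p, ptr %q) {
    ; Two <8 x i8> loads and one vector 'and' replace the 16 scalar loads,
    ; then the intrinsic computes the signed minimum across all eight lanes.
    %x = load <8 x i8>, ptr %p, align 1
    %y = load <8 x i8>, ptr %q, align 1
    %m = and <8 x i8> %y, %x
    %r = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %m)
    ret i8 %r
  }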