[llvm-branch-commits] [llvm] AMDGPU: Add baseline test for vectorize of integer min/max (PR #100513)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Jul 24 23:06:36 PDT 2024
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/100513
(No pull request description was provided.)
>From 7a8f09d99fa0a90fc7fe442d87103e66ea2ff806 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Tue, 2 Jul 2024 21:28:30 +0200
Subject: [PATCH] AMDGPU: Add baseline test for vectorize of integer min/max
---
.../SLPVectorizer/AMDGPU/min_max.ll | 366 ++++++++++++++++++
1 file changed, 366 insertions(+)
create mode 100644 llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll
diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll
new file mode 100644
index 0000000000000..47b0dbd6b2cff
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll
@@ -0,0 +1,366 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX7 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX9 %s
+
+define <2 x i16> @uadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) { ; Scalarized 2-lane unsigned min (llvm.umin.i16), despite the uadd_sat name; hawaii expects scalar calls, fiji and gfx900 expect one llvm.umin.v2i16 call.
+; GFX7-LABEL: @uadd_sat_v2i16(
+; GFX7-NEXT: bb:
+; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
+; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
+; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
+; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
+; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
+; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
+; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
+; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
+; GFX7-NEXT: ret <2 x i16> [[INS_1]]
+;
+; GFX8-LABEL: @uadd_sat_v2i16(
+; GFX8-NEXT: bb:
+; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
+; GFX8-NEXT: ret <2 x i16> [[TMP0]]
+;
+; GFX9-LABEL: @uadd_sat_v2i16(
+; GFX9-NEXT: bb:
+; GFX9-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
+; GFX9-NEXT: ret <2 x i16> [[TMP0]]
+;
+bb:
+ %arg0.0 = extractelement <2 x i16> %arg0, i64 0 ; Split both operands into scalar lanes.
+ %arg0.1 = extractelement <2 x i16> %arg0, i64 1
+ %arg1.0 = extractelement <2 x i16> %arg1, i64 0
+ %arg1.1 = extractelement <2 x i16> %arg1, i64 1
+ %add.0 = call i16 @llvm.umin.i16(i16 %arg0.0, i16 %arg1.0) ; %add.N holds an unsigned-min result, not a sum.
+ %add.1 = call i16 @llvm.umin.i16(i16 %arg0.1, i16 %arg1.1)
+ %ins.0 = insertelement <2 x i16> poison, i16 %add.0, i64 0 ; poison seed instead of undef: both lanes are overwritten.
+ %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
+ ret <2 x i16> %ins.1
+}
+
+define <2 x i16> @usub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) { ; Scalarized 2-lane unsigned max (llvm.umax.i16), despite the usub_sat name; hawaii expects scalar calls, fiji and gfx900 expect one llvm.umax.v2i16 call.
+; GFX7-LABEL: @usub_sat_v2i16(
+; GFX7-NEXT: bb:
+; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
+; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
+; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
+; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
+; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
+; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
+; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
+; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
+; GFX7-NEXT: ret <2 x i16> [[INS_1]]
+;
+; GFX8-LABEL: @usub_sat_v2i16(
+; GFX8-NEXT: bb:
+; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
+; GFX8-NEXT: ret <2 x i16> [[TMP0]]
+;
+; GFX9-LABEL: @usub_sat_v2i16(
+; GFX9-NEXT: bb:
+; GFX9-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
+; GFX9-NEXT: ret <2 x i16> [[TMP0]]
+;
+bb:
+ %arg0.0 = extractelement <2 x i16> %arg0, i64 0 ; Split both operands into scalar lanes.
+ %arg0.1 = extractelement <2 x i16> %arg0, i64 1
+ %arg1.0 = extractelement <2 x i16> %arg1, i64 0
+ %arg1.1 = extractelement <2 x i16> %arg1, i64 1
+ %add.0 = call i16 @llvm.umax.i16(i16 %arg0.0, i16 %arg1.0) ; %add.N holds an unsigned-max result, not a sum.
+ %add.1 = call i16 @llvm.umax.i16(i16 %arg0.1, i16 %arg1.1)
+ %ins.0 = insertelement <2 x i16> poison, i16 %add.0, i64 0 ; poison seed instead of undef: both lanes are overwritten.
+ %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
+ ret <2 x i16> %ins.1
+}
+
+define <2 x i16> @sadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) { ; Scalarized 2-lane signed min (llvm.smin.i16), despite the sadd_sat name; hawaii expects scalar calls, fiji and gfx900 expect one llvm.smin.v2i16 call.
+; GFX7-LABEL: @sadd_sat_v2i16(
+; GFX7-NEXT: bb:
+; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
+; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
+; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
+; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
+; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.smin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
+; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.smin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
+; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
+; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
+; GFX7-NEXT: ret <2 x i16> [[INS_1]]
+;
+; GFX8-LABEL: @sadd_sat_v2i16(
+; GFX8-NEXT: bb:
+; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
+; GFX8-NEXT: ret <2 x i16> [[TMP0]]
+;
+; GFX9-LABEL: @sadd_sat_v2i16(
+; GFX9-NEXT: bb:
+; GFX9-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
+; GFX9-NEXT: ret <2 x i16> [[TMP0]]
+;
+bb:
+ %arg0.0 = extractelement <2 x i16> %arg0, i64 0 ; Split both operands into scalar lanes.
+ %arg0.1 = extractelement <2 x i16> %arg0, i64 1
+ %arg1.0 = extractelement <2 x i16> %arg1, i64 0
+ %arg1.1 = extractelement <2 x i16> %arg1, i64 1
+ %add.0 = call i16 @llvm.smin.i16(i16 %arg0.0, i16 %arg1.0) ; %add.N holds a signed-min result, not a sum.
+ %add.1 = call i16 @llvm.smin.i16(i16 %arg0.1, i16 %arg1.1)
+ %ins.0 = insertelement <2 x i16> poison, i16 %add.0, i64 0 ; poison seed instead of undef: both lanes are overwritten.
+ %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
+ ret <2 x i16> %ins.1
+}
+
+define <2 x i16> @ssub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) { ; Scalarized 2-lane signed max (llvm.smax.i16), despite the ssub_sat name; hawaii expects scalar calls, fiji and gfx900 expect one llvm.smax.v2i16 call.
+; GFX7-LABEL: @ssub_sat_v2i16(
+; GFX7-NEXT: bb:
+; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
+; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
+; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
+; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
+; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.smax.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
+; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.smax.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
+; GFX7-NEXT: [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
+; GFX7-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
+; GFX7-NEXT: ret <2 x i16> [[INS_1]]
+;
+; GFX8-LABEL: @ssub_sat_v2i16(
+; GFX8-NEXT: bb:
+; GFX8-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
+; GFX8-NEXT: ret <2 x i16> [[TMP0]]
+;
+; GFX9-LABEL: @ssub_sat_v2i16(
+; GFX9-NEXT: bb:
+; GFX9-NEXT: [[TMP0:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
+; GFX9-NEXT: ret <2 x i16> [[TMP0]]
+;
+bb:
+ %arg0.0 = extractelement <2 x i16> %arg0, i64 0 ; Split both operands into scalar lanes.
+ %arg0.1 = extractelement <2 x i16> %arg0, i64 1
+ %arg1.0 = extractelement <2 x i16> %arg1, i64 0
+ %arg1.1 = extractelement <2 x i16> %arg1, i64 1
+ %add.0 = call i16 @llvm.smax.i16(i16 %arg0.0, i16 %arg1.0) ; %add.N holds a signed-max result, not a sum.
+ %add.1 = call i16 @llvm.smax.i16(i16 %arg0.1, i16 %arg1.1)
+ %ins.0 = insertelement <2 x i16> poison, i16 %add.0, i64 0 ; poison seed instead of undef: both lanes are overwritten.
+ %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
+ ret <2 x i16> %ins.1
+}
+
+define <2 x i32> @uadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) { ; Scalarized 2-lane unsigned min (llvm.umin.i32), despite the uadd_sat name; all three run configurations expect the scalar calls to remain.
+; GCN-LABEL: @uadd_sat_v2i32(
+; GCN-NEXT: bb:
+; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
+; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
+; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
+; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
+; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.umin.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
+; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.umin.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
+; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
+; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
+; GCN-NEXT: ret <2 x i32> [[INS_1]]
+;
+bb:
+ %arg0.0 = extractelement <2 x i32> %arg0, i64 0 ; Split both operands into scalar lanes.
+ %arg0.1 = extractelement <2 x i32> %arg0, i64 1
+ %arg1.0 = extractelement <2 x i32> %arg1, i64 0
+ %arg1.1 = extractelement <2 x i32> %arg1, i64 1
+ %add.0 = call i32 @llvm.umin.i32(i32 %arg0.0, i32 %arg1.0) ; %add.N holds an unsigned-min result, not a sum.
+ %add.1 = call i32 @llvm.umin.i32(i32 %arg0.1, i32 %arg1.1)
+ %ins.0 = insertelement <2 x i32> poison, i32 %add.0, i64 0 ; poison seed instead of undef: both lanes are overwritten.
+ %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
+ ret <2 x i32> %ins.1
+}
+
+define <2 x i32> @usub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) { ; Scalarized 2-lane unsigned max (llvm.umax.i32), despite the usub_sat name; all three run configurations expect the scalar calls to remain.
+; GCN-LABEL: @usub_sat_v2i32(
+; GCN-NEXT: bb:
+; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
+; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
+; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
+; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
+; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.umax.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
+; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.umax.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
+; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
+; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
+; GCN-NEXT: ret <2 x i32> [[INS_1]]
+;
+bb:
+ %arg0.0 = extractelement <2 x i32> %arg0, i64 0 ; Split both operands into scalar lanes.
+ %arg0.1 = extractelement <2 x i32> %arg0, i64 1
+ %arg1.0 = extractelement <2 x i32> %arg1, i64 0
+ %arg1.1 = extractelement <2 x i32> %arg1, i64 1
+ %add.0 = call i32 @llvm.umax.i32(i32 %arg0.0, i32 %arg1.0) ; %add.N holds an unsigned-max result, not a sum.
+ %add.1 = call i32 @llvm.umax.i32(i32 %arg0.1, i32 %arg1.1)
+ %ins.0 = insertelement <2 x i32> poison, i32 %add.0, i64 0 ; poison seed instead of undef: both lanes are overwritten.
+ %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
+ ret <2 x i32> %ins.1
+}
+
+define <2 x i32> @sadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) { ; Scalarized 2-lane signed min (llvm.smin.i32), despite the sadd_sat name; all three run configurations expect the scalar calls to remain.
+; GCN-LABEL: @sadd_sat_v2i32(
+; GCN-NEXT: bb:
+; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
+; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
+; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
+; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
+; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.smin.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
+; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.smin.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
+; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
+; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
+; GCN-NEXT: ret <2 x i32> [[INS_1]]
+;
+bb:
+ %arg0.0 = extractelement <2 x i32> %arg0, i64 0 ; Split both operands into scalar lanes.
+ %arg0.1 = extractelement <2 x i32> %arg0, i64 1
+ %arg1.0 = extractelement <2 x i32> %arg1, i64 0
+ %arg1.1 = extractelement <2 x i32> %arg1, i64 1
+ %add.0 = call i32 @llvm.smin.i32(i32 %arg0.0, i32 %arg1.0) ; %add.N holds a signed-min result, not a sum.
+ %add.1 = call i32 @llvm.smin.i32(i32 %arg0.1, i32 %arg1.1)
+ %ins.0 = insertelement <2 x i32> poison, i32 %add.0, i64 0 ; poison seed instead of undef: both lanes are overwritten.
+ %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
+ ret <2 x i32> %ins.1
+}
+
+define <2 x i32> @ssub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) { ; Scalarized 2-lane signed max (llvm.smax.i32), despite the ssub_sat name; all three run configurations expect the scalar calls to remain.
+; GCN-LABEL: @ssub_sat_v2i32(
+; GCN-NEXT: bb:
+; GCN-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
+; GCN-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
+; GCN-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
+; GCN-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
+; GCN-NEXT: [[ADD_0:%.*]] = call i32 @llvm.smax.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
+; GCN-NEXT: [[ADD_1:%.*]] = call i32 @llvm.smax.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
+; GCN-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
+; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
+; GCN-NEXT: ret <2 x i32> [[INS_1]]
+;
+bb:
+ %arg0.0 = extractelement <2 x i32> %arg0, i64 0 ; Split both operands into scalar lanes.
+ %arg0.1 = extractelement <2 x i32> %arg0, i64 1
+ %arg1.0 = extractelement <2 x i32> %arg1, i64 0
+ %arg1.1 = extractelement <2 x i32> %arg1, i64 1
+ %add.0 = call i32 @llvm.smax.i32(i32 %arg0.0, i32 %arg1.0) ; %add.N holds a signed-max result, not a sum.
+ %add.1 = call i32 @llvm.smax.i32(i32 %arg0.1, i32 %arg1.1)
+ %ins.0 = insertelement <2 x i32> poison, i32 %add.0, i64 0 ; poison seed instead of undef: both lanes are overwritten.
+ %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
+ ret <2 x i32> %ins.1
+}
+
+define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) { ; Scalarized 3-lane unsigned min (llvm.umin.i16), despite the uadd_sat name; hawaii expects scalar calls, fiji and gfx900 expect a <3 x i16> umin call plus a separate scalar umin for lane 2.
+; GFX7-LABEL: @uadd_sat_v3i16(
+; GFX7-NEXT: bb:
+; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 0
+; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <3 x i16> [[ARG0]], i64 1
+; GFX7-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0]], i64 2
+; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 0
+; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <3 x i16> [[ARG1]], i64 1
+; GFX7-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1]], i64 2
+; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
+; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
+; GFX7-NEXT: [[ADD_2:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
+; GFX7-NEXT: [[INS_0:%.*]] = insertelement <3 x i16> poison, i16 [[ADD_0]], i64 0
+; GFX7-NEXT: [[INS_1:%.*]] = insertelement <3 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
+; GFX7-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
+; GFX7-NEXT: ret <3 x i16> [[INS_2]]
+;
+; GFX8-LABEL: @uadd_sat_v3i16(
+; GFX8-NEXT: bb:
+; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2
+; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2
+; GFX8-NEXT: [[TMP0:%.*]] = call <3 x i16> @llvm.umin.v3i16(<3 x i16> [[ARG0]], <3 x i16> [[ARG1]])
+; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
+; GFX8-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[TMP0]], i16 [[ADD_2]], i64 2
+; GFX8-NEXT: ret <3 x i16> [[INS_2]]
+;
+; GFX9-LABEL: @uadd_sat_v3i16(
+; GFX9-NEXT: bb:
+; GFX9-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2
+; GFX9-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2
+; GFX9-NEXT: [[TMP0:%.*]] = call <3 x i16> @llvm.umin.v3i16(<3 x i16> [[ARG0]], <3 x i16> [[ARG1]])
+; GFX9-NEXT: [[ADD_2:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
+; GFX9-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[TMP0]], i16 [[ADD_2]], i64 2
+; GFX9-NEXT: ret <3 x i16> [[INS_2]]
+;
+bb:
+ %arg0.0 = extractelement <3 x i16> %arg0, i64 0 ; Split both operands into three scalar lanes each.
+ %arg0.1 = extractelement <3 x i16> %arg0, i64 1
+ %arg0.2 = extractelement <3 x i16> %arg0, i64 2
+ %arg1.0 = extractelement <3 x i16> %arg1, i64 0
+ %arg1.1 = extractelement <3 x i16> %arg1, i64 1
+ %arg1.2 = extractelement <3 x i16> %arg1, i64 2
+ %add.0 = call i16 @llvm.umin.i16(i16 %arg0.0, i16 %arg1.0) ; %add.N holds an unsigned-min result, not a sum.
+ %add.1 = call i16 @llvm.umin.i16(i16 %arg0.1, i16 %arg1.1)
+ %add.2 = call i16 @llvm.umin.i16(i16 %arg0.2, i16 %arg1.2)
+ %ins.0 = insertelement <3 x i16> undef, i16 %add.0, i64 0 ; NOTE(review): undef seed; poison is the preferred placeholder, but regenerate the checks before changing it.
+ %ins.1 = insertelement <3 x i16> %ins.0, i16 %add.1, i64 1
+ %ins.2 = insertelement <3 x i16> %ins.1, i16 %add.2, i64 2 ; All three lanes are now written.
+ ret <3 x i16> %ins.2
+}
+
+define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) { ; Scalarized 4-lane unsigned min (llvm.umin.i16), despite the uadd_sat name; hawaii expects scalar calls, fiji and gfx900 expect two <4 x i16> umin calls recombined with shufflevector.
+; GFX7-LABEL: @uadd_sat_v4i16(
+; GFX7-NEXT: bb:
+; GFX7-NEXT: [[ARG0_0:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 0
+; GFX7-NEXT: [[ARG0_1:%.*]] = extractelement <4 x i16> [[ARG0]], i64 1
+; GFX7-NEXT: [[ARG0_2:%.*]] = extractelement <4 x i16> [[ARG0]], i64 2
+; GFX7-NEXT: [[ARG0_3:%.*]] = extractelement <4 x i16> [[ARG0]], i64 3
+; GFX7-NEXT: [[ARG1_0:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 0
+; GFX7-NEXT: [[ARG1_1:%.*]] = extractelement <4 x i16> [[ARG1]], i64 1
+; GFX7-NEXT: [[ARG1_2:%.*]] = extractelement <4 x i16> [[ARG1]], i64 2
+; GFX7-NEXT: [[ARG1_3:%.*]] = extractelement <4 x i16> [[ARG1]], i64 3
+; GFX7-NEXT: [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
+; GFX7-NEXT: [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
+; GFX7-NEXT: [[ADD_2:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
+; GFX7-NEXT: [[ADD_3:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_3]], i16 [[ARG1_3]])
+; GFX7-NEXT: [[INS_0:%.*]] = insertelement <4 x i16> poison, i16 [[ADD_0]], i64 0
+; GFX7-NEXT: [[INS_1:%.*]] = insertelement <4 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
+; GFX7-NEXT: [[INS_2:%.*]] = insertelement <4 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
+; GFX7-NEXT: [[INS_3:%.*]] = insertelement <4 x i16> [[INS_2]], i16 [[ADD_3]], i64 3
+; GFX7-NEXT: ret <4 x i16> [[INS_3]]
+;
+; GFX8-LABEL: @uadd_sat_v4i16(
+; GFX8-NEXT: bb:
+; GFX8-NEXT: [[TMP0:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[ARG0:%.*]], <4 x i16> [[ARG1:%.*]])
+; GFX8-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[ARG0]], <4 x i16> [[ARG1]])
+; GFX8-NEXT: [[TMP2:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
+; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; GFX8-NEXT: ret <4 x i16> [[INS_31]]
+;
+; GFX9-LABEL: @uadd_sat_v4i16(
+; GFX9-NEXT: bb:
+; GFX9-NEXT: [[TMP0:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[ARG0:%.*]], <4 x i16> [[ARG1:%.*]])
+; GFX9-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[ARG0]], <4 x i16> [[ARG1]])
+; GFX9-NEXT: [[TMP2:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
+; GFX9-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; GFX9-NEXT: [[INS_31:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; GFX9-NEXT: ret <4 x i16> [[INS_31]]
+;
+bb:
+ %arg0.0 = extractelement <4 x i16> %arg0, i64 0 ; Split both operands into four scalar lanes each.
+ %arg0.1 = extractelement <4 x i16> %arg0, i64 1
+ %arg0.2 = extractelement <4 x i16> %arg0, i64 2
+ %arg0.3 = extractelement <4 x i16> %arg0, i64 3
+ %arg1.0 = extractelement <4 x i16> %arg1, i64 0
+ %arg1.1 = extractelement <4 x i16> %arg1, i64 1
+ %arg1.2 = extractelement <4 x i16> %arg1, i64 2
+ %arg1.3 = extractelement <4 x i16> %arg1, i64 3
+ %add.0 = call i16 @llvm.umin.i16(i16 %arg0.0, i16 %arg1.0) ; %add.N holds an unsigned-min result, not a sum.
+ %add.1 = call i16 @llvm.umin.i16(i16 %arg0.1, i16 %arg1.1)
+ %add.2 = call i16 @llvm.umin.i16(i16 %arg0.2, i16 %arg1.2)
+ %add.3 = call i16 @llvm.umin.i16(i16 %arg0.3, i16 %arg1.3)
+ %ins.0 = insertelement <4 x i16> undef, i16 %add.0, i64 0 ; NOTE(review): undef seed; poison is the preferred placeholder, but regenerate the checks before changing it.
+ %ins.1 = insertelement <4 x i16> %ins.0, i16 %add.1, i64 1
+ %ins.2 = insertelement <4 x i16> %ins.1, i16 %add.2, i64 2
+ %ins.3 = insertelement <4 x i16> %ins.2, i16 %add.3, i64 3 ; All four lanes are now written.
+ ret <4 x i16> %ins.3
+}
+
+declare i16 @llvm.umin.i16(i16, i16) #0 ; 16-bit scalar min/max intrinsics called by the v2i16, v3i16 and v4i16 tests.
+declare i16 @llvm.umax.i16(i16, i16) #0
+declare i16 @llvm.smin.i16(i16, i16) #0
+declare i16 @llvm.smax.i16(i16, i16) #0
+
+declare i32 @llvm.umin.i32(i32, i32) #0 ; 32-bit scalar min/max intrinsics called by the v2i32 tests.
+declare i32 @llvm.umax.i32(i32, i32) #0
+declare i32 @llvm.smin.i32(i32, i32) #0
+declare i32 @llvm.smax.i32(i32, i32) #0
+
+attributes #0 = { nounwind readnone speculatable willreturn } ; Attribute group referenced by every declaration above.
More information about the llvm-branch-commits mailing list