[llvm] [InstCombine] Pattern match minmax calls for unsigned saturation. (PR #99250)
Huihui Zhang via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 22 16:38:56 PDT 2024
https://github.com/huihzhang updated https://github.com/llvm/llvm-project/pull/99250
>From fb1188977b59fd590d2beea77955344e047eb926 Mon Sep 17 00:00:00 2001
From: Huihui Zhang <huihuiz at quicinc.com>
Date: Tue, 16 Jul 2024 15:53:20 -0700
Subject: [PATCH 1/4] [InstCombine] Add test for unsigned addsub saturation.
NFC.
---
.../Transforms/InstCombine/uaddsub_sat.ll | 590 ++++++++++++++++++
1 file changed, 590 insertions(+)
create mode 100644 llvm/test/Transforms/InstCombine/uaddsub_sat.ll
diff --git a/llvm/test/Transforms/InstCombine/uaddsub_sat.ll b/llvm/test/Transforms/InstCombine/uaddsub_sat.ll
new file mode 100644
index 0000000000000..362345a15949b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/uaddsub_sat.ll
@@ -0,0 +1,590 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+
+define i32 @uadd_sat32(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @uadd_sat32(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[A]] to i64
+; CHECK-NEXT: [[CONV1:%.*]] = zext i32 [[B]] to i64
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[CONV1]], [[CONV]]
+; CHECK-NEXT: [[SELECT:%.*]] = call i64 @llvm.umin.i64(i64 [[ADD]], i64 4294967295)
+; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i64 [[SELECT]] to i32
+; CHECK-NEXT: ret i32 [[TMP0]]
+;
+entry:
+ %conv = zext i32 %a to i64
+ %conv1 = zext i32 %b to i64
+ %add = add i64 %conv1, %conv
+ %0 = icmp ult i64 %add, 4294967295
+ %select = select i1 %0, i64 %add, i64 4294967295
+ %conv2 = trunc i64 %select to i32
+ ret i32 %conv2
+}
+
+define i32 @uadd_sat32_min(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @uadd_sat32_min(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[A]] to i64
+; CHECK-NEXT: [[CONV1:%.*]] = zext i32 [[B]] to i64
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[CONV1]], [[CONV]]
+; CHECK-NEXT: [[MIN:%.*]] = call i64 @llvm.umin.i64(i64 [[ADD]], i64 4294967295)
+; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i64 [[MIN]] to i32
+; CHECK-NEXT: ret i32 [[TMP0]]
+;
+entry:
+ %conv = zext i32 %a to i64
+ %conv1 = zext i32 %b to i64
+ %add = add i64 %conv1, %conv
+ %min = call i64 @llvm.umin.i64(i64 %add, i64 4294967295)
+ %conv2 = trunc i64 %min to i32
+ ret i32 %conv2
+}
+
+define i32 @usub_sat32(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @usub_sat32(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[A]] to i64
+; CHECK-NEXT: [[CONV1:%.*]] = zext i32 [[B]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[CONV]], [[CONV1]]
+; CHECK-NEXT: [[COND:%.*]] = call i64 @llvm.smin.i64(i64 [[SUB]], i64 4294967295)
+; CHECK-NEXT: [[COND11:%.*]] = call i64 @llvm.smax.i64(i64 [[COND]], i64 0)
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[COND11]] to i32
+; CHECK-NEXT: ret i32 [[TMP0]]
+;
+entry:
+ %conv = zext i32 %a to i64
+ %conv1 = zext i32 %b to i64
+ %sub = sub i64 %conv, %conv1
+ %cmp4 = icmp sgt i64 %sub, 0
+ %cmp6 = icmp slt i64 %sub, 4294967295
+ %cond = select i1 %cmp6, i64 %sub, i64 4294967295
+ %cond11 = select i1 %cmp4, i64 %cond, i64 0
+ %conv12 = trunc i64 %cond11 to i32
+ ret i32 %conv12
+}
+
+define i32 @usub_sat32_minmax(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @usub_sat32_minmax(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[A]] to i64
+; CHECK-NEXT: [[CONV1:%.*]] = zext i32 [[B]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[CONV]], [[CONV1]]
+; CHECK-NEXT: [[COND:%.*]] = call i64 @llvm.smin.i64(i64 [[SUB]], i64 4294967295)
+; CHECK-NEXT: [[COND11:%.*]] = call i64 @llvm.smax.i64(i64 [[COND]], i64 0)
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[COND11]] to i32
+; CHECK-NEXT: ret i32 [[TMP0]]
+;
+entry:
+ %conv = zext i32 %a to i64
+ %conv1 = zext i32 %b to i64
+ %sub = sub i64 %conv, %conv1
+ %cond = call i64 @llvm.smin.i64(i64 %sub, i64 4294967295)
+ %cond11 = call i64 @llvm.smax.i64(i64 %cond, i64 0)
+ %conv12 = trunc i64 %cond11 to i32
+ ret i32 %conv12
+}
+
+define i16 @uadd_sat16(i16 %a, i16 %b) {
+; CHECK-LABEL: define i16 @uadd_sat16(
+; CHECK-SAME: i16 [[A:%.*]], i16 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[A]] to i32
+; CHECK-NEXT: [[CONV1:%.*]] = zext i16 [[B]] to i32
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV1]], [[CONV]]
+; CHECK-NEXT: [[SELECT:%.*]] = call i32 @llvm.umin.i32(i32 [[ADD]], i32 65535)
+; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i32 [[SELECT]] to i16
+; CHECK-NEXT: ret i16 [[TMP0]]
+;
+entry:
+ %conv = zext i16 %a to i32
+ %conv1 = zext i16 %b to i32
+ %add = add i32 %conv1, %conv
+ %0 = icmp ult i32 %add, 65535
+ %select = select i1 %0, i32 %add, i32 65535
+ %conv2 = trunc i32 %select to i16
+ ret i16 %conv2
+}
+
+define i16 @uadd_sat16_min(i16 %a, i16 %b) {
+; CHECK-LABEL: define i16 @uadd_sat16_min(
+; CHECK-SAME: i16 [[A:%.*]], i16 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[A]] to i32
+; CHECK-NEXT: [[CONV1:%.*]] = zext i16 [[B]] to i32
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV1]], [[CONV]]
+; CHECK-NEXT: [[MIN:%.*]] = call i32 @llvm.umin.i32(i32 [[ADD]], i32 65535)
+; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i32 [[MIN]] to i16
+; CHECK-NEXT: ret i16 [[TMP0]]
+;
+entry:
+ %conv = zext i16 %a to i32
+ %conv1 = zext i16 %b to i32
+ %add = add i32 %conv1, %conv
+ %min = call i32 @llvm.umin.i32(i32 %add, i32 65535)
+ %conv2 = trunc i32 %min to i16
+ ret i16 %conv2
+}
+
+define i16 @usub_sat16(i16 %a, i16 %b) {
+; CHECK-LABEL: define i16 @usub_sat16(
+; CHECK-SAME: i16 [[A:%.*]], i16 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[A]] to i32
+; CHECK-NEXT: [[CONV1:%.*]] = zext i16 [[B]] to i32
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[CONV]], [[CONV1]]
+; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.smin.i32(i32 [[SUB]], i32 65535)
+; CHECK-NEXT: [[COND11:%.*]] = call i32 @llvm.smax.i32(i32 [[COND]], i32 0)
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[COND11]] to i16
+; CHECK-NEXT: ret i16 [[TMP0]]
+;
+entry:
+ %conv = zext i16 %a to i32
+ %conv1 = zext i16 %b to i32
+ %sub = sub i32 %conv, %conv1
+ %cmp4 = icmp sgt i32 %sub, 0
+ %cmp6 = icmp slt i32 %sub, 65535
+ %cond = select i1 %cmp6, i32 %sub, i32 65535
+ %cond11 = select i1 %cmp4, i32 %cond, i32 0
+ %conv12 = trunc i32 %cond11 to i16
+ ret i16 %conv12
+}
+
+define i16 @usub_sat16_minmax(i16 %a, i16 %b) {
+; CHECK-LABEL: define i16 @usub_sat16_minmax(
+; CHECK-SAME: i16 [[A:%.*]], i16 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[A]] to i32
+; CHECK-NEXT: [[CONV1:%.*]] = zext i16 [[B]] to i32
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[CONV]], [[CONV1]]
+; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.smin.i32(i32 [[SUB]], i32 65535)
+; CHECK-NEXT: [[COND11:%.*]] = call i32 @llvm.smax.i32(i32 [[COND]], i32 0)
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[COND11]] to i16
+; CHECK-NEXT: ret i16 [[TMP0]]
+;
+entry:
+ %conv = zext i16 %a to i32
+ %conv1 = zext i16 %b to i32
+ %sub = sub i32 %conv, %conv1
+ %cond = call i32 @llvm.smin.i32(i32 %sub, i32 65535)
+ %cond11 = call i32 @llvm.smax.i32(i32 %cond, i32 0)
+ %conv12 = trunc i32 %cond11 to i16
+ ret i16 %conv12
+}
+
+define i8 @uadd_sat8(i8 %a, i8 %b) {
+; CHECK-LABEL: define i8 @uadd_sat8(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[A]] to i32
+; CHECK-NEXT: [[CONV1:%.*]] = zext i8 [[B]] to i32
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV1]], [[CONV]]
+; CHECK-NEXT: [[SELECT:%.*]] = call i32 @llvm.umin.i32(i32 [[ADD]], i32 255)
+; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i32 [[SELECT]] to i8
+; CHECK-NEXT: ret i8 [[TMP0]]
+;
+entry:
+ %conv = zext i8 %a to i32
+ %conv1 = zext i8 %b to i32
+ %add = add i32 %conv1, %conv
+ %0 = icmp ult i32 %add, 255
+ %select = select i1 %0, i32 %add, i32 255
+ %conv2 = trunc i32 %select to i8
+ ret i8 %conv2
+}
+
+define i8 @uadd_sat8_min(i8 %a, i8 %b) {
+; CHECK-LABEL: define i8 @uadd_sat8_min(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[A]] to i32
+; CHECK-NEXT: [[CONV1:%.*]] = zext i8 [[B]] to i32
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV1]], [[CONV]]
+; CHECK-NEXT: [[MIN:%.*]] = call i32 @llvm.umin.i32(i32 [[ADD]], i32 255)
+; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i32 [[MIN]] to i8
+; CHECK-NEXT: ret i8 [[TMP0]]
+;
+entry:
+ %conv = zext i8 %a to i32
+ %conv1 = zext i8 %b to i32
+ %add = add i32 %conv1, %conv
+ %min = call i32 @llvm.umin.i32(i32 %add, i32 255)
+ %conv2 = trunc i32 %min to i8
+ ret i8 %conv2
+}
+
+define i8 @usub_sat8(i8 %a, i8 %b) {
+; CHECK-LABEL: define i8 @usub_sat8(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[A]] to i32
+; CHECK-NEXT: [[CONV1:%.*]] = zext i8 [[B]] to i32
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[CONV]], [[CONV1]]
+; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.smin.i32(i32 [[SUB]], i32 255)
+; CHECK-NEXT: [[COND11:%.*]] = call i32 @llvm.smax.i32(i32 [[COND]], i32 0)
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[COND11]] to i8
+; CHECK-NEXT: ret i8 [[TMP0]]
+;
+entry:
+ %conv = zext i8 %a to i32
+ %conv1 = zext i8 %b to i32
+ %sub = sub i32 %conv, %conv1
+ %cmp4 = icmp sgt i32 %sub, 0
+ %cmp6 = icmp slt i32 %sub, 255
+ %cond = select i1 %cmp6, i32 %sub, i32 255
+ %cond11 = select i1 %cmp4, i32 %cond, i32 0
+ %conv12 = trunc i32 %cond11 to i8
+ ret i8 %conv12
+}
+
+define i8 @usub_sat8_minmax(i8 %a, i8 %b) {
+; CHECK-LABEL: define i8 @usub_sat8_minmax(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[A]] to i32
+; CHECK-NEXT: [[CONV1:%.*]] = zext i8 [[B]] to i32
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[CONV]], [[CONV1]]
+; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.smin.i32(i32 [[SUB]], i32 255)
+; CHECK-NEXT: [[COND11:%.*]] = call i32 @llvm.smax.i32(i32 [[COND]], i32 0)
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[COND11]] to i8
+; CHECK-NEXT: ret i8 [[TMP0]]
+;
+entry:
+ %conv = zext i8 %a to i32
+ %conv1 = zext i8 %b to i32
+ %sub = sub i32 %conv, %conv1
+ %cond = call i32 @llvm.smin.i32(i32 %sub, i32 255)
+ %cond11 = call i32 @llvm.smax.i32(i32 %cond, i32 0)
+ %conv12 = trunc i32 %cond11 to i8
+ ret i8 %conv12
+}
+
+define i64 @uadd_sat64(i64 %a, i64 %b) {
+; CHECK-LABEL: define i64 @uadd_sat64(
+; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i64 [[A]] to i65
+; CHECK-NEXT: [[CONV1:%.*]] = zext i64 [[B]] to i65
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i65 [[CONV1]], [[CONV]]
+; CHECK-NEXT: [[SELECT:%.*]] = call i65 @llvm.umin.i65(i65 [[ADD]], i65 18446744073709551615)
+; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i65 [[SELECT]] to i64
+; CHECK-NEXT: ret i64 [[TMP0]]
+;
+entry:
+ %conv = zext i64 %a to i65
+ %conv1 = zext i64 %b to i65
+ %add = add i65 %conv1, %conv
+ %0 = icmp ult i65 %add, 18446744073709551615
+ %select = select i1 %0, i65 %add, i65 18446744073709551615
+ %conv2 = trunc i65 %select to i64
+ ret i64 %conv2
+}
+
+define i64 @usub_sat64(i64 %a, i64 %b) {
+; CHECK-LABEL: define i64 @usub_sat64(
+; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i64 [[A]] to i128
+; CHECK-NEXT: [[CONV1:%.*]] = zext i64 [[B]] to i128
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i128 [[CONV]], [[CONV1]]
+; CHECK-NEXT: [[COND:%.*]] = call i128 @llvm.smin.i128(i128 [[SUB]], i128 18446744073709551615)
+; CHECK-NEXT: [[COND11:%.*]] = call i128 @llvm.smax.i128(i128 [[COND]], i128 0)
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i128 [[COND11]] to i64
+; CHECK-NEXT: ret i64 [[TMP0]]
+;
+entry:
+ %conv = zext i64 %a to i128
+ %conv1 = zext i64 %b to i128
+ %sub = sub i128 %conv, %conv1
+ %cmp4 = icmp sgt i128 %sub, 0
+ %cmp6 = icmp slt i128 %sub, 18446744073709551615
+ %cond = select i1 %cmp6, i128 %sub, i128 18446744073709551615
+ %cond11 = select i1 %cmp4, i128 %cond, i128 0
+ %conv12 = trunc i128 %cond11 to i64
+ ret i64 %conv12
+}
+
+define <4 x i32> @uadd_satv4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: define <4 x i32> @uadd_satv4i32(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV:%.*]] = zext <4 x i32> [[A]] to <4 x i64>
+; CHECK-NEXT: [[CONV1:%.*]] = zext <4 x i32> [[B]] to <4 x i64>
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw <4 x i64> [[CONV1]], [[CONV]]
+; CHECK-NEXT: [[SELECT:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[ADD]], <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
+; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw <4 x i64> [[SELECT]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[TMP0]]
+;
+entry:
+ %conv = zext <4 x i32> %a to <4 x i64>
+ %conv1 = zext <4 x i32> %b to <4 x i64>
+ %add = add <4 x i64> %conv1, %conv
+ %0 = icmp ult <4 x i64> %add, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %select = select <4 x i1> %0, <4 x i64> %add, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %conv7 = trunc <4 x i64> %select to <4 x i32>
+ ret <4 x i32> %conv7
+}
+
+define <8 x i16> @uadd_satv8i16_minmax(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: define <8 x i16> @uadd_satv8i16_minmax(
+; CHECK-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV:%.*]] = zext <8 x i16> [[A]] to <8 x i32>
+; CHECK-NEXT: [[CONV1:%.*]] = zext <8 x i16> [[B]] to <8 x i32>
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw <8 x i32> [[CONV1]], [[CONV]]
+; CHECK-NEXT: [[SELECT:%.*]] = call <8 x i32> @llvm.umin.v8i32(<8 x i32> [[ADD]], <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>)
+; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw <8 x i32> [[SELECT]] to <8 x i16>
+; CHECK-NEXT: ret <8 x i16> [[TMP0]]
+;
+entry:
+ %conv = zext <8 x i16> %a to <8 x i32>
+ %conv1 = zext <8 x i16> %b to <8 x i32>
+ %add = add <8 x i32> %conv1, %conv
+ %select = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %add, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>)
+ %conv7 = trunc <8 x i32> %select to <8 x i16>
+ ret <8 x i16> %conv7
+}
+
+define <16 x i8> @usub_satv16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: define <16 x i8> @usub_satv16i8(
+; CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV:%.*]] = zext <16 x i8> [[A]] to <16 x i32>
+; CHECK-NEXT: [[CONV1:%.*]] = zext <16 x i8> [[B]] to <16 x i32>
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw <16 x i32> [[CONV1]], [[CONV]]
+; CHECK-NEXT: [[SELECT:%.*]] = call <16 x i32> @llvm.smin.v16i32(<16 x i32> [[SUB]], <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>)
+; CHECK-NEXT: [[SELECT8:%.*]] = call <16 x i32> @llvm.smax.v16i32(<16 x i32> [[SELECT]], <16 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP0:%.*]] = trunc <16 x i32> [[SELECT8]] to <16 x i8>
+; CHECK-NEXT: ret <16 x i8> [[TMP0]]
+;
+entry:
+ %conv = zext <16 x i8> %a to <16 x i32>
+ %conv1 = zext <16 x i8> %b to <16 x i32>
+ %sub = sub <16 x i32> %conv1, %conv
+ %0 = icmp slt <16 x i32> %sub, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+ %select = select <16 x i1> %0, <16 x i32> %sub, <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+ %1 = icmp sgt <16 x i32> %select, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %select8 = select <16 x i1> %1, <16 x i32> %select, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %conv7 = trunc <16 x i32> %select8 to <16 x i8>
+ ret <16 x i8> %conv7
+}
+
+define <2 x i64> @usub_satv2i64_minmax(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: define <2 x i64> @usub_satv2i64_minmax(
+; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i64> [[A]] to <2 x i128>
+; CHECK-NEXT: [[CONV1:%.*]] = zext <2 x i64> [[B]] to <2 x i128>
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw <2 x i128> [[CONV1]], [[CONV]]
+; CHECK-NEXT: [[SELECT:%.*]] = call <2 x i128> @llvm.smin.v2i128(<2 x i128> [[SUB]], <2 x i128> <i128 18446744073709551615, i128 18446744073709551615>)
+; CHECK-NEXT: [[SELECT8:%.*]] = call <2 x i128> @llvm.smax.v2i128(<2 x i128> [[SELECT]], <2 x i128> zeroinitializer)
+; CHECK-NEXT: [[TMP0:%.*]] = trunc <2 x i128> [[SELECT8]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP0]]
+;
+entry:
+ %conv = zext <2 x i64> %a to <2 x i128>
+ %conv1 = zext <2 x i64> %b to <2 x i128>
+ %sub = sub <2 x i128> %conv1, %conv
+ %select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %sub, <2 x i128> <i128 18446744073709551615, i128 18446744073709551615>)
+ %select8 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %select, <2 x i128> <i128 0, i128 0>)
+ %conv7 = trunc <2 x i128> %select8 to <2 x i64>
+ ret <2 x i64> %conv7
+}
+
+define i32 @uadd_sat32_extra_use_1(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @uadd_sat32_extra_use_1(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[A]] to i64
+; CHECK-NEXT: [[CONV1:%.*]] = zext i32 [[B]] to i64
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[CONV1]], [[CONV]]
+; CHECK-NEXT: [[SELECT:%.*]] = call i64 @llvm.umin.i64(i64 [[ADD]], i64 4294967295)
+; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i64 [[SELECT]] to i32
+; CHECK-NEXT: call void @use64(i64 [[SELECT]])
+; CHECK-NEXT: ret i32 [[TMP0]]
+;
+entry:
+ %conv = zext i32 %a to i64
+ %conv1 = zext i32 %b to i64
+ %add = add i64 %conv1, %conv
+ %0 = icmp ult i64 %add, 4294967295
+ %select = select i1 %0, i64 %add, i64 4294967295
+ %conv7 = trunc i64 %select to i32
+ call void @use64(i64 %select)
+ ret i32 %conv7
+}
+
+define i32 @uadd_sat32_extra_use_2(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @uadd_sat32_extra_use_2(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[A]] to i64
+; CHECK-NEXT: [[CONV1:%.*]] = zext i32 [[B]] to i64
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[CONV1]], [[CONV]]
+; CHECK-NEXT: [[SELECT:%.*]] = call i64 @llvm.umin.i64(i64 [[ADD]], i64 4294967295)
+; CHECK-NEXT: [[CONV7:%.*]] = trunc nuw i64 [[SELECT]] to i32
+; CHECK-NEXT: call void @use64(i64 [[ADD]])
+; CHECK-NEXT: ret i32 [[CONV7]]
+;
+entry:
+ %conv = zext i32 %a to i64
+ %conv1 = zext i32 %b to i64
+ %add = add i64 %conv1, %conv
+ %0 = icmp ult i64 %add, 4294967295
+ %select = select i1 %0, i64 %add, i64 4294967295
+ %conv7 = trunc i64 %select to i32
+ call void @use64(i64 %add)
+ ret i32 %conv7
+}
+
+define i32 @usub_sat32_extra_use_3(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @usub_sat32_extra_use_3(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[A]] to i64
+; CHECK-NEXT: [[CONV1:%.*]] = zext i32 [[B]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[CONV]], [[CONV1]]
+; CHECK-NEXT: [[COND:%.*]] = call i64 @llvm.smin.i64(i64 [[SUB]], i64 4294967295)
+; CHECK-NEXT: [[COND11:%.*]] = call i64 @llvm.smax.i64(i64 [[COND]], i64 0)
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[COND11]] to i32
+; CHECK-NEXT: call void @use64(i64 [[COND]])
+; CHECK-NEXT: ret i32 [[TMP0]]
+;
+entry:
+ %conv = zext i32 %a to i64
+ %conv1 = zext i32 %b to i64
+ %sub = sub i64 %conv, %conv1
+ %cmp4 = icmp sgt i64 %sub, 0
+ %cmp6 = icmp slt i64 %sub, 4294967295
+ %cond = select i1 %cmp6, i64 %sub, i64 4294967295
+ %cond11 = select i1 %cmp4, i64 %cond, i64 0
+ %conv12 = trunc i64 %cond11 to i32
+ call void @use64(i64 %cond)
+ ret i32 %conv12
+}
+
+define i32 @usub_sat32_minmax_extra_use_4(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @usub_sat32_minmax_extra_use_4(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[A]] to i64
+; CHECK-NEXT: [[CONV1:%.*]] = zext i32 [[B]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[CONV]], [[CONV1]]
+; CHECK-NEXT: [[COND:%.*]] = call i64 @llvm.smin.i64(i64 [[SUB]], i64 4294967295)
+; CHECK-NEXT: [[COND11:%.*]] = call i64 @llvm.smax.i64(i64 [[COND]], i64 0)
+; CHECK-NEXT: [[CONV12:%.*]] = trunc i64 [[COND11]] to i32
+; CHECK-NEXT: call void @use64(i64 [[COND]])
+; CHECK-NEXT: ret i32 [[CONV12]]
+;
+entry:
+ %conv = zext i32 %a to i64
+ %conv1 = zext i32 %b to i64
+ %sub = sub i64 %conv, %conv1
+ %cond = call i64 @llvm.smin.i64(i64 %sub, i64 4294967295)
+ %cond11 = call i64 @llvm.smax.i64(i64 %cond, i64 0)
+ %conv12 = trunc i64 %cond11 to i32
+ call void @use64(i64 %cond)
+ ret i32 %conv12
+}
+
+define i32 @usub_sat32_sext(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @usub_sat32_sext(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A]] to i64
+; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[CONV]], [[CONV1]]
+; CHECK-NEXT: [[COND:%.*]] = call i64 @llvm.smin.i64(i64 [[SUB]], i64 4294967295)
+; CHECK-NEXT: [[COND11:%.*]] = call i64 @llvm.smax.i64(i64 [[COND]], i64 0)
+; CHECK-NEXT: [[CONV12:%.*]] = trunc i64 [[COND11]] to i32
+; CHECK-NEXT: ret i32 [[CONV12]]
+;
+entry:
+ %conv = sext i32 %a to i64
+ %conv1 = sext i32 %b to i64
+ %sub = sub i64 %conv, %conv1
+ %cmp4 = icmp sgt i64 %sub, 0
+ %cmp6 = icmp slt i64 %sub, 4294967295
+ %cond = select i1 %cmp6, i64 %sub, i64 4294967295
+ %cond11 = select i1 %cmp4, i64 %cond, i64 0
+ %conv12 = trunc i64 %cond11 to i32
+ ret i32 %conv12
+}
+
+define i32 @usub_sat32_maxmin(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @usub_sat32_maxmin(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[A]] to i64
+; CHECK-NEXT: [[CONV1:%.*]] = zext i32 [[B]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[CONV]], [[CONV1]]
+; CHECK-NEXT: [[COND:%.*]] = call i64 @llvm.smax.i64(i64 [[SUB]], i64 0)
+; CHECK-NEXT: [[COND11:%.*]] = call i64 @llvm.smin.i64(i64 [[COND]], i64 4294967295)
+; CHECK-NEXT: [[CONV12:%.*]] = trunc nuw i64 [[COND11]] to i32
+; CHECK-NEXT: ret i32 [[CONV12]]
+;
+entry:
+ %conv = zext i32 %a to i64
+ %conv1 = zext i32 %b to i64
+ %sub = sub i64 %conv, %conv1
+ %cmp4 = icmp sgt i64 %sub, 0
+ %cmp6 = icmp slt i64 %sub, 4294967295
+ %cond = select i1 %cmp4, i64 %sub, i64 0
+ %cond11 = select i1 %cmp6, i64 %cond, i64 4294967295
+ %conv12 = trunc i64 %cond11 to i32
+ ret i32 %conv12
+}
+
+define i64 @uadd_sat32_no_trunc(i32 %a, i32 %b) {
+; CHECK-LABEL: define i64 @uadd_sat32_no_trunc(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[A]] to i64
+; CHECK-NEXT: [[CONV1:%.*]] = zext i32 [[B]] to i64
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[CONV1]], [[CONV]]
+; CHECK-NEXT: [[SELECT:%.*]] = call i64 @llvm.umin.i64(i64 [[ADD]], i64 4294967295)
+; CHECK-NEXT: ret i64 [[SELECT]]
+;
+entry:
+ %conv = zext i32 %a to i64
+ %conv1 = zext i32 %b to i64
+ %add = add i64 %conv1, %conv
+ %0 = icmp ult i64 %add, 4294967295
+ %select = select i1 %0, i64 %add, i64 4294967295
+ ret i64 %select
+}
+
+define i8 @const(i8 %X) {
+; CHECK-LABEL: define i8 @const(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT: [[CONV10:%.*]] = zext i8 [[X]] to i16
+; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.smax.i16(i16 [[CONV10]], i16 10)
+; CHECK-NEXT: [[TMP2:%.*]] = trunc nuw i16 [[TMP1]] to i8
+; CHECK-NEXT: [[CONV:%.*]] = add i8 [[TMP2]], -10
+; CHECK-NEXT: ret i8 [[CONV]]
+;
+ %conv10 = zext i8 %X to i16
+ %sub = sub i16 %conv10, 10
+ %l9 = icmp slt i16 %sub, 255
+ %l10 = select i1 %l9, i16 %sub, i16 255
+ %l11 = icmp sgt i16 %sub, 0
+ %l12 = select i1 %l11, i16 %l10, i16 0
+ %conv = trunc i16 %l12 to i8
+ ret i8 %conv
+}
+
+declare void @use64(i64)
+declare i64 @llvm.umin.i64(i64, i64)
+declare i64 @llvm.smin.i64(i64, i64)
+declare i64 @llvm.smax.i64(i64, i64)
+declare i32 @llvm.umin.i32(i32, i32)
+declare i32 @llvm.smin.i32(i32, i32)
+declare i32 @llvm.smax.i32(i32, i32)
+declare <8 x i32> @llvm.umin.v8i32(<8 x i32>, <8 x i32>)
+declare <2 x i128> @llvm.smin.v2i128(<2 x i128>, <2 x i128>)
+declare <2 x i128> @llvm.smax.v2i128(<2 x i128>, <2 x i128>)
>From 501093767750bc313f9ae28ca1cdfa3255ddaebf Mon Sep 17 00:00:00 2001
From: Huihui Zhang <huihuiz at quicinc.com>
Date: Tue, 16 Jul 2024 15:53:58 -0700
Subject: [PATCH 2/4] [InstCombine] Pattern match minmax calls for unsigned
saturation.
This patch matches the following patterns for unsigned saturation:
1) fold smax(UINT_MIN, smin(UINT_MAX, sub(zext(A), zext(B)))) into usub_sat,
where the smin and smax could be reversed.
2) fold umin(UINT_MAX, add(zext(A), zext(B))) into uadd_sat.
Note that this patch extends the signed saturation (sadd|ssub_sat) pattern
matching from D68651.
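For illustration, a rough sketch of the intended i32 rewrites (this only
restates what the uadd_sat32/usub_sat32 tests below check; it is not
additional functionality):

  %conv  = zext i32 %a to i64
  %conv1 = zext i32 %b to i64
  %sub   = sub i64 %conv, %conv1
  %min   = call i64 @llvm.smin.i64(i64 %sub, i64 4294967295)
  %max   = call i64 @llvm.smax.i64(i64 %min, i64 0)
  %res   = trunc i64 %max to i32
    ==>
  %res   = call i32 @llvm.usub.sat.i32(i32 %a, i32 %b)

  %conv  = zext i32 %a to i64
  %conv1 = zext i32 %b to i64
  %add   = add i64 %conv1, %conv
  %min   = call i64 @llvm.umin.i64(i64 %add, i64 4294967295)
  %res   = trunc i64 %min to i32
    ==>
  %res   = call i32 @llvm.uadd.sat.i32(i32 %b, i32 %a)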
---
.../InstCombine/InstCombineCalls.cpp | 57 +++++---
.../InstCombine/InstCombineInternal.h | 2 +-
.../Transforms/InstCombine/uaddsub_sat.ll | 137 +++---------------
3 files changed, 63 insertions(+), 133 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 467b291f9a4c3..bbb2f994e1aea 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1117,16 +1117,22 @@ static Instruction *moveAddAfterMinMax(IntrinsicInst *II,
return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
: BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
}
-/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
-Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
+/// Match a [s|u]add_sat or [s|u]sub_sat which is using min/max to clamp the
+/// value.
+Instruction *InstCombinerImpl::matchAddSubSat(IntrinsicInst &MinMax1) {
Type *Ty = MinMax1.getType();
- // We are looking for a tree of:
- // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
- // Where the min and max could be reversed
- Instruction *MinMax2;
+ // 1. We are looking for a tree of signed saturation:
+ // smax(SINT_MIN, smin(SINT_MAX, add|sub(sext(A), sext(B))))
+ // Where the smin and smax could be reversed.
+ // 2. A tree of unsigned saturation:
+ // smax(UINT_MIN, smin(UINT_MAX, sub(zext(A), zext(B))))
+ // Where the smin and smax could be reversed.
+ // Or umin(UINT_MAX, add(zext(A), zext(B)))
+ Instruction *MinMax2 = nullptr;
BinaryOperator *AddSub;
const APInt *MinValue, *MaxValue;
+ bool IsUnsignedSaturate = false;
if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
return nullptr;
@@ -1134,22 +1140,29 @@ Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
return nullptr;
+ } else if (match(&MinMax1, m_UMin(m_BinOp(AddSub), m_APInt(MaxValue)))) {
+ IsUnsignedSaturate = true;
} else
return nullptr;
+ if (!IsUnsignedSaturate && MinValue && MinValue->isZero())
+ IsUnsignedSaturate = true;
+
// Check that the constants clamp a saturate, and that the new type would be
// sensible to convert to.
- if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
+ if (!(*MaxValue + 1).isPowerOf2() ||
+ (!IsUnsignedSaturate && -*MinValue != *MaxValue + 1))
return nullptr;
// In what bitwidth can this be treated as saturating arithmetics?
- unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
+ unsigned NewBitWidth =
+ (*MaxValue + 1).logBase2() + (IsUnsignedSaturate ? 0 : 1);
// FIXME: This isn't quite right for vectors, but using the scalar type is a
// good first approximation for what should be done there.
if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
return nullptr;
// Also make sure that the inner min/max and the add/sub have one use.
- if (!MinMax2->hasOneUse() || !AddSub->hasOneUse())
+ if ((MinMax2 && !MinMax2->hasOneUse()) || !AddSub->hasOneUse())
return nullptr;
// Create the new type (which can be a vector type)
@@ -1157,17 +1170,25 @@ Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
Intrinsic::ID IntrinsicID;
if (AddSub->getOpcode() == Instruction::Add)
- IntrinsicID = Intrinsic::sadd_sat;
+ IntrinsicID =
+ IsUnsignedSaturate ? Intrinsic::uadd_sat : Intrinsic::sadd_sat;
else if (AddSub->getOpcode() == Instruction::Sub)
- IntrinsicID = Intrinsic::ssub_sat;
+ IntrinsicID =
+ IsUnsignedSaturate ? Intrinsic::usub_sat : Intrinsic::ssub_sat;
else
return nullptr;
// The two operands of the add/sub must be nsw-truncatable to the NewTy. This
// is usually achieved via a sext from a smaller type.
- if (ComputeMaxSignificantBits(AddSub->getOperand(0), 0, AddSub) >
- NewBitWidth ||
- ComputeMaxSignificantBits(AddSub->getOperand(1), 0, AddSub) > NewBitWidth)
+ Value *Op0 = AddSub->getOperand(0);
+ Value *Op1 = AddSub->getOperand(1);
+ unsigned Op0MaxBitWidth =
+ IsUnsignedSaturate ? computeKnownBits(Op0, 0, AddSub).countMaxActiveBits()
+ : ComputeMaxSignificantBits(Op0, 0, AddSub);
+ unsigned Op1MaxBitWidth =
+ IsUnsignedSaturate ? computeKnownBits(Op1, 0, AddSub).countMaxActiveBits()
+ : ComputeMaxSignificantBits(Op1, 0, AddSub);
+ if (Op0MaxBitWidth > NewBitWidth || Op1MaxBitWidth > NewBitWidth)
return nullptr;
// Finally create and return the sat intrinsic, truncated to the new type
@@ -1175,10 +1196,10 @@ Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
Value *Sat = Builder.CreateCall(F, {AT, BT});
- return CastInst::Create(Instruction::SExt, Sat, Ty);
+ return CastInst::Create(
+ IsUnsignedSaturate ? Instruction::ZExt : Instruction::SExt, Sat, Ty);
}
-
/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
/// can only be one of two possible constant values -- turn that into a select
/// of constants.
@@ -1878,8 +1899,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Instruction *Sel = foldClampRangeOfTwo(II, Builder))
return Sel;
- if (Instruction *SAdd = matchSAddSubSat(*II))
- return SAdd;
+ if (Instruction *AddSubSat = matchAddSubSat(*II))
+ return AddSubSat;
if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder, SQ))
return replaceInstUsesWith(*II, NewMinMax);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 64fbcc80e0edf..b76d71da230a7 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -392,7 +392,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
Instruction *narrowMathIfNoOverflow(BinaryOperator &I);
Instruction *narrowFunnelShift(TruncInst &Trunc);
Instruction *optimizeBitCastFromPhi(CastInst &CI, PHINode *PN);
- Instruction *matchSAddSubSat(IntrinsicInst &MinMax1);
+ Instruction *matchAddSubSat(IntrinsicInst &MinMax1);
Instruction *foldNot(BinaryOperator &I);
Instruction *foldBinOpOfDisplacedShifts(BinaryOperator &I);
diff --git a/llvm/test/Transforms/InstCombine/uaddsub_sat.ll b/llvm/test/Transforms/InstCombine/uaddsub_sat.ll
index 362345a15949b..469a651ef9a93 100644
--- a/llvm/test/Transforms/InstCombine/uaddsub_sat.ll
+++ b/llvm/test/Transforms/InstCombine/uaddsub_sat.ll
@@ -7,11 +7,7 @@ define i32 @uadd_sat32(i32 %a, i32 %b) {
; CHECK-LABEL: define i32 @uadd_sat32(
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[A]] to i64
-; CHECK-NEXT: [[CONV1:%.*]] = zext i32 [[B]] to i64
-; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[CONV1]], [[CONV]]
-; CHECK-NEXT: [[SELECT:%.*]] = call i64 @llvm.umin.i64(i64 [[ADD]], i64 4294967295)
-; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i64 [[SELECT]] to i32
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[B]], i32 [[A]])
; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
@@ -28,11 +24,7 @@ define i32 @uadd_sat32_min(i32 %a, i32 %b) {
; CHECK-LABEL: define i32 @uadd_sat32_min(
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[A]] to i64
-; CHECK-NEXT: [[CONV1:%.*]] = zext i32 [[B]] to i64
-; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[CONV1]], [[CONV]]
-; CHECK-NEXT: [[MIN:%.*]] = call i64 @llvm.umin.i64(i64 [[ADD]], i64 4294967295)
-; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i64 [[MIN]] to i32
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[B]], i32 [[A]])
; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
@@ -48,12 +40,7 @@ define i32 @usub_sat32(i32 %a, i32 %b) {
; CHECK-LABEL: define i32 @usub_sat32(
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[A]] to i64
-; CHECK-NEXT: [[CONV1:%.*]] = zext i32 [[B]] to i64
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[CONV]], [[CONV1]]
-; CHECK-NEXT: [[COND:%.*]] = call i64 @llvm.smin.i64(i64 [[SUB]], i64 4294967295)
-; CHECK-NEXT: [[COND11:%.*]] = call i64 @llvm.smax.i64(i64 [[COND]], i64 0)
-; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[COND11]] to i32
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[A]], i32 [[B]])
; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
@@ -72,12 +59,7 @@ define i32 @usub_sat32_minmax(i32 %a, i32 %b) {
; CHECK-LABEL: define i32 @usub_sat32_minmax(
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[A]] to i64
-; CHECK-NEXT: [[CONV1:%.*]] = zext i32 [[B]] to i64
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[CONV]], [[CONV1]]
-; CHECK-NEXT: [[COND:%.*]] = call i64 @llvm.smin.i64(i64 [[SUB]], i64 4294967295)
-; CHECK-NEXT: [[COND11:%.*]] = call i64 @llvm.smax.i64(i64 [[COND]], i64 0)
-; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[COND11]] to i32
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[A]], i32 [[B]])
; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
@@ -94,11 +76,7 @@ define i16 @uadd_sat16(i16 %a, i16 %b) {
; CHECK-LABEL: define i16 @uadd_sat16(
; CHECK-SAME: i16 [[A:%.*]], i16 [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[A]] to i32
-; CHECK-NEXT: [[CONV1:%.*]] = zext i16 [[B]] to i32
-; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV1]], [[CONV]]
-; CHECK-NEXT: [[SELECT:%.*]] = call i32 @llvm.umin.i32(i32 [[ADD]], i32 65535)
-; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i32 [[SELECT]] to i16
+; CHECK-NEXT: [[TMP0:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[B]], i16 [[A]])
; CHECK-NEXT: ret i16 [[TMP0]]
;
entry:
@@ -115,11 +93,7 @@ define i16 @uadd_sat16_min(i16 %a, i16 %b) {
; CHECK-LABEL: define i16 @uadd_sat16_min(
; CHECK-SAME: i16 [[A:%.*]], i16 [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[A]] to i32
-; CHECK-NEXT: [[CONV1:%.*]] = zext i16 [[B]] to i32
-; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV1]], [[CONV]]
-; CHECK-NEXT: [[MIN:%.*]] = call i32 @llvm.umin.i32(i32 [[ADD]], i32 65535)
-; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i32 [[MIN]] to i16
+; CHECK-NEXT: [[TMP0:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[B]], i16 [[A]])
; CHECK-NEXT: ret i16 [[TMP0]]
;
entry:
@@ -135,12 +109,7 @@ define i16 @usub_sat16(i16 %a, i16 %b) {
; CHECK-LABEL: define i16 @usub_sat16(
; CHECK-SAME: i16 [[A:%.*]], i16 [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[A]] to i32
-; CHECK-NEXT: [[CONV1:%.*]] = zext i16 [[B]] to i32
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[CONV]], [[CONV1]]
-; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.smin.i32(i32 [[SUB]], i32 65535)
-; CHECK-NEXT: [[COND11:%.*]] = call i32 @llvm.smax.i32(i32 [[COND]], i32 0)
-; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[COND11]] to i16
+; CHECK-NEXT: [[TMP0:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[A]], i16 [[B]])
; CHECK-NEXT: ret i16 [[TMP0]]
;
entry:
@@ -159,12 +128,7 @@ define i16 @usub_sat16_minmax(i16 %a, i16 %b) {
; CHECK-LABEL: define i16 @usub_sat16_minmax(
; CHECK-SAME: i16 [[A:%.*]], i16 [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[A]] to i32
-; CHECK-NEXT: [[CONV1:%.*]] = zext i16 [[B]] to i32
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[CONV]], [[CONV1]]
-; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.smin.i32(i32 [[SUB]], i32 65535)
-; CHECK-NEXT: [[COND11:%.*]] = call i32 @llvm.smax.i32(i32 [[COND]], i32 0)
-; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[COND11]] to i16
+; CHECK-NEXT: [[TMP0:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[A]], i16 [[B]])
; CHECK-NEXT: ret i16 [[TMP0]]
;
entry:
@@ -181,11 +145,7 @@ define i8 @uadd_sat8(i8 %a, i8 %b) {
; CHECK-LABEL: define i8 @uadd_sat8(
; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[A]] to i32
-; CHECK-NEXT: [[CONV1:%.*]] = zext i8 [[B]] to i32
-; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV1]], [[CONV]]
-; CHECK-NEXT: [[SELECT:%.*]] = call i32 @llvm.umin.i32(i32 [[ADD]], i32 255)
-; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i32 [[SELECT]] to i8
+; CHECK-NEXT: [[TMP0:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[B]], i8 [[A]])
; CHECK-NEXT: ret i8 [[TMP0]]
;
entry:
@@ -202,11 +162,7 @@ define i8 @uadd_sat8_min(i8 %a, i8 %b) {
; CHECK-LABEL: define i8 @uadd_sat8_min(
; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[A]] to i32
-; CHECK-NEXT: [[CONV1:%.*]] = zext i8 [[B]] to i32
-; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV1]], [[CONV]]
-; CHECK-NEXT: [[MIN:%.*]] = call i32 @llvm.umin.i32(i32 [[ADD]], i32 255)
-; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i32 [[MIN]] to i8
+; CHECK-NEXT: [[TMP0:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[B]], i8 [[A]])
; CHECK-NEXT: ret i8 [[TMP0]]
;
entry:
@@ -222,12 +178,7 @@ define i8 @usub_sat8(i8 %a, i8 %b) {
; CHECK-LABEL: define i8 @usub_sat8(
; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[A]] to i32
-; CHECK-NEXT: [[CONV1:%.*]] = zext i8 [[B]] to i32
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[CONV]], [[CONV1]]
-; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.smin.i32(i32 [[SUB]], i32 255)
-; CHECK-NEXT: [[COND11:%.*]] = call i32 @llvm.smax.i32(i32 [[COND]], i32 0)
-; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[COND11]] to i8
+; CHECK-NEXT: [[TMP0:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[A]], i8 [[B]])
; CHECK-NEXT: ret i8 [[TMP0]]
;
entry:
@@ -246,12 +197,7 @@ define i8 @usub_sat8_minmax(i8 %a, i8 %b) {
; CHECK-LABEL: define i8 @usub_sat8_minmax(
; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[A]] to i32
-; CHECK-NEXT: [[CONV1:%.*]] = zext i8 [[B]] to i32
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[CONV]], [[CONV1]]
-; CHECK-NEXT: [[COND:%.*]] = call i32 @llvm.smin.i32(i32 [[SUB]], i32 255)
-; CHECK-NEXT: [[COND11:%.*]] = call i32 @llvm.smax.i32(i32 [[COND]], i32 0)
-; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[COND11]] to i8
+; CHECK-NEXT: [[TMP0:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[A]], i8 [[B]])
; CHECK-NEXT: ret i8 [[TMP0]]
;
entry:
@@ -268,11 +214,7 @@ define i64 @uadd_sat64(i64 %a, i64 %b) {
; CHECK-LABEL: define i64 @uadd_sat64(
; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[CONV:%.*]] = zext i64 [[A]] to i65
-; CHECK-NEXT: [[CONV1:%.*]] = zext i64 [[B]] to i65
-; CHECK-NEXT: [[ADD:%.*]] = add nuw i65 [[CONV1]], [[CONV]]
-; CHECK-NEXT: [[SELECT:%.*]] = call i65 @llvm.umin.i65(i65 [[ADD]], i65 18446744073709551615)
-; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i65 [[SELECT]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.uadd.sat.i64(i64 [[B]], i64 [[A]])
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
@@ -289,12 +231,7 @@ define i64 @usub_sat64(i64 %a, i64 %b) {
; CHECK-LABEL: define i64 @usub_sat64(
; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[CONV:%.*]] = zext i64 [[A]] to i128
-; CHECK-NEXT: [[CONV1:%.*]] = zext i64 [[B]] to i128
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i128 [[CONV]], [[CONV1]]
-; CHECK-NEXT: [[COND:%.*]] = call i128 @llvm.smin.i128(i128 [[SUB]], i128 18446744073709551615)
-; CHECK-NEXT: [[COND11:%.*]] = call i128 @llvm.smax.i128(i128 [[COND]], i128 0)
-; CHECK-NEXT: [[TMP0:%.*]] = trunc i128 [[COND11]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A]], i64 [[B]])
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
@@ -313,11 +250,7 @@ define <4 x i32> @uadd_satv4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: define <4 x i32> @uadd_satv4i32(
; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[CONV:%.*]] = zext <4 x i32> [[A]] to <4 x i64>
-; CHECK-NEXT: [[CONV1:%.*]] = zext <4 x i32> [[B]] to <4 x i64>
-; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw <4 x i64> [[CONV1]], [[CONV]]
-; CHECK-NEXT: [[SELECT:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[ADD]], <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
-; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw <4 x i64> [[SELECT]] to <4 x i32>
+; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[B]], <4 x i32> [[A]])
; CHECK-NEXT: ret <4 x i32> [[TMP0]]
;
entry:
@@ -334,11 +267,7 @@ define <8 x i16> @uadd_satv8i16_minmax(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: define <8 x i16> @uadd_satv8i16_minmax(
; CHECK-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[CONV:%.*]] = zext <8 x i16> [[A]] to <8 x i32>
-; CHECK-NEXT: [[CONV1:%.*]] = zext <8 x i16> [[B]] to <8 x i32>
-; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw <8 x i32> [[CONV1]], [[CONV]]
-; CHECK-NEXT: [[SELECT:%.*]] = call <8 x i32> @llvm.umin.v8i32(<8 x i32> [[ADD]], <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>)
-; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw <8 x i32> [[SELECT]] to <8 x i16>
+; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> [[B]], <8 x i16> [[A]])
; CHECK-NEXT: ret <8 x i16> [[TMP0]]
;
entry:
@@ -354,12 +283,7 @@ define <16 x i8> @usub_satv16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: define <16 x i8> @usub_satv16i8(
; CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[CONV:%.*]] = zext <16 x i8> [[A]] to <16 x i32>
-; CHECK-NEXT: [[CONV1:%.*]] = zext <16 x i8> [[B]] to <16 x i32>
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw <16 x i32> [[CONV1]], [[CONV]]
-; CHECK-NEXT: [[SELECT:%.*]] = call <16 x i32> @llvm.smin.v16i32(<16 x i32> [[SUB]], <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>)
-; CHECK-NEXT: [[SELECT8:%.*]] = call <16 x i32> @llvm.smax.v16i32(<16 x i32> [[SELECT]], <16 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP0:%.*]] = trunc <16 x i32> [[SELECT8]] to <16 x i8>
+; CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> [[B]], <16 x i8> [[A]])
; CHECK-NEXT: ret <16 x i8> [[TMP0]]
;
entry:
@@ -378,12 +302,7 @@ define <2 x i64> @usub_satv2i64_minmax(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: define <2 x i64> @usub_satv2i64_minmax(
; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i64> [[A]] to <2 x i128>
-; CHECK-NEXT: [[CONV1:%.*]] = zext <2 x i64> [[B]] to <2 x i128>
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw <2 x i128> [[CONV1]], [[CONV]]
-; CHECK-NEXT: [[SELECT:%.*]] = call <2 x i128> @llvm.smin.v2i128(<2 x i128> [[SUB]], <2 x i128> <i128 18446744073709551615, i128 18446744073709551615>)
-; CHECK-NEXT: [[SELECT8:%.*]] = call <2 x i128> @llvm.smax.v2i128(<2 x i128> [[SELECT]], <2 x i128> zeroinitializer)
-; CHECK-NEXT: [[TMP0:%.*]] = trunc <2 x i128> [[SELECT8]] to <2 x i64>
+; CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> [[B]], <2 x i64> [[A]])
; CHECK-NEXT: ret <2 x i64> [[TMP0]]
;
entry:
@@ -400,11 +319,8 @@ define i32 @uadd_sat32_extra_use_1(i32 %a, i32 %b) {
; CHECK-LABEL: define i32 @uadd_sat32_extra_use_1(
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[A]] to i64
-; CHECK-NEXT: [[CONV1:%.*]] = zext i32 [[B]] to i64
-; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[CONV1]], [[CONV]]
-; CHECK-NEXT: [[SELECT:%.*]] = call i64 @llvm.umin.i64(i64 [[ADD]], i64 4294967295)
-; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i64 [[SELECT]] to i32
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[B]], i32 [[A]])
+; CHECK-NEXT: [[SELECT:%.*]] = zext i32 [[TMP0]] to i64
; CHECK-NEXT: call void @use64(i64 [[SELECT]])
; CHECK-NEXT: ret i32 [[TMP0]]
;
@@ -520,12 +436,7 @@ define i32 @usub_sat32_maxmin(i32 %a, i32 %b) {
; CHECK-LABEL: define i32 @usub_sat32_maxmin(
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[A]] to i64
-; CHECK-NEXT: [[CONV1:%.*]] = zext i32 [[B]] to i64
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[CONV]], [[CONV1]]
-; CHECK-NEXT: [[COND:%.*]] = call i64 @llvm.smax.i64(i64 [[SUB]], i64 0)
-; CHECK-NEXT: [[COND11:%.*]] = call i64 @llvm.smin.i64(i64 [[COND]], i64 4294967295)
-; CHECK-NEXT: [[CONV12:%.*]] = trunc nuw i64 [[COND11]] to i32
+; CHECK-NEXT: [[CONV12:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[A]], i32 [[B]])
; CHECK-NEXT: ret i32 [[CONV12]]
;
entry:
@@ -544,10 +455,8 @@ define i64 @uadd_sat32_no_trunc(i32 %a, i32 %b) {
; CHECK-LABEL: define i64 @uadd_sat32_no_trunc(
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[A]] to i64
-; CHECK-NEXT: [[CONV1:%.*]] = zext i32 [[B]] to i64
-; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[CONV1]], [[CONV]]
-; CHECK-NEXT: [[SELECT:%.*]] = call i64 @llvm.umin.i64(i64 [[ADD]], i64 4294967295)
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[B]], i32 [[A]])
+; CHECK-NEXT: [[SELECT:%.*]] = zext i32 [[TMP0]] to i64
; CHECK-NEXT: ret i64 [[SELECT]]
;
entry:
>From 603655d2ed39e4622756924ae4d449210bc553fd Mon Sep 17 00:00:00 2001
From: Huihui Zhang <huihuiz at quicinc.com>
Date: Wed, 17 Jul 2024 16:25:09 -0700
Subject: [PATCH 3/4] Fix pattern match for umin(UINT_MAX, BinOp(zext(A),
zext(B))).
Bail out if BinOp is not known non-negative.
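For reference, a minimal example of why the non-negativity check is required
(the same scenario as the invalid_sub_could_be_negative test added below):

  %conv  = zext i32 %a to i64
  %conv3 = zext i32 %b to i64
  %sub   = sub nsw i64 %conv, %conv3
  %min   = call i64 @llvm.umin.i64(i64 %sub, i64 4294967295)
  %res   = trunc i64 %min to i32

With %a = 0 and %b = 1, %sub is -1, which umin treats as 0xFFFFFFFFFFFFFFFF,
so %res ends up as 0xFFFFFFFF; @llvm.usub.sat.i32(i32 0, i32 1) would instead
return 0. The fold is therefore only valid when %sub is known non-negative.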
---
.../InstCombine/InstCombineCalls.cpp | 3 ++
.../Transforms/InstCombine/uaddsub_sat.ll | 30 +++++++++++++++----
2 files changed, 27 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index bbb2f994e1aea..a3fc5e9f175a9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1141,6 +1141,9 @@ Instruction *InstCombinerImpl::matchAddSubSat(IntrinsicInst &MinMax1) {
if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
return nullptr;
} else if (match(&MinMax1, m_UMin(m_BinOp(AddSub), m_APInt(MaxValue)))) {
+ // Bail out if AddSub could be negative.
+ if (!isKnownNonNegative(AddSub, SQ.getWithInstruction(AddSub)))
+ return nullptr;
IsUnsignedSaturate = true;
} else
return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/uaddsub_sat.ll b/llvm/test/Transforms/InstCombine/uaddsub_sat.ll
index 469a651ef9a93..affc81ab62392 100644
--- a/llvm/test/Transforms/InstCombine/uaddsub_sat.ll
+++ b/llvm/test/Transforms/InstCombine/uaddsub_sat.ll
@@ -218,12 +218,12 @@ define i64 @uadd_sat64(i64 %a, i64 %b) {
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
- %conv = zext i64 %a to i65
- %conv1 = zext i64 %b to i65
- %add = add i65 %conv1, %conv
- %0 = icmp ult i65 %add, 18446744073709551615
- %select = select i1 %0, i65 %add, i65 18446744073709551615
- %conv2 = trunc i65 %select to i64
+ %conv = zext i64 %a to i128
+ %conv1 = zext i64 %b to i128
+ %add = add i128 %conv1, %conv
+ %0 = icmp ult i128 %add, 18446744073709551615
+ %select = select i1 %0, i128 %add, i128 18446744073709551615
+ %conv2 = trunc i128 %select to i64
ret i64 %conv2
}
@@ -487,6 +487,24 @@ define i8 @const(i8 %X) {
ret i8 %conv
}
+define i32 @invalid_sub_could_be_negative(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @invalid_sub_could_be_negative(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[A]] to i64
+; CHECK-NEXT: [[CONV3:%.*]] = zext i32 [[B]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[CONV]], [[CONV3]]
+; CHECK-NEXT: [[SPEC_STORE_SELECT13:%.*]] = call i64 @llvm.umin.i64(i64 [[SUB]], i64 4294967295)
+; CHECK-NEXT: [[CONV10:%.*]] = trunc nuw i64 [[SPEC_STORE_SELECT13]] to i32
+; CHECK-NEXT: ret i32 [[CONV10]]
+;
+ %conv = zext i32 %a to i64
+ %conv3 = zext i32 %b to i64
+ %sub = sub nsw i64 %conv, %conv3
+ %spec.store.select13 = call i64 @llvm.umin.i64(i64 %sub, i64 4294967295)
+ %conv10 = trunc i64 %spec.store.select13 to i32
+ ret i32 %conv10
+}
+
declare void @use64(i64)
declare i64 @llvm.umin.i64(i64, i64)
declare i64 @llvm.smin.i64(i64, i64)
>From 6f86421be209686ccab1ed35cbb3c10c44650cfe Mon Sep 17 00:00:00 2001
From: Huihui Zhang <huihuiz at quicinc.com>
Date: Mon, 22 Jul 2024 16:29:11 -0700
Subject: [PATCH 4/4] Simplify unsigned saturation pattern match rules:
1. fold smax(UINT_MIN, sub(zext(A), zext(B))) into usub_sat;
2. fold umin(UINT_MAX, add(zext(A), zext(B))) into uadd_sat.
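A rough before/after sketch of the simplified usub_sat rule (mirroring the
updated usub_sat32_max test below):

  %conv  = zext i32 %a to i64
  %conv1 = zext i32 %b to i64
  %sub   = sub i64 %conv, %conv1
  %cond  = call i64 @llvm.smax.i64(i64 %sub, i64 0)
  %res   = trunc i64 %cond to i32
    ==>
  %res   = call i32 @llvm.usub.sat.i32(i32 %a, i32 %b)

The uadd_sat side is unchanged from the previous revision:
umin(UINT_MAX, add(zext(A), zext(B))) followed by the trunc still becomes
@llvm.uadd.sat.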
---
.../InstCombine/InstCombineCalls.cpp | 82 +++--
.../Transforms/InstCombine/uaddsub_sat.ll | 323 ++++++++++--------
2 files changed, 226 insertions(+), 179 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index a3fc5e9f175a9..8cd28b785abe1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1126,39 +1126,68 @@ Instruction *InstCombinerImpl::matchAddSubSat(IntrinsicInst &MinMax1) {
// smax(SINT_MIN, smin(SINT_MAX, add|sub(sext(A), sext(B))))
// Where the smin and smax could be reversed.
// 2. A tree of unsigned saturation:
- // smax(UINT_MIN, smin(UINT_MAX, sub(zext(A), zext(B))))
- // Where the smin and smax could be reversed.
- // Or umin(UINT_MAX, add(zext(A), zext(B)))
+ // smax(UINT_MIN, sub(zext(A), zext(B)))
+ // Or umin(UINT_MAX, add(zext(A), zext(B))).
Instruction *MinMax2 = nullptr;
BinaryOperator *AddSub;
- const APInt *MinValue, *MaxValue;
+ const APInt *MinValue = nullptr, *MaxValue = nullptr;
bool IsUnsignedSaturate = false;
- if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
- if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
- return nullptr;
- } else if (match(&MinMax1,
- m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
- if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
- return nullptr;
- } else if (match(&MinMax1, m_UMin(m_BinOp(AddSub), m_APInt(MaxValue)))) {
+ // Pattern match for unsigned saturation.
+ if (match(&MinMax1, m_UMin(m_BinOp(AddSub), m_APInt(MaxValue)))) {
// Bail out if AddSub could be negative.
if (!isKnownNonNegative(AddSub, SQ.getWithInstruction(AddSub)))
return nullptr;
IsUnsignedSaturate = true;
- } else
- return nullptr;
-
- if (!IsUnsignedSaturate && MinValue && MinValue->isZero())
+ } else if (match(&MinMax1, m_SMax(m_BinOp(AddSub), m_APInt(MinValue)))) {
+ if (!MinValue->isZero())
+ return nullptr;
IsUnsignedSaturate = true;
+ } else {
+ // Pattern match for signed saturation.
+ if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
+ if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
+ return nullptr;
+ } else if (match(&MinMax1,
+ m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
+ if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
+ return nullptr;
+ } else
+ return nullptr;
+ }
// Check that the constants clamp a saturate, and that the new type would be
// sensible to convert to.
- if (!(*MaxValue + 1).isPowerOf2() ||
+ if ((MaxValue && !(*MaxValue + 1).isPowerOf2()) ||
(!IsUnsignedSaturate && -*MinValue != *MaxValue + 1))
return nullptr;
- // In what bitwidth can this be treated as saturating arithmetics?
- unsigned NewBitWidth =
- (*MaxValue + 1).logBase2() + (IsUnsignedSaturate ? 0 : 1);
+
+ // Trying to decide the bitwidth for saturating arithmetics.
+ Value *Op0 = AddSub->getOperand(0);
+ Value *Op1 = AddSub->getOperand(1);
+ unsigned Op0MaxBitWidth =
+ IsUnsignedSaturate ? computeKnownBits(Op0, 0, AddSub).countMaxActiveBits()
+ : ComputeMaxSignificantBits(Op0, 0, AddSub);
+ unsigned Op1MaxBitWidth =
+ IsUnsignedSaturate ? computeKnownBits(Op1, 0, AddSub).countMaxActiveBits()
+ : ComputeMaxSignificantBits(Op1, 0, AddSub);
+ unsigned NewBitWidth = IsUnsignedSaturate
+ ? std::max(Op0MaxBitWidth, Op1MaxBitWidth)
+ : (*MaxValue + 1).logBase2() + 1;
+
+ if (!IsUnsignedSaturate) {
+ // The two operands of the add/sub must be nsw-truncatable to type with
+ // NewBitWidth. This is usually achieved via a sext from a smaller type.
+ if (Op0MaxBitWidth > NewBitWidth || Op1MaxBitWidth > NewBitWidth)
+ return nullptr;
+ } else {
+ // Bail out if NewBitWidth is not smaller than the bitwidth of MinMax1.
+ if (NewBitWidth == Ty->getScalarType()->getIntegerBitWidth())
+ return nullptr;
+ // Bail out if MaxValue is not a valid unsigned saturating maximum value.
+ if (MaxValue && (*MaxValue + 1).logBase2() != NewBitWidth)
+ return nullptr;
+ }
+
// FIXME: This isn't quite right for vectors, but using the scalar type is a
// good first approximation for what should be done there.
if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
@@ -1181,19 +1210,6 @@ Instruction *InstCombinerImpl::matchAddSubSat(IntrinsicInst &MinMax1) {
else
return nullptr;
- // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
- // is usually achieved via a sext from a smaller type.
- Value *Op0 = AddSub->getOperand(0);
- Value *Op1 = AddSub->getOperand(1);
- unsigned Op0MaxBitWidth =
- IsUnsignedSaturate ? computeKnownBits(Op0, 0, AddSub).countMaxActiveBits()
- : ComputeMaxSignificantBits(Op0, 0, AddSub);
- unsigned Op1MaxBitWidth =
- IsUnsignedSaturate ? computeKnownBits(Op1, 0, AddSub).countMaxActiveBits()
- : ComputeMaxSignificantBits(Op1, 0, AddSub);
- if (Op0MaxBitWidth > NewBitWidth || Op1MaxBitWidth > NewBitWidth)
- return nullptr;
-
// Finally create and return the sat intrinsic, truncated to the new type
Function *F = Intrinsic::getDeclaration(MinMax1.getModule(), IntrinsicID, NewTy);
Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
diff --git a/llvm/test/Transforms/InstCombine/uaddsub_sat.ll b/llvm/test/Transforms/InstCombine/uaddsub_sat.ll
index affc81ab62392..3738555aaa684 100644
--- a/llvm/test/Transforms/InstCombine/uaddsub_sat.ll
+++ b/llvm/test/Transforms/InstCombine/uaddsub_sat.ll
@@ -1,8 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
-target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
-
define i32 @uadd_sat32(i32 %a, i32 %b) {
; CHECK-LABEL: define i32 @uadd_sat32(
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
@@ -14,8 +12,8 @@ entry:
%conv = zext i32 %a to i64
%conv1 = zext i32 %b to i64
%add = add i64 %conv1, %conv
- %0 = icmp ult i64 %add, 4294967295
- %select = select i1 %0, i64 %add, i64 4294967295
+ %cmp = icmp ult i64 %add, 4294967295
+ %select = select i1 %cmp, i64 %add, i64 4294967295
%conv2 = trunc i64 %select to i32
ret i32 %conv2
}
@@ -47,16 +45,15 @@ entry:
%conv = zext i32 %a to i64
%conv1 = zext i32 %b to i64
%sub = sub i64 %conv, %conv1
- %cmp4 = icmp sgt i64 %sub, 0
- %cmp6 = icmp slt i64 %sub, 4294967295
- %cond = select i1 %cmp6, i64 %sub, i64 4294967295
- %cond11 = select i1 %cmp4, i64 %cond, i64 0
- %conv12 = trunc i64 %cond11 to i32
- ret i32 %conv12
+ %cmp = icmp sgt i64 %sub, 0
+ %cond = select i1 %cmp, i64 %sub, i64 0
+ %conv2 = trunc i64 %cond to i32
+ ret i32 %conv2
+
}
-define i32 @usub_sat32_minmax(i32 %a, i32 %b) {
-; CHECK-LABEL: define i32 @usub_sat32_minmax(
+define i32 @usub_sat32_max(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @usub_sat32_max(
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[A]], i32 [[B]])
@@ -66,10 +63,9 @@ entry:
%conv = zext i32 %a to i64
%conv1 = zext i32 %b to i64
%sub = sub i64 %conv, %conv1
- %cond = call i64 @llvm.smin.i64(i64 %sub, i64 4294967295)
- %cond11 = call i64 @llvm.smax.i64(i64 %cond, i64 0)
- %conv12 = trunc i64 %cond11 to i32
- ret i32 %conv12
+ %cond = call i64 @llvm.smax.i64(i64 %sub, i64 0)
+ %conv2 = trunc i64 %cond to i32
+ ret i32 %conv2
}
define i16 @uadd_sat16(i16 %a, i16 %b) {
@@ -83,8 +79,8 @@ entry:
%conv = zext i16 %a to i32
%conv1 = zext i16 %b to i32
%add = add i32 %conv1, %conv
- %0 = icmp ult i32 %add, 65535
- %select = select i1 %0, i32 %add, i32 65535
+ %cmp = icmp ult i32 %add, 65535
+ %select = select i1 %cmp, i32 %add, i32 65535
%conv2 = trunc i32 %select to i16
ret i16 %conv2
}
@@ -116,16 +112,14 @@ entry:
%conv = zext i16 %a to i32
%conv1 = zext i16 %b to i32
%sub = sub i32 %conv, %conv1
- %cmp4 = icmp sgt i32 %sub, 0
- %cmp6 = icmp slt i32 %sub, 65535
- %cond = select i1 %cmp6, i32 %sub, i32 65535
- %cond11 = select i1 %cmp4, i32 %cond, i32 0
- %conv12 = trunc i32 %cond11 to i16
- ret i16 %conv12
+ %cmp = icmp sgt i32 %sub, 0
+ %cond = select i1 %cmp, i32 %sub, i32 0
+ %conv2 = trunc i32 %cond to i16
+ ret i16 %conv2
}
-define i16 @usub_sat16_minmax(i16 %a, i16 %b) {
-; CHECK-LABEL: define i16 @usub_sat16_minmax(
+define i16 @usub_sat16_max(i16 %a, i16 %b) {
+; CHECK-LABEL: define i16 @usub_sat16_max(
; CHECK-SAME: i16 [[A:%.*]], i16 [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[A]], i16 [[B]])
@@ -135,10 +129,9 @@ entry:
%conv = zext i16 %a to i32
%conv1 = zext i16 %b to i32
%sub = sub i32 %conv, %conv1
- %cond = call i32 @llvm.smin.i32(i32 %sub, i32 65535)
- %cond11 = call i32 @llvm.smax.i32(i32 %cond, i32 0)
- %conv12 = trunc i32 %cond11 to i16
- ret i16 %conv12
+ %cond = call i32 @llvm.smax.i32(i32 %sub, i32 0)
+ %conv2 = trunc i32 %cond to i16
+ ret i16 %conv2
}
define i8 @uadd_sat8(i8 %a, i8 %b) {
@@ -152,8 +145,8 @@ entry:
%conv = zext i8 %a to i32
%conv1 = zext i8 %b to i32
%add = add i32 %conv1, %conv
- %0 = icmp ult i32 %add, 255
- %select = select i1 %0, i32 %add, i32 255
+ %cmp = icmp ult i32 %add, 255
+ %select = select i1 %cmp, i32 %add, i32 255
%conv2 = trunc i32 %select to i8
ret i8 %conv2
}
@@ -185,16 +178,14 @@ entry:
%conv = zext i8 %a to i32
%conv1 = zext i8 %b to i32
%sub = sub i32 %conv, %conv1
- %cmp4 = icmp sgt i32 %sub, 0
- %cmp6 = icmp slt i32 %sub, 255
- %cond = select i1 %cmp6, i32 %sub, i32 255
- %cond11 = select i1 %cmp4, i32 %cond, i32 0
- %conv12 = trunc i32 %cond11 to i8
- ret i8 %conv12
+ %cmp = icmp sgt i32 %sub, 0
+ %cond = select i1 %cmp, i32 %sub, i32 0
+ %conv2 = trunc i32 %cond to i8
+ ret i8 %conv2
}
-define i8 @usub_sat8_minmax(i8 %a, i8 %b) {
-; CHECK-LABEL: define i8 @usub_sat8_minmax(
+define i8 @usub_sat8_max(i8 %a, i8 %b) {
+; CHECK-LABEL: define i8 @usub_sat8_max(
; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[A]], i8 [[B]])
@@ -204,10 +195,9 @@ entry:
%conv = zext i8 %a to i32
%conv1 = zext i8 %b to i32
%sub = sub i32 %conv, %conv1
- %cond = call i32 @llvm.smin.i32(i32 %sub, i32 255)
- %cond11 = call i32 @llvm.smax.i32(i32 %cond, i32 0)
- %conv12 = trunc i32 %cond11 to i8
- ret i8 %conv12
+ %cond = call i32 @llvm.smax.i32(i32 %sub, i32 0)
+ %conv2 = trunc i32 %cond to i8
+ ret i8 %conv2
}
define i64 @uadd_sat64(i64 %a, i64 %b) {
@@ -221,12 +211,28 @@ entry:
%conv = zext i64 %a to i128
%conv1 = zext i64 %b to i128
%add = add i128 %conv1, %conv
- %0 = icmp ult i128 %add, 18446744073709551615
- %select = select i1 %0, i128 %add, i128 18446744073709551615
+ %cmp = icmp ult i128 %add, 18446744073709551615
+ %select = select i1 %cmp, i128 %add, i128 18446744073709551615
%conv2 = trunc i128 %select to i64
ret i64 %conv2
}
+define i64 @uadd_sat64_min(i64 %a, i64 %b) {
+; CHECK-LABEL: define i64 @uadd_sat64_min(
+; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV2:%.*]] = call i64 @llvm.uadd.sat.i64(i64 [[B]], i64 [[A]])
+; CHECK-NEXT: ret i64 [[CONV2]]
+;
+entry:
+ %conv = zext i64 %a to i128
+ %conv1 = zext i64 %b to i128
+ %add = add i128 %conv1, %conv
+ %cond = call i128 @llvm.umin.i128(i128 %add, i128 18446744073709551615)
+ %conv2 = trunc i128 %cond to i64
+ ret i64 %conv2
+}
+
define i64 @usub_sat64(i64 %a, i64 %b) {
; CHECK-LABEL: define i64 @usub_sat64(
; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) {
@@ -238,12 +244,26 @@ entry:
%conv = zext i64 %a to i128
%conv1 = zext i64 %b to i128
%sub = sub i128 %conv, %conv1
- %cmp4 = icmp sgt i128 %sub, 0
- %cmp6 = icmp slt i128 %sub, 18446744073709551615
- %cond = select i1 %cmp6, i128 %sub, i128 18446744073709551615
- %cond11 = select i1 %cmp4, i128 %cond, i128 0
- %conv12 = trunc i128 %cond11 to i64
- ret i64 %conv12
+ %cmp = icmp sgt i128 %sub, 0
+ %cond = select i1 %cmp, i128 %sub, i128 0
+ %conv2 = trunc i128 %cond to i64
+ ret i64 %conv2
+}
+
+define i64 @usub_sat64_max(i64 %a, i64 %b) {
+; CHECK-LABEL: define i64 @usub_sat64_max(
+; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[B]], i64 [[A]])
+; CHECK-NEXT: ret i64 [[TMP0]]
+;
+entry:
+ %conv = zext i64 %a to i128
+ %conv1 = zext i64 %b to i128
+ %sub = sub i128 %conv1, %conv
+ %cond = call i128 @llvm.smax.i128(i128 %sub, i128 0)
+ %conv2 = trunc i128 %cond to i64
+ ret i64 %conv2
}
define <4 x i32> @uadd_satv4i32(<4 x i32> %a, <4 x i32> %b) {
@@ -257,14 +277,14 @@ entry:
%conv = zext <4 x i32> %a to <4 x i64>
%conv1 = zext <4 x i32> %b to <4 x i64>
%add = add <4 x i64> %conv1, %conv
- %0 = icmp ult <4 x i64> %add, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
- %select = select <4 x i1> %0, <4 x i64> %add, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %cmp = icmp ult <4 x i64> %add, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+ %select = select <4 x i1> %cmp, <4 x i64> %add, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
%conv7 = trunc <4 x i64> %select to <4 x i32>
ret <4 x i32> %conv7
}
-define <8 x i16> @uadd_satv8i16_minmax(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: define <8 x i16> @uadd_satv8i16_minmax(
+define <8 x i16> @uadd_satv8i16_min(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: define <8 x i16> @uadd_satv8i16_min(
; CHECK-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> [[B]], <8 x i16> [[A]])
@@ -290,28 +310,25 @@ entry:
%conv = zext <16 x i8> %a to <16 x i32>
%conv1 = zext <16 x i8> %b to <16 x i32>
%sub = sub <16 x i32> %conv1, %conv
- %0 = icmp slt <16 x i32> %sub, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
- %select = select <16 x i1> %0, <16 x i32> %sub, <16 x i32> <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
- %1 = icmp sgt <16 x i32> %select, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %select8 = select <16 x i1> %1, <16 x i32> %select, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
- %conv7 = trunc <16 x i32> %select8 to <16 x i8>
+ %cmp = icmp sgt <16 x i32> %sub, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %select = select <16 x i1> %cmp, <16 x i32> %sub, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ %conv7 = trunc <16 x i32> %select to <16 x i8>
ret <16 x i8> %conv7
}
-define <2 x i64> @usub_satv2i64_minmax(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: define <2 x i64> @usub_satv2i64_minmax(
+define <2 x i64> @usub_satv2i64_max(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: define <2 x i64> @usub_satv2i64_max(
; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[TMP0:%.*]] = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> [[B]], <2 x i64> [[A]])
-; CHECK-NEXT: ret <2 x i64> [[TMP0]]
+; CHECK-NEXT: [[CONV7:%.*]] = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> [[B]], <2 x i64> [[A]])
+; CHECK-NEXT: ret <2 x i64> [[CONV7]]
;
entry:
%conv = zext <2 x i64> %a to <2 x i128>
%conv1 = zext <2 x i64> %b to <2 x i128>
%sub = sub <2 x i128> %conv1, %conv
- %select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %sub, <2 x i128> <i128 18446744073709551615, i128 18446744073709551615>)
- %select8 = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %select, <2 x i128> <i128 0, i128 0>)
- %conv7 = trunc <2 x i128> %select8 to <2 x i64>
+ %select = call <2 x i128> @llvm.smax.v2i128(<2 x i128> %sub, <2 x i128> <i128 0, i128 0>)
+ %conv7 = trunc <2 x i128> %select to <2 x i64>
ret <2 x i64> %conv7
}
@@ -328,8 +345,8 @@ entry:
%conv = zext i32 %a to i64
%conv1 = zext i32 %b to i64
%add = add i64 %conv1, %conv
- %0 = icmp ult i64 %add, 4294967295
- %select = select i1 %0, i64 %add, i64 4294967295
+ %cmp = icmp ult i64 %add, 4294967295
+ %select = select i1 %cmp, i64 %add, i64 4294967295
%conv7 = trunc i64 %select to i32
call void @use64(i64 %select)
ret i32 %conv7
@@ -351,8 +368,8 @@ entry:
%conv = zext i32 %a to i64
%conv1 = zext i32 %b to i64
%add = add i64 %conv1, %conv
- %0 = icmp ult i64 %add, 4294967295
- %select = select i1 %0, i64 %add, i64 4294967295
+ %cmp = icmp ult i64 %add, 4294967295
+ %select = select i1 %cmp, i64 %add, i64 4294967295
%conv7 = trunc i64 %select to i32
call void @use64(i64 %add)
ret i32 %conv7
@@ -362,12 +379,8 @@ define i32 @usub_sat32_extra_use_3(i32 %a, i32 %b) {
; CHECK-LABEL: define i32 @usub_sat32_extra_use_3(
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[A]] to i64
-; CHECK-NEXT: [[CONV1:%.*]] = zext i32 [[B]] to i64
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[CONV]], [[CONV1]]
-; CHECK-NEXT: [[COND:%.*]] = call i64 @llvm.smin.i64(i64 [[SUB]], i64 4294967295)
-; CHECK-NEXT: [[COND11:%.*]] = call i64 @llvm.smax.i64(i64 [[COND]], i64 0)
-; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[COND11]] to i32
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[A]], i32 [[B]])
+; CHECK-NEXT: [[COND:%.*]] = zext i32 [[TMP0]] to i64
; CHECK-NEXT: call void @use64(i64 [[COND]])
; CHECK-NEXT: ret i32 [[TMP0]]
;
@@ -375,80 +388,75 @@ entry:
%conv = zext i32 %a to i64
%conv1 = zext i32 %b to i64
%sub = sub i64 %conv, %conv1
- %cmp4 = icmp sgt i64 %sub, 0
- %cmp6 = icmp slt i64 %sub, 4294967295
- %cond = select i1 %cmp6, i64 %sub, i64 4294967295
- %cond11 = select i1 %cmp4, i64 %cond, i64 0
- %conv12 = trunc i64 %cond11 to i32
+ %cmp = icmp sgt i64 %sub, 0
+ %cond = select i1 %cmp, i64 %sub, i64 0
+ %conv2 = trunc i64 %cond to i32
call void @use64(i64 %cond)
- ret i32 %conv12
+ ret i32 %conv2
}
-define i32 @usub_sat32_minmax_extra_use_4(i32 %a, i32 %b) {
-; CHECK-LABEL: define i32 @usub_sat32_minmax_extra_use_4(
+define i32 @usub_sat32_max_extra_use_4(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @usub_sat32_max_extra_use_4(
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[A]] to i64
; CHECK-NEXT: [[CONV1:%.*]] = zext i32 [[B]] to i64
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[CONV]], [[CONV1]]
-; CHECK-NEXT: [[COND:%.*]] = call i64 @llvm.smin.i64(i64 [[SUB]], i64 4294967295)
-; CHECK-NEXT: [[COND11:%.*]] = call i64 @llvm.smax.i64(i64 [[COND]], i64 0)
-; CHECK-NEXT: [[CONV12:%.*]] = trunc i64 [[COND11]] to i32
+; CHECK-NEXT: [[COND:%.*]] = sub nsw i64 [[CONV]], [[CONV1]]
+; CHECK-NEXT: [[COND1:%.*]] = call i64 @llvm.smax.i64(i64 [[COND]], i64 0)
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[COND1]] to i32
; CHECK-NEXT: call void @use64(i64 [[COND]])
-; CHECK-NEXT: ret i32 [[CONV12]]
+; CHECK-NEXT: ret i32 [[TMP0]]
;
entry:
%conv = zext i32 %a to i64
%conv1 = zext i32 %b to i64
%sub = sub i64 %conv, %conv1
- %cond = call i64 @llvm.smin.i64(i64 %sub, i64 4294967295)
- %cond11 = call i64 @llvm.smax.i64(i64 %cond, i64 0)
- %conv12 = trunc i64 %cond11 to i32
- call void @use64(i64 %cond)
- ret i32 %conv12
+ %cond = call i64 @llvm.smax.i64(i64 %sub, i64 0)
+ %conv2 = trunc i64 %cond to i32
+ call void @use64(i64 %sub)
+ ret i32 %conv2
}
-define i32 @usub_sat32_sext(i32 %a, i32 %b) {
-; CHECK-LABEL: define i32 @usub_sat32_sext(
+define i32 @uadd_sat32_sext_zext(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @uadd_sat32_sext_zext(
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A]] to i64
-; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B]] to i64
-; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[CONV]], [[CONV1]]
-; CHECK-NEXT: [[COND:%.*]] = call i64 @llvm.smin.i64(i64 [[SUB]], i64 4294967295)
-; CHECK-NEXT: [[COND11:%.*]] = call i64 @llvm.smax.i64(i64 [[COND]], i64 0)
-; CHECK-NEXT: [[CONV12:%.*]] = trunc i64 [[COND11]] to i32
-; CHECK-NEXT: ret i32 [[CONV12]]
+; CHECK-NEXT: [[CONV1:%.*]] = zext i32 [[B]] to i64
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[CONV]], [[CONV1]]
+; CHECK-NEXT: [[COND:%.*]] = call i64 @llvm.smin.i64(i64 [[ADD]], i64 4294967295)
+; CHECK-NEXT: [[CONV2:%.*]] = trunc i64 [[COND]] to i32
+; CHECK-NEXT: ret i32 [[CONV2]]
;
entry:
%conv = sext i32 %a to i64
- %conv1 = sext i32 %b to i64
- %sub = sub i64 %conv, %conv1
- %cmp4 = icmp sgt i64 %sub, 0
- %cmp6 = icmp slt i64 %sub, 4294967295
- %cond = select i1 %cmp6, i64 %sub, i64 4294967295
- %cond11 = select i1 %cmp4, i64 %cond, i64 0
- %conv12 = trunc i64 %cond11 to i32
- ret i32 %conv12
+ %conv1 = zext i32 %b to i64
+ %add = add i64 %conv, %conv1
+ %cmp = icmp slt i64 %add, 4294967295
+ %cond = select i1 %cmp, i64 %add, i64 4294967295
+ %conv2 = trunc i64 %cond to i32
+ ret i32 %conv2
}
-define i32 @usub_sat32_maxmin(i32 %a, i32 %b) {
-; CHECK-LABEL: define i32 @usub_sat32_maxmin(
+define i32 @usub_sat32_sext(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @usub_sat32_sext(
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[CONV12:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[A]], i32 [[B]])
-; CHECK-NEXT: ret i32 [[CONV12]]
+; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A]] to i64
+; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B]] to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[CONV]], [[CONV1]]
+; CHECK-NEXT: [[COND:%.*]] = call i64 @llvm.smax.i64(i64 [[SUB]], i64 0)
+; CHECK-NEXT: [[CONV2:%.*]] = trunc i64 [[COND]] to i32
+; CHECK-NEXT: ret i32 [[CONV2]]
;
entry:
- %conv = zext i32 %a to i64
- %conv1 = zext i32 %b to i64
+ %conv = sext i32 %a to i64
+ %conv1 = sext i32 %b to i64
%sub = sub i64 %conv, %conv1
- %cmp4 = icmp sgt i64 %sub, 0
- %cmp6 = icmp slt i64 %sub, 4294967295
- %cond = select i1 %cmp4, i64 %sub, i64 0
- %cond11 = select i1 %cmp6, i64 %cond, i64 4294967295
- %conv12 = trunc i64 %cond11 to i32
- ret i32 %conv12
+ %cmp = icmp sgt i64 %sub, 0
+ %cond = select i1 %cmp, i64 %sub, i64 0
+ %conv2 = trunc i64 %cond to i32
+ ret i32 %conv2
}
define i64 @uadd_sat32_no_trunc(i32 %a, i32 %b) {
@@ -463,28 +471,26 @@ entry:
%conv = zext i32 %a to i64
%conv1 = zext i32 %b to i64
%add = add i64 %conv1, %conv
- %0 = icmp ult i64 %add, 4294967295
- %select = select i1 %0, i64 %add, i64 4294967295
+ %cmp = icmp ult i64 %add, 4294967295
+ %select = select i1 %cmp, i64 %add, i64 4294967295
ret i64 %select
}
define i8 @const(i8 %X) {
; CHECK-LABEL: define i8 @const(
; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT: [[CONV10:%.*]] = zext i8 [[X]] to i16
-; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.smax.i16(i16 [[CONV10]], i16 10)
+; CHECK-NEXT: [[CONV1:%.*]] = zext i8 [[X]] to i16
+; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.smax.i16(i16 [[CONV1]], i16 10)
; CHECK-NEXT: [[TMP2:%.*]] = trunc nuw i16 [[TMP1]] to i8
; CHECK-NEXT: [[CONV:%.*]] = add i8 [[TMP2]], -10
; CHECK-NEXT: ret i8 [[CONV]]
;
- %conv10 = zext i8 %X to i16
- %sub = sub i16 %conv10, 10
- %l9 = icmp slt i16 %sub, 255
- %l10 = select i1 %l9, i16 %sub, i16 255
- %l11 = icmp sgt i16 %sub, 0
- %l12 = select i1 %l11, i16 %l10, i16 0
- %conv = trunc i16 %l12 to i8
- ret i8 %conv
+ %conv = zext i8 %X to i16
+ %sub = sub i16 %conv, 10
+ %cmp = icmp sgt i16 %sub, 0
+ %select = select i1 %cmp, i16 %sub, i16 0
+ %conv2 = trunc i16 %select to i8
+ ret i8 %conv2
}
define i32 @invalid_sub_could_be_negative(i32 %a, i32 %b) {
@@ -499,19 +505,44 @@ define i32 @invalid_sub_could_be_negative(i32 %a, i32 %b) {
;
%conv = zext i32 %a to i64
%conv3 = zext i32 %b to i64
- %sub = sub nsw i64 %conv, %conv3
+ %sub = sub i64 %conv, %conv3
%spec.store.select13 = call i64 @llvm.umin.i64(i64 %sub, i64 4294967295)
%conv10 = trunc i64 %spec.store.select13 to i32
ret i32 %conv10
}
+define i32 @invalid_saturating_max_value1(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @invalid_saturating_max_value1(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[A]] to i64
+; CHECK-NEXT: [[CONV1:%.*]] = zext i32 [[B]] to i64
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[CONV1]], [[CONV]]
+; CHECK-NEXT: [[SELECT:%.*]] = call i64 @llvm.umin.i64(i64 [[ADD]], i64 2147483647)
+; CHECK-NEXT: [[CONV7:%.*]] = trunc nuw nsw i64 [[SELECT]] to i32
+; CHECK-NEXT: ret i32 [[CONV7]]
+;
+ %conv = zext i32 %a to i64
+ %conv1 = zext i32 %b to i64
+ %add = add i64 %conv1, %conv
+ %select = call i64 @llvm.umin.i64(i64 %add, i64 2147483647)
+ %conv7 = trunc i64 %select to i32
+ ret i32 %conv7
+}
+
+define i32 @invalid_saturating_max_value2(i8 %a, i16 %b) {
+; CHECK-LABEL: define i32 @invalid_saturating_max_value2(
+; CHECK-SAME: i8 [[A:%.*]], i16 [[B:%.*]]) {
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[A]] to i32
+; CHECK-NEXT: [[CONV1:%.*]] = zext i16 [[B]] to i32
+; CHECK-NEXT: [[CONV7:%.*]] = add nuw nsw i32 [[CONV1]], [[CONV]]
+; CHECK-NEXT: ret i32 [[CONV7]]
+;
+ %conv = zext i8 %a to i64
+ %conv1 = zext i16 %b to i64
+ %add = add i64 %conv1, %conv
+ %select = call i64 @llvm.umin.i64(i64 %add, i64 4294967295)
+ %conv7 = trunc i64 %select to i32
+ ret i32 %conv7
+}
+
declare void @use64(i64)
-declare i64 @llvm.umin.i64(i64, i64)
-declare i64 @llvm.smin.i64(i64, i64)
-declare i64 @llvm.smax.i64(i64, i64)
-declare i32 @llvm.umin.i32(i32, i32)
-declare i32 @llvm.smin.i32(i32, i32)
-declare i32 @llvm.smax.i32(i32, i32)
-declare <8 x i32> @llvm.umin.v8i32(<8 x i32>, <8 x i32>)
-declare <2 x i128> @llvm.smin.v2i128(<2 x i128>, <2 x i128>)
-declare <2 x i128> @llvm.smax.v2i128(<2 x i128>, <2 x i128>)