[llvm] [llvm] Optimize usub.sat fix for #79690 (PR #151044)

Nimit Sachdeva via llvm-commits llvm-commits at lists.llvm.org
Sun Aug 24 14:19:25 PDT 2025


================
@@ -0,0 +1,206 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+
+; Exercises InstCombine on
+;   select (icmp eq/ne (or (usub.sat a, C1), (usub.sat b, C2)), 0), ...
+; where one select arm is 0 and the other is the sign-bit constant.
+; Positive tests expect the compare + select to become a sign-bit mask of the
+; OR of two usub.sat calls with adjusted constants; negative tests expect the
+; IR to be left unchanged.
+
+; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+
+declare i8 @llvm.usub.sat.i8(i8, i8)
+declare i16 @llvm.usub.sat.i16(i16, i16)
+declare i32 @llvm.usub.sat.i32(i32, i32)
+declare i64 @llvm.usub.sat.i64(i64, i64)
+declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)
+
+; Positive i8 case using icmp eq. Both usub.sat constants (223, 239) have the
+; sign bit set. The expected output uses usub.sat with 96 / 112 instead and
+; replaces the icmp + select with a mask of the OR by the sign bit (-128).
+define i8 @test_i8(i8 %a, i8 %b) {
+; CHECK-LABEL: define i8 @test_i8(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[A]], i8 96)
+; CHECK-NEXT:    [[TMP2:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[B]], i8 112)
+; CHECK-NEXT:    [[TMP3:%.*]] = or i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[RES:%.*]] = and i8 [[TMP3]], -128
+; CHECK-NEXT:    ret i8 [[RES]]
+;
+
+  %a_sub = call i8 @llvm.usub.sat.i8(i8 %a, i8 223)
+  %b_sub = call i8 @llvm.usub.sat.i8(i8 %b, i8 239)
+  %or = or i8 %a_sub, %b_sub
+  %cmp = icmp eq i8 %or, 0
+  %res = select i1 %cmp, i8 0, i8 128
+  ret i8 %res
+}
+
+; Same inputs as the i8 eq case, but written with icmp ne and the select arms
+; swapped (128 when the OR is non-zero, 0 otherwise). The expected output is
+; the same usub.sat / or / and-with-sign-bit sequence.
+define i8 @test_i8_ne(i8 %a, i8 %b) {
+; CHECK-LABEL: define i8 @test_i8_ne(
+; CHECK-SAME: i8 [[A:%.*]], i8 [[B:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[A]], i8 96)
+; CHECK-NEXT:    [[TMP2:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[B]], i8 112)
+; CHECK-NEXT:    [[TMP3:%.*]] = or i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[RES:%.*]] = and i8 [[TMP3]], -128
+; CHECK-NEXT:    ret i8 [[RES]]
+;
+
+  %a_sub = call i8 @llvm.usub.sat.i8(i8 %a, i8 223)
+  %b_sub = call i8 @llvm.usub.sat.i8(i8 %b, i8 239)
+  %or = or i8 %a_sub, %b_sub
+  %cmp = icmp ne i8 %or, 0
+  %res = select i1 %cmp, i8 128, i8 0
+  ret i8 %res
+}
+
+; Positive i16 case using icmp eq. The input constants 65409 / 65423 have the
+; sign bit set. The expected output uses 32642 / 32656 and masks the OR with
+; the i16 sign bit (-32768).
+define i16 @test_i16(i16 %a, i16 %b) {
+; CHECK-LABEL: define i16 @test_i16(
+; CHECK-SAME: i16 [[A:%.*]], i16 [[B:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[A]], i16 32642)
+; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[B]], i16 32656)
+; CHECK-NEXT:    [[TMP3:%.*]] = or i16 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[RES:%.*]] = and i16 [[TMP3]], -32768
+; CHECK-NEXT:    ret i16 [[RES]]
+;
+
+  %a_sub = call i16 @llvm.usub.sat.i16(i16 %a, i16 65409)
+  %b_sub = call i16 @llvm.usub.sat.i16(i16 %b, i16 65423)
+  %or = or i16 %a_sub, %b_sub
+  %cmp = icmp eq i16 %or, 0
+  %res = select i1 %cmp, i16 0, i16 32768
+  ret i16 %res
+}
+
+; Positive i32 case using icmp eq. The input constants 2147483871 / 2147483887
+; have the sign bit set. The expected output uses 224 / 240 and masks the OR
+; with the i32 sign bit (-2147483648).
+define i32 @test_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @test_i32(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[A]], i32 224)
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[B]], i32 240)
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[RES:%.*]] = and i32 [[TMP3]], -2147483648
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+
+  %a_sub = call i32 @llvm.usub.sat.i32(i32 %a, i32 2147483871)
+  %b_sub = call i32 @llvm.usub.sat.i32(i32 %b, i32 2147483887)
+  %or = or i32 %a_sub, %b_sub
+  %cmp = icmp eq i32 %or, 0
+  %res = select i1 %cmp, i32 0, i32 2147483648
+  ret i32 %res
+}
+
+; Positive i64 case using icmp eq. The input constants 9223372036854776031 /
+; 9223372036854776047 have the sign bit set. The expected output uses
+; 224 / 240 and masks the OR with the i64 sign bit (-9223372036854775808).
+define i64 @test_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: define i64 @test_i64(
+; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A]], i64 224)
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[B]], i64 240)
+; CHECK-NEXT:    [[TMP3:%.*]] = or i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[RES:%.*]] = and i64 [[TMP3]], -9223372036854775808
+; CHECK-NEXT:    ret i64 [[RES]]
+;
+
+  %a_sub = call i64 @llvm.usub.sat.i64(i64 %a, i64 9223372036854776031)
+  %b_sub = call i64 @llvm.usub.sat.i64(i64 %b, i64 9223372036854776047)
+  %or = or i64 %a_sub, %b_sub
+  %cmp = icmp eq i64 %or, 0
+  %res = select i1 %cmp, i64 0, i64 9223372036854775808
+  ret i64 %res
+}
+
+; Negative i32 case: the usub.sat constants (100, 239) do not have the sign
+; bit set. The expected output keeps the usub.sat / or / icmp / select
+; sequence as written (the select constant is just printed as -2147483648).
+define i32 @no_fold_due_to_small_K(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @no_fold_due_to_small_K(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    [[A_SUB:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[A]], i32 100)
+; CHECK-NEXT:    [[B_SUB:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[B]], i32 239)
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[A_SUB]], [[B_SUB]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[OR]], 0
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 0, i32 -2147483648
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+
+  %a_sub = call i32 @llvm.usub.sat.i32(i32 %a, i32 100)
+  %b_sub = call i32 @llvm.usub.sat.i32(i32 %b, i32 239)
+  %or = or i32 %a_sub, %b_sub
+  %cmp = icmp eq i32 %or, 0
+  %res = select i1 %cmp, i32 0, i32 2147483648
+  ret i32 %res
+}
+
+; Negative i32 case with the operands in the opposite order (%b's usub.sat is
+; defined first and is the left operand of the OR). The expected output keeps
+; the IR as written.
+; TODO(review): both constants (239, 223) are below the i32 sign bit, so this
+; input differs from the positive i32 test in more than operand order; confirm
+; whether a commuted case with sign-bit constants was intended as well.
+define i32 @commuted_test_neg(i32 %a, i32 %b) {
+; CHECK-LABEL: define i32 @commuted_test_neg(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) {
+; CHECK-NEXT:    [[B_SUB:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[B]], i32 239)
+; CHECK-NEXT:    [[A_SUB:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[A]], i32 223)
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[B_SUB]], [[A_SUB]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[OR]], 0
+; CHECK-NEXT:    [[RES:%.*]] = select i1 [[CMP]], i32 0, i32 -2147483648
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+
+  %b_sub = call i32 @llvm.usub.sat.i32(i32 %b, i32 239)
+  %a_sub = call i32 @llvm.usub.sat.i32(i32 %a, i32 223)
+  %or = or i32 %b_sub, %a_sub
+  %cmp = icmp eq i32 %or, 0
+  %res = select i1 %cmp, i32 0, i32 2147483648
+  ret i32 %res
+}
+
+; Positive <4 x i32> case using icmp eq with uniform (splat) constants. Every
+; lane uses the same sign-bit-set constants as the scalar i32 test, and the
+; expected output is the lane-wise equivalent: usub.sat with splat 224 / 240,
+; OR, then a mask with splat of the sign bit.
+define <4 x i32> @vector_test(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: define <4 x i32> @vector_test(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[A]], <4 x i32> splat (i32 224))
+; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[B]], <4 x i32> splat (i32 240))
+; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[RES:%.*]] = and <4 x i32> [[TMP3]], splat (i32 -2147483648)
+; CHECK-NEXT:    ret <4 x i32> [[RES]]
+;
+
+  %a_sub = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a, <4 x i32> splat (i32 2147483871))
+  %b_sub = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %b, <4 x i32> splat (i32 2147483887))
+  %or = or <4 x i32> %a_sub, %b_sub
+  %cmp = icmp eq <4 x i32> %or, zeroinitializer
+  %res = select <4 x i1> %cmp, <4 x i32> zeroinitializer, <4 x i32> splat (i32 -2147483648)
+  ret <4 x i32> %res
+}
+
+; Negative <4 x i32> case: the constant subtracted from %a is not uniform
+; (lane 1 is 0 while the other lanes are 2147483871). The expected output
+; keeps the usub.sat / or / icmp / select sequence as written.
+define <4 x i32> @vector_negative_test(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: define <4 x i32> @vector_negative_test(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[A_SUB:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[A]], <4 x i32> <i32 -2147483425, i32 0, i32 -2147483425, i32 -2147483425>)
+; CHECK-NEXT:    [[B_SUB:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[B]], <4 x i32> splat (i32 -2147483409))
+; CHECK-NEXT:    [[OR:%.*]] = or <4 x i32> [[A_SUB]], [[B_SUB]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <4 x i32> [[OR]], zeroinitializer
+; CHECK-NEXT:    [[RES:%.*]] = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> splat (i32 -2147483648)
+; CHECK-NEXT:    ret <4 x i32> [[RES]]
+;
+  ; The %a operand is intentionally spelled lane by lane: it is the one
+  ; non-splat constant in this test.
+  %a_sub = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a, <4 x i32> <i32 2147483871, i32 0, i32 2147483871, i32 2147483871>)
+  %b_sub = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %b, <4 x i32> splat (i32 2147483887))
+  %or = or <4 x i32> %a_sub, %b_sub
+  %cmp = icmp eq <4 x i32> %or, zeroinitializer
+  %res = select <4 x i1> %cmp, <4 x i32> zeroinitializer, <4 x i32> splat (i32 -2147483648)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @vector_ne_test(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: define <4 x i32> @vector_ne_test(
+; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[A]], <4 x i32> splat (i32 224))
+; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[B]], <4 x i32> splat (i32 240))
+; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[RES:%.*]] = and <4 x i32> [[TMP3]], splat (i32 -2147483648)
+; CHECK-NEXT:    ret <4 x i32> [[RES]]
+;
+
+
+  %a_sub = call <4 x i32> @llvm.usub.sat.v4i32(
+  <4 x i32> %a,
+  <4 x i32> <i32 2147483871, i32 2147483871, i32 2147483871, i32 2147483871>)
----------------
nimit25 wrote:

will change

https://github.com/llvm/llvm-project/pull/151044


More information about the llvm-commits mailing list