[llvm] [InstCombine] Canonicalize manual signed mul overflows (PR #100048)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 15 08:25:57 PDT 2025
https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/100048
>From 88aa199f97717181a3f2568aadf1cfed789e7f9a Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Fri, 24 May 2024 23:54:06 -0400
Subject: [PATCH 1/2] Pre-commit tests (NFC)
---
.../Transforms/InstCombine/overflow-mul.ll | 398 ++++++++++++++++++
1 file changed, 398 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/overflow-mul.ll b/llvm/test/Transforms/InstCombine/overflow-mul.ll
index 1d18d9ffd46d2..6322356cf506f 100644
--- a/llvm/test/Transforms/InstCombine/overflow-mul.ll
+++ b/llvm/test/Transforms/InstCombine/overflow-mul.ll
@@ -12,6 +12,7 @@ target datalayout = "i32:8:8"
; The mask is no longer in the form 2^n-1 and this prevents the transformation.
declare void @use.i64(i64)
+declare void @use.i32(i32)
; return mul(zext x, zext y) > MAX
define i32 @pr4917_1(i32 %x, i32 %y) nounwind {
@@ -343,3 +344,400 @@ define i32 @extra_and_use_mask_too_large(i32 %x, i32 %y) {
%retval = zext i1 %overflow to i32
ret i32 %retval
}
+
+define i32 @smul(i32 %a, i32 %b) {
+; CHECK-LABEL: @smul(
+; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64
+; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV1]], [[CONV]]
+; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[MUL]], -2147483648
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], -4294967296
+; CHECK-NEXT: [[CONV3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT: ret i32 [[CONV3]]
+;
+ %conv = sext i32 %a to i64
+ %conv1 = sext i32 %b to i64
+ %mul = mul nsw i64 %conv1, %conv
+ %1 = add nsw i64 %mul, -2147483648
+ %2 = icmp ult i64 %1, -4294967296
+ %conv3 = zext i1 %2 to i32
+ ret i32 %conv3
+}
+
+define i32 @smul2(i32 %a, i32 %b) {
+; CHECK-LABEL: @smul2(
+; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64
+; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV1]], [[CONV]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[MUL]], 2147483647
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 4294967295
+; CHECK-NEXT: [[CONV3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT: ret i32 [[CONV3]]
+;
+ %conv = sext i32 %a to i64
+ %conv1 = sext i32 %b to i64
+ %mul = mul nsw i64 %conv1, %conv
+ %cmp = icmp sle i64 %mul, 2147483647
+ %cmp2 = icmp sgt i64 %mul, -2147483648
+ %1 = select i1 %cmp, i1 %cmp2, i1 false
+ %conv3 = zext i1 %1 to i32
+ ret i32 %conv3
+}
+
+define i1 @smul_sext_add_pattern(i8 %a, i8 %b) {
+; CHECK-LABEL: @smul_sext_add_pattern(
+; CHECK-NEXT: [[A_EXT:%.*]] = sext i8 [[A:%.*]] to i32
+; CHECK-NEXT: [[B_EXT:%.*]] = sext i8 [[B:%.*]] to i32
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]]
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[MUL]], 128
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], 256
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a.ext = sext i8 %a to i32
+ %b.ext = sext i8 %b to i32
+ %mul = mul nsw i32 %a.ext, %b.ext
+ %add = add i32 %mul, 128
+ %cmp = icmp ult i32 %add, 256
+ ret i1 %cmp
+}
+
+define i1 @smul_sext_add_wrong_constants(i8 %a, i8 %b) {
+; CHECK-LABEL: @smul_sext_add_wrong_constants(
+; CHECK-NEXT: [[A_EXT:%.*]] = sext i8 [[A:%.*]] to i32
+; CHECK-NEXT: [[B_EXT:%.*]] = sext i8 [[B:%.*]] to i32
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[MUL]], 58
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a.ext = sext i8 %a to i32
+ %b.ext = sext i8 %b to i32
+ %mul = mul nsw i32 %a.ext, %b.ext
+ %add = add i32 %mul, 42
+ %cmp = icmp slt i32 %add, 100
+ ret i1 %cmp
+}
+
+define i1 @smul_sext_add_eq_predicate(i8 %a, i8 %b) {
+; CHECK-LABEL: @smul_sext_add_eq_predicate(
+; CHECK-NEXT: [[A_EXT:%.*]] = sext i8 [[A:%.*]] to i32
+; CHECK-NEXT: [[B_EXT:%.*]] = sext i8 [[B:%.*]] to i32
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MUL]], 128
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a.ext = sext i8 %a to i32
+ %b.ext = sext i8 %b to i32
+ %mul = mul nsw i32 %a.ext, %b.ext
+ %add = add i32 %mul, 128
+ %cmp = icmp eq i32 %add, 256
+ ret i1 %cmp
+}
+
+define i1 @smul_sext_add_different_widths(i4 %a, i16 %b) {
+; CHECK-LABEL: @smul_sext_add_different_widths(
+; CHECK-NEXT: [[A_EXT:%.*]] = sext i4 [[A:%.*]] to i32
+; CHECK-NEXT: [[B_EXT:%.*]] = sext i16 [[B:%.*]] to i32
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]]
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[MUL]], 128
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], 256
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a.ext = sext i4 %a to i32
+ %b.ext = sext i16 %b to i32
+ %mul = mul nsw i32 %a.ext, %b.ext
+ %add = add i32 %mul, 128
+ %cmp = icmp ult i32 %add, 256
+ ret i1 %cmp
+}
+
+define i1 @smul_sext_add_no_nsw(i8 %a, i8 %b) {
+; CHECK-LABEL: @smul_sext_add_no_nsw(
+; CHECK-NEXT: [[A_EXT:%.*]] = sext i8 [[A:%.*]] to i32
+; CHECK-NEXT: [[B_EXT:%.*]] = sext i8 [[B:%.*]] to i32
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]]
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[MUL]], 128
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], 256
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a.ext = sext i8 %a to i32
+ %b.ext = sext i8 %b to i32
+ %mul = mul i32 %a.ext, %b.ext ; No nsw flag
+ %add = add i32 %mul, 128
+ %cmp = icmp ult i32 %add, 256
+ ret i1 %cmp
+}
+
+define <2 x i1> @smul_sext_add_vector(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @smul_sext_add_vector(
+; CHECK-NEXT: [[A_EXT:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32>
+; CHECK-NEXT: [[B_EXT:%.*]] = sext <2 x i8> [[B:%.*]] to <2 x i32>
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw <2 x i32> [[A_EXT]], [[B_EXT]]
+; CHECK-NEXT: [[ADD:%.*]] = add nsw <2 x i32> [[MUL]], splat (i32 128)
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[ADD]], splat (i32 256)
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %a.ext = sext <2 x i8> %a to <2 x i32>
+ %b.ext = sext <2 x i8> %b to <2 x i32>
+ %mul = mul nsw <2 x i32> %a.ext, %b.ext
+ %add = add <2 x i32> %mul, <i32 128, i32 128>
+ %cmp = icmp ult <2 x i32> %add, <i32 256, i32 256>
+ ret <2 x i1> %cmp
+}
+
+define i1 @smul_sext_add_negative2(i8 %a, i8 %b) {
+; CHECK-LABEL: @smul_sext_add_negative2(
+; CHECK-NEXT: [[A_EXT:%.*]] = sext i8 [[A:%.*]] to i32
+; CHECK-NEXT: [[B_EXT:%.*]] = sext i8 [[B:%.*]] to i32
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[MUL]], 128
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a.ext = sext i8 %a to i32
+ %b.ext = sext i8 %b to i32
+ %mul = mul nsw i32 %a.ext, %b.ext
+ %cmp = icmp ult i32 %mul, 128
+ %add = add i32 %mul, 128
+ ret i1 %cmp
+}
+
+define i1 @smul_sext_add_multiple_uses(i8 %a, i8 %b) {
+; CHECK-LABEL: @smul_sext_add_multiple_uses(
+; CHECK-NEXT: [[A_EXT:%.*]] = sext i8 [[A:%.*]] to i32
+; CHECK-NEXT: [[B_EXT:%.*]] = sext i8 [[B:%.*]] to i32
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]]
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[MUL]], 128
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], 256
+; CHECK-NEXT: call void @use.i32(i32 [[MUL]])
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a.ext = sext i8 %a to i32
+ %b.ext = sext i8 %b to i32
+ %mul = mul nsw i32 %a.ext, %b.ext
+ %add = add i32 %mul, 128
+ %cmp = icmp ult i32 %add, 256
+ call void @use.i32(i32 %mul)
+ ret i1 %cmp
+}
+
+define i1 @smul_sext_add_extreme_constants(i8 %a, i8 %b) {
+; CHECK-LABEL: @smul_sext_add_extreme_constants(
+; CHECK-NEXT: ret i1 false
+;
+ %a.ext = sext i8 %a to i32
+ %b.ext = sext i8 %b to i32
+ %mul = mul nsw i32 %a.ext, %b.ext
+ %add = add i32 %mul, 2147483647 ; INT_MAX
+ %cmp = icmp slt i32 %add, -2147483648 ; INT_MIN
+ ret i1 %cmp
+}
+
+define i1 @smul_sext_add_nsw(i8 %a, i8 %b) {
+; CHECK-LABEL: @smul_sext_add_nsw(
+; CHECK-NEXT: [[A_EXT:%.*]] = sext i8 [[A:%.*]] to i32
+; CHECK-NEXT: [[B_EXT:%.*]] = sext i8 [[B:%.*]] to i32
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]]
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[MUL]], 128
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], 256
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a.ext = sext i8 %a to i32
+ %b.ext = sext i8 %b to i32
+ %mul = mul nsw i32 %a.ext, %b.ext
+ %add = add nsw i32 %mul, 128
+ %cmp = icmp ult i32 %add, 256
+ ret i1 %cmp
+}
+
+define i1 @smul_sext_add_nuw_negative(i8 %a, i8 %b) {
+; CHECK-LABEL: @smul_sext_add_nuw_negative(
+; CHECK-NEXT: [[A_EXT:%.*]] = sext i8 [[A:%.*]] to i32
+; CHECK-NEXT: [[B_EXT:%.*]] = sext i8 [[B:%.*]] to i32
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[MUL]], 128
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %a.ext = sext i8 %a to i32
+ %b.ext = sext i8 %b to i32
+ %mul = mul nsw i32 %a.ext, %b.ext
+ %add = add nuw i32 %mul, 128
+ %cmp = icmp ult i32 %add, 256
+ ret i1 %cmp
+}
+
+define i32 @smul_extra_and_use(i32 %a, i32 %b) {
+; CHECK-LABEL: @smul_extra_and_use(
+; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64
+; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV1]], [[CONV]]
+; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[MUL]], -2147483648
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], -4294967296
+; CHECK-NEXT: [[AND:%.*]] = and i64 [[MUL]], 4294967295
+; CHECK-NEXT: call void @use.i64(i64 [[AND]])
+; CHECK-NEXT: [[RETVAL:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT: ret i32 [[RETVAL]]
+;
+ %conv = sext i32 %a to i64
+ %conv1 = sext i32 %b to i64
+ %mul = mul nsw i64 %conv1, %conv
+ %1 = add nsw i64 %mul, -2147483648
+ %2 = icmp ult i64 %1, -4294967296
+ %and = and i64 %mul, 4294967295
+ call void @use.i64(i64 %and)
+ %retval = zext i1 %2 to i32
+ ret i32 %retval
+}
+
+define i32 @smul_extra_trunc_use(i32 %a, i32 %b) {
+; CHECK-LABEL: @smul_extra_trunc_use(
+; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64
+; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV1]], [[CONV]]
+; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[MUL]], -2147483648
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], -4294967296
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[MUL]] to i32
+; CHECK-NEXT: call void @use.i32(i32 [[TRUNC]])
+; CHECK-NEXT: [[RETVAL:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT: ret i32 [[RETVAL]]
+;
+ %conv = sext i32 %a to i64
+ %conv1 = sext i32 %b to i64
+ %mul = mul nsw i64 %conv1, %conv
+ %1 = add nsw i64 %mul, -2147483648
+ %2 = icmp ult i64 %1, -4294967296
+ %trunc = trunc i64 %mul to i32
+ call void @use.i32(i32 %trunc)
+ %retval = zext i1 %2 to i32
+ ret i32 %retval
+}
+
+define i32 @smul_extra_and_use_small_mask(i32 %a, i32 %b) {
+; CHECK-LABEL: @smul_extra_and_use_small_mask(
+; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64
+; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV1]], [[CONV]]
+; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[MUL]], -2147483648
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], -4294967296
+; CHECK-NEXT: [[AND:%.*]] = and i64 [[MUL]], 268435455
+; CHECK-NEXT: call void @use.i64(i64 [[AND]])
+; CHECK-NEXT: [[RETVAL:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT: ret i32 [[RETVAL]]
+;
+ %conv = sext i32 %a to i64
+ %conv1 = sext i32 %b to i64
+ %mul = mul nsw i64 %conv1, %conv
+ %1 = add nsw i64 %mul, -2147483648
+ %2 = icmp ult i64 %1, -4294967296
+ %and = and i64 %mul, u0xfffffff
+ call void @use.i64(i64 %and)
+ %retval = zext i1 %2 to i32
+ ret i32 %retval
+}
+
+define i32 @smul_multiple_uses(i32 %a, i32 %b) {
+; CHECK-LABEL: @smul_multiple_uses(
+; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64
+; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV1]], [[CONV]]
+; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[MUL]], -2147483648
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], -4294967296
+; CHECK-NEXT: [[AND:%.*]] = and i64 [[MUL]], 4294967295
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[MUL]] to i32
+; CHECK-NEXT: call void @use.i64(i64 [[AND]])
+; CHECK-NEXT: call void @use.i32(i32 [[TRUNC]])
+; CHECK-NEXT: [[RETVAL:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT: ret i32 [[RETVAL]]
+;
+ %conv = sext i32 %a to i64
+ %conv1 = sext i32 %b to i64
+ %mul = mul nsw i64 %conv1, %conv
+ %1 = add nsw i64 %mul, -2147483648
+ %2 = icmp ult i64 %1, -4294967296
+ %and = and i64 %mul, 4294967295
+ %trunc = trunc i64 %mul to i32
+ call void @use.i64(i64 %and)
+ call void @use.i32(i32 %trunc)
+ %retval = zext i1 %2 to i32
+ ret i32 %retval
+}
+
+define i32 @smul_extra_and_use_mask_too_large(i32 %a, i32 %b) {
+; CHECK-LABEL: @smul_extra_and_use_mask_too_large(
+; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64
+; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV1]], [[CONV]]
+; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[MUL]], -2147483648
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], -4294967296
+; CHECK-NEXT: [[AND:%.*]] = and i64 [[MUL]], 68719476735
+; CHECK-NEXT: call void @use.i64(i64 [[AND]])
+; CHECK-NEXT: [[RETVAL:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT: ret i32 [[RETVAL]]
+;
+ %conv = sext i32 %a to i64
+ %conv1 = sext i32 %b to i64
+ %mul = mul nsw i64 %conv1, %conv
+ %1 = add nsw i64 %mul, -2147483648
+ %2 = icmp ult i64 %1, -4294967296
+ %and = and i64 %mul, u0xfffffffff
+ call void @use.i64(i64 %and)
+ %retval = zext i1 %2 to i32
+ ret i32 %retval
+}
+
+define i32 @smul_different_sizes(i32 %a, i8 %b) {
+; CHECK-LABEL: @smul_different_sizes(
+; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64
+; CHECK-NEXT: [[CONV1:%.*]] = sext i8 [[B:%.*]] to i64
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV1]], [[CONV]]
+; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[MUL]], -2147483648
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], -4294967296
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[MUL]] to i32
+; CHECK-NEXT: [[RETVAL:%.*]] = select i1 [[TMP2]], i32 [[TRUNC]], i32 111
+; CHECK-NEXT: ret i32 [[RETVAL]]
+;
+ %conv = sext i32 %a to i64
+ %conv1 = sext i8 %b to i64
+ %mul = mul nsw i64 %conv1, %conv
+ %1 = add nsw i64 %mul, -2147483648
+ %2 = icmp ult i64 %1, -4294967296
+ %trunc = trunc i64 %mul to i32
+ %retval = select i1 %2, i32 %trunc, i32 111
+ ret i32 %retval
+}
+
+define i32 @smul_inverse_pattern(i32 %a, i32 %b) {
+; CHECK-LABEL: @smul_inverse_pattern(
+; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64
+; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV1]], [[CONV]]
+; CHECK-NEXT: [[ADD:%.*]] = add i64 [[MUL]], 2147483647
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[ADD]], 4294967295
+; CHECK-NEXT: [[RETVAL:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: ret i32 [[RETVAL]]
+;
+ %conv = sext i32 %a to i64
+ %conv1 = sext i32 %b to i64
+ %mul = mul nsw i64 %conv1, %conv
+ %add = add i64 %mul, 2147483647
+ %cmp = icmp ult i64 %add, 4294967295
+ %retval = zext i1 %cmp to i32
+ ret i32 %retval
+}
+
+define <2 x i32> @smul_vector_operations(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: @smul_vector_operations(
+; CHECK-NEXT: [[CONV:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT: [[CONV1:%.*]] = sext <2 x i32> [[B:%.*]] to <2 x i64>
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw <2 x i64> [[CONV1]], [[CONV]]
+; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i64> [[MUL]], splat (i64 -2147483648)
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i64> [[TMP1]], splat (i64 -4294967296)
+; CHECK-NEXT: [[V:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[V]]
+;
+ %conv = sext <2 x i32> %a to <2 x i64>
+ %conv1 = sext <2 x i32> %b to <2 x i64>
+ %mul = mul nsw <2 x i64> %conv1, %conv
+ %1 = add nsw <2 x i64> %mul, <i64 -2147483648, i64 -2147483648>
+ %2 = icmp ult <2 x i64> %1, <i64 -4294967296, i64 -4294967296>
+ %v = select <2 x i1> %2, <2 x i32> <i32 1, i32 1>, <2 x i32> <i32 0, i32 0>
+ ret <2 x i32> %v
+}
>From 22a652c9d7d4d25d038791610710066a17339e4f Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Fri, 24 May 2024 23:50:47 -0400
Subject: [PATCH 2/2] [InstCombine] Canonicalize manual signed mul overflows
Alive2 Proof:
https://alive2.llvm.org/ce/z/02pU-B
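
For illustration, the core fold (a sketch of the i32-in-i64 case, mirroring the
@smul test updated below; value names are illustrative and other widths are
handled the same way):

  %x   = sext i32 %a to i64
  %y   = sext i32 %b to i64
  %mul = mul nsw i64 %x, %y
  %sum = add nsw i64 %mul, -2147483648
  %ovf = icmp ult i64 %sum, -4294967296

becomes:

  %smul = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %b, i32 %a)
  %ovf  = extractvalue { i32, i1 } %smul, 1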
---
.../InstCombine/InstCombineCompares.cpp | 179 ++++++++++++++++++
.../Transforms/InstCombine/overflow-mul.ll | 48 ++---
2 files changed, 198 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 76020d2b1dbf4..cfbfd6c969a8c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -6526,6 +6526,172 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
return ExtractValueInst::Create(Call, 1);
}
+/// Recognize and process idiom involving test for multiplication
+/// overflow.
+///
+/// The caller has matched a pattern of the form:
+///   I = icmp pred (add (mul (sext A), (sext B)), AddVal), OtherVal
+/// The function checks whether this is a test for overflow and, if so,
+/// replaces the multiplication with a call to the 'smul.with.overflow'
+/// intrinsic.
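+///
+/// For example (a sketch of the i32-in-i64 case, cf. the @smul test):
+///   %mul = mul nsw i64 (sext i32 %a), (sext i32 %b)
+///   %sum = add nsw i64 %mul, -2147483648
+///   %ovf = icmp ult i64 %sum, -4294967296  ; true iff %a * %b overflows i32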
+///
+/// \param I Compare instruction.
+/// \param MulVal Result of the 'mul' instruction; it is the first operand of
+/// the add that feeds the compare. Must be of integer type.
+/// \param AddVal Constant added to the product before the comparison.
+/// \param OtherVal Constant against which the adjusted product is compared.
+/// \returns Instruction which must replace the compare instruction, NULL if no
+/// replacement required.
+static Instruction *processSMulSExtIdiom(ICmpInst &I, Value *MulVal,
+ const APInt *AddVal,
+ const APInt *OtherVal,
+ InstCombinerImpl &IC) {
+  // Don't bother doing this transformation for pointers or vectors.
+ if (!isa<IntegerType>(MulVal->getType()))
+ return nullptr;
+
+ auto *MulInstr = dyn_cast<Instruction>(MulVal);
+ if (!MulInstr)
+ return nullptr;
+ assert(MulInstr->getOpcode() == Instruction::Mul);
+
+ auto *LHS = cast<SExtInst>(MulInstr->getOperand(0)),
+ *RHS = cast<SExtInst>(MulInstr->getOperand(1));
+ assert(LHS->getOpcode() == Instruction::SExt);
+ assert(RHS->getOpcode() == Instruction::SExt);
+ Value *A = LHS->getOperand(0), *B = RHS->getOperand(0);
+
+ // Calculate type and width of the result produced by mul.with.overflow.
+ Type *TyA = A->getType(), *TyB = B->getType();
+ unsigned WidthA = TyA->getPrimitiveSizeInBits(),
+ WidthB = TyB->getPrimitiveSizeInBits();
+ unsigned MulWidth;
+ Type *MulType;
+ if (WidthB > WidthA) {
+ MulWidth = WidthB;
+ MulType = TyB;
+ } else {
+ MulWidth = WidthA;
+ MulType = TyA;
+ }
+
+ // In order to replace the original mul with a narrower mul.with.overflow,
+  // all uses must ignore the upper bits of the product. The number of used
+  // low bits must not be greater than the width of mul.with.overflow.
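+  // For example, a trunc to a type no wider than MulWidth, or an 'and' whose
+  // mask covers at most the low MulWidth bits, is acceptable; any other use
+  // blocks the transform.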
+ if (MulVal->hasNUsesOrMore(2))
+ for (User *U : MulVal->users()) {
+ if (U == &I)
+ continue;
+ if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
+ // Check if truncation ignores bits above MulWidth.
+ unsigned TruncWidth = TI->getType()->getPrimitiveSizeInBits();
+ if (TruncWidth > MulWidth)
+ return nullptr;
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
+ // Check if AND ignores bits above MulWidth.
+ if (BO->getOpcode() != Instruction::And)
+ return nullptr;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ const APInt &CVal = CI->getValue();
+ if (CVal.getBitWidth() - CVal.countl_zero() > MulWidth)
+ return nullptr;
+ } else {
+ // In this case we could have the operand of the binary operation
+ // being defined in another block, and performing the replacement
+ // could break the dominance relation.
+ return nullptr;
+ }
+ } else {
+ // Other uses prohibit this transformation.
+ return nullptr;
+ }
+ }
+
+ // Recognize patterns
+ bool IsInverse = false;
+ switch (I.getPredicate()) {
+ case ICmpInst::ICMP_ULT: {
+ // Recognize pattern:
+ // mulval = mul(sext A, sext B)
+ // addval = add (mulval, min)
+    // cmp ult addval, 2 * min
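+    // For MulWidth == 32 in a 64-bit compare this is:
+    //   add %mul, -2147483648; icmp ult %sum, -4294967296 (see the @smul test).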
+ APInt MinVal = APInt::getSignedMinValue(MulWidth);
+ MinVal = MinVal.sext(OtherVal->getBitWidth());
+ APInt MinMinVal = APInt::getSignedMinValue(MulWidth + 1);
+ MinMinVal = MinMinVal.sext(OtherVal->getBitWidth());
+ if (MinVal.eq(*AddVal) && MinMinVal.eq(*OtherVal))
+ break; // Recognized
+
+ // Recognize pattern:
+ // mulval = mul(sext A, sext B)
+    // addval = add (mulval, signedMax + 1)
+    // cmp ult addval, unsignedMax + 1
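+    // For MulWidth == 32 in a 64-bit compare this is:
+    //   add %mul, 2147483648; icmp ult %sum, 4294967296 (the no-overflow form,
+    //   hence IsInverse below).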
+ APInt MaxVal = APInt::getSignedMaxValue(MulWidth);
+ MaxVal = MaxVal.zext(OtherVal->getBitWidth()) + 1;
+ APInt MaxMaxVal = APInt::getMaxValue(MulWidth);
+ MaxMaxVal = MaxMaxVal.zext(OtherVal->getBitWidth()) + 1;
+ if (MaxVal.eq(*AddVal) && MaxMaxVal.eq(*OtherVal)) {
+ IsInverse = true;
+ break; // Recognized
+ }
+ return nullptr;
+ }
+
+ default:
+ return nullptr;
+ }
+
+ InstCombiner::BuilderTy &Builder = IC.Builder;
+ Builder.SetInsertPoint(MulInstr);
+
+  // Replace: mul(sext A, sext B) --> smul.with.overflow(A, B)
+ Value *MulA = A, *MulB = B;
+ if (WidthA < MulWidth)
+ MulA = Builder.CreateSExt(A, MulType);
+ if (WidthB < MulWidth)
+ MulB = Builder.CreateSExt(B, MulType);
+ Function *F = Intrinsic::getOrInsertDeclaration(
+ I.getModule(), Intrinsic::smul_with_overflow, MulType);
+ CallInst *Call = Builder.CreateCall(F, {MulA, MulB}, "smul");
+ IC.addToWorklist(MulInstr);
+
+ // If there are uses of mul result other than the comparison, we know that
+  // they are truncations or binary ANDs. Change them to use the result of
+  // mul.with.overflow and adjust the mask/size accordingly.
+ if (MulVal->hasNUsesOrMore(2)) {
+ Value *Mul = Builder.CreateExtractValue(Call, 0, "smul.value");
+ for (User *U : make_early_inc_range(MulVal->users())) {
+ if (U == &I)
+ continue;
+ if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
+ if (TI->getType()->getPrimitiveSizeInBits() == MulWidth)
+ IC.replaceInstUsesWith(*TI, Mul);
+ else
+ TI->setOperand(0, Mul);
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
+ assert(BO->getOpcode() == Instruction::And);
+ // Replace (mul & mask) --> zext (mul.with.overflow & short_mask)
+ ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
+ APInt ShortMask = CI->getValue().trunc(MulWidth);
+ Value *ShortAnd = Builder.CreateAnd(Mul, ShortMask);
+ Value *Zext = Builder.CreateZExt(ShortAnd, BO->getType());
+ IC.replaceInstUsesWith(*BO, Zext);
+ } else {
+ llvm_unreachable("Unexpected Binary operation");
+ }
+ IC.addToWorklist(cast<Instruction>(U));
+ }
+ }
+
+  // The original icmp gets replaced with the overflow bit, inverted when the
+  // matched pattern tests the no-overflow (in-range) case.
+ if (IsInverse) {
+ Value *Res = Builder.CreateExtractValue(Call, 1);
+ return BinaryOperator::CreateNot(Res);
+ }
+
+ return ExtractValueInst::Create(Call, 1);
+}
+
/// When performing a comparison against a constant, it is possible that not all
/// the bits in the LHS are demanded. This helper method computes the mask that
/// IS demanded.
@@ -7651,6 +7817,19 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
return R;
}
+  // icmp pred (add (mul (sext X), (sext Y)), C), C1 --> llvm.smul.with.overflow
+  // Detects patterns for signed multiplication overflow checks where the
+  // product is adjusted by a constant and then compared against another
+  // constant.
+ const APInt *C1;
+ if (match(Op0, m_Add(m_NSWMul(m_SExt(m_Value(X)), m_SExt(m_Value(Y))),
+ m_APInt(C))) &&
+ match(Op1, m_APInt(C1))) {
+ if (Instruction *R = processSMulSExtIdiom(
+ I, cast<Instruction>(Op0)->getOperand(0), C, C1, *this))
+ return R;
+ }
+
// Signbit test folds
// Fold (X u>> BitWidth - 1 Pred ZExt(i1)) --> X s< 0 Pred i1
// Fold (X s>> BitWidth - 1 Pred SExt(i1)) --> X s< 0 Pred i1
diff --git a/llvm/test/Transforms/InstCombine/overflow-mul.ll b/llvm/test/Transforms/InstCombine/overflow-mul.ll
index 6322356cf506f..9c7f9b6d520e9 100644
--- a/llvm/test/Transforms/InstCombine/overflow-mul.ll
+++ b/llvm/test/Transforms/InstCombine/overflow-mul.ll
@@ -347,12 +347,9 @@ define i32 @extra_and_use_mask_too_large(i32 %x, i32 %y) {
define i32 @smul(i32 %a, i32 %b) {
; CHECK-LABEL: @smul(
-; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64
-; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64
-; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV1]], [[CONV]]
-; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[MUL]], -2147483648
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], -4294967296
-; CHECK-NEXT: [[CONV3:%.*]] = zext i1 [[TMP2]] to i32
+; CHECK-NEXT: [[SMUL:%.*]] = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 [[B:%.*]], i32 [[A:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i1 } [[SMUL]], 1
+; CHECK-NEXT: [[CONV3:%.*]] = zext i1 [[TMP1]] to i32
; CHECK-NEXT: ret i32 [[CONV3]]
;
%conv = sext i32 %a to i64
@@ -366,11 +363,9 @@ define i32 @smul(i32 %a, i32 %b) {
define i32 @smul2(i32 %a, i32 %b) {
; CHECK-LABEL: @smul2(
-; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64
-; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64
-; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV1]], [[CONV]]
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[MUL]], 2147483647
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 4294967295
+; CHECK-NEXT: [[SMUL:%.*]] = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 [[B:%.*]], i32 [[A:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i1 } [[SMUL]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: [[CONV3:%.*]] = zext i1 [[TMP2]] to i32
; CHECK-NEXT: ret i32 [[CONV3]]
;
@@ -378,7 +373,7 @@ define i32 @smul2(i32 %a, i32 %b) {
%conv1 = sext i32 %b to i64
%mul = mul nsw i64 %conv1, %conv
%cmp = icmp sle i64 %mul, 2147483647
- %cmp2 = icmp sgt i64 %mul, -2147483648
+ %cmp2 = icmp sge i64 %mul, -2147483648
%1 = select i1 %cmp, i1 %cmp2, i1 false
%conv3 = zext i1 %1 to i32
ret i32 %conv3
@@ -386,11 +381,9 @@ define i32 @smul2(i32 %a, i32 %b) {
define i1 @smul_sext_add_pattern(i8 %a, i8 %b) {
; CHECK-LABEL: @smul_sext_add_pattern(
-; CHECK-NEXT: [[A_EXT:%.*]] = sext i8 [[A:%.*]] to i32
-; CHECK-NEXT: [[B_EXT:%.*]] = sext i8 [[B:%.*]] to i32
-; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]]
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[MUL]], 128
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], 256
+; CHECK-NEXT: [[SMUL:%.*]] = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[A:%.*]], i8 [[B:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i8, i1 } [[SMUL]], 1
+; CHECK-NEXT: [[CMP:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[CMP]]
;
%a.ext = sext i8 %a to i32
@@ -452,11 +445,9 @@ define i1 @smul_sext_add_different_widths(i4 %a, i16 %b) {
define i1 @smul_sext_add_no_nsw(i8 %a, i8 %b) {
; CHECK-LABEL: @smul_sext_add_no_nsw(
-; CHECK-NEXT: [[A_EXT:%.*]] = sext i8 [[A:%.*]] to i32
-; CHECK-NEXT: [[B_EXT:%.*]] = sext i8 [[B:%.*]] to i32
-; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]]
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[MUL]], 128
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], 256
+; CHECK-NEXT: [[SMUL:%.*]] = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[A:%.*]], i8 [[B:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i8, i1 } [[SMUL]], 1
+; CHECK-NEXT: [[CMP:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[CMP]]
;
%a.ext = sext i8 %a to i32
@@ -533,11 +524,9 @@ define i1 @smul_sext_add_extreme_constants(i8 %a, i8 %b) {
define i1 @smul_sext_add_nsw(i8 %a, i8 %b) {
; CHECK-LABEL: @smul_sext_add_nsw(
-; CHECK-NEXT: [[A_EXT:%.*]] = sext i8 [[A:%.*]] to i32
-; CHECK-NEXT: [[B_EXT:%.*]] = sext i8 [[B:%.*]] to i32
-; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]]
-; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[MUL]], 128
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], 256
+; CHECK-NEXT: [[SMUL:%.*]] = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[A:%.*]], i8 [[B:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i8, i1 } [[SMUL]], 1
+; CHECK-NEXT: [[CMP:%.*]] = xor i1 [[TMP1]], true
; CHECK-NEXT: ret i1 [[CMP]]
;
%a.ext = sext i8 %a to i32
@@ -704,8 +693,8 @@ define i32 @smul_different_sizes(i32 %a, i8 %b) {
ret i32 %retval
}
-define i32 @smul_inverse_pattern(i32 %a, i32 %b) {
-; CHECK-LABEL: @smul_inverse_pattern(
+define i32 @smul_inverse_pattern_negative(i32 %a, i32 %b) {
+; CHECK-LABEL: @smul_inverse_pattern_negative(
; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[A:%.*]] to i64
; CHECK-NEXT: [[CONV1:%.*]] = sext i32 [[B:%.*]] to i64
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV1]], [[CONV]]
@@ -723,6 +712,7 @@ define i32 @smul_inverse_pattern(i32 %a, i32 %b) {
ret i32 %retval
}
+; TODO: Vector support?
define <2 x i32> @smul_vector_operations(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: @smul_vector_operations(
; CHECK-NEXT: [[CONV:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i64>