[llvm] [InstCombine] Simplify further when shift is half bitwidth (PR #93677)

Wed May 29 15:27:41 PDT 2024

https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/93677

>From d3e88aa88b7790b5c843dbbb94a07a865d81782d Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Mon, 6 May 2024 17:07:29 -0400
Subject: [PATCH] [InstCombine] lshr (mul nuw (X, 2^N + 1)), N -> X when N is
 half the bit width

Alive2 Proofs:
https://alive2.llvm.org/ce/z/eSinJY
https://alive2.llvm.org/ce/z/sweDgc
https://alive2.llvm.org/ce/z/-2dXZi
https://alive2.llvm.org/ce/z/kx2PhF
https://alive2.llvm.org/ce/z/e9QjM6
---
 .../InstCombine/InstCombineShifts.cpp         |  4 ++--
 llvm/test/Transforms/InstCombine/ashr-lshr.ll | 11 ++++++++++
 llvm/test/Transforms/InstCombine/lshr.ll      | 21 ++++++++++++-------
 3 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 4f91993750fd2..59d0e0188c1a3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -1465,9 +1465,9 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
           MulC->logBase2() == ShAmtC) {
         // Look for a "splat" mul pattern - it replicates bits across each half
         // of a value, so a right shift is just a mask of the low bits:
-        // lshr i[2N] (mul nuw X, (2^N)+1), N --> and iN X, (2^N)-1
+        // lshr i[2N] (mul nuw X, (2^N)+1), N --> X (nuw implies X < 2^N)
         if (ShAmtC * 2 == BitWidth)
-          return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *MulC - 2));
+          return replaceInstUsesWith(I, X);
 
         // lshr (mul nuw (X, 2^N + 1)), N -> add nuw (X, lshr(X, N))
         if (Op0->hasOneUse()) {
diff --git a/llvm/test/Transforms/InstCombine/ashr-lshr.ll b/llvm/test/Transforms/InstCombine/ashr-lshr.ll
index c2a4f35412670..717cbbabb1128 100644
--- a/llvm/test/Transforms/InstCombine/ashr-lshr.ll
+++ b/llvm/test/Transforms/InstCombine/ashr-lshr.ll
@@ -862,4 +862,15 @@ define i32 @ashr_mul_times_5_div_4_exact_2(i32 %x) {
   ret i32 %ashr
 }
 
+define i32 @mul_splat_fold_ashr(i32 %x) {
+; CHECK-LABEL: @mul_splat_fold_ashr(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr i32 [[X:%.*]], 16
+; CHECK-NEXT:    [[T:%.*]] = add nsw i32 [[TMP1]], [[X]]
+; CHECK-NEXT:    ret i32 [[T]]
+;
+  %m = mul nsw i32 %x, 65537
+  %t = ashr i32 %m, 16
+  ret i32 %t
+}
+
 declare void @use(i32)
diff --git a/llvm/test/Transforms/InstCombine/lshr.ll b/llvm/test/Transforms/InstCombine/lshr.ll
index dfdb6c7b4b268..17b08985ee90e 100644
--- a/llvm/test/Transforms/InstCombine/lshr.ll
+++ b/llvm/test/Transforms/InstCombine/lshr.ll
@@ -348,22 +348,31 @@ define <2 x i32> @narrow_lshr_constant(<2 x i8> %x, <2 x i8> %y) {
 
 define i32 @mul_splat_fold(i32 %x) {
 ; CHECK-LABEL: @mul_splat_fold(
-; CHECK-NEXT:    [[T:%.*]] = and i32 [[X:%.*]], 65535
-; CHECK-NEXT:    ret i32 [[T]]
+; CHECK-NEXT:    ret i32 [[X:%.*]]
 ;
   %m = mul nuw i32 %x, 65537
   %t = lshr i32 %m, 16
   ret i32 %t
 }
 
+define i32 @mul_splat_fold_known_zeros(i32 %x) {
+; CHECK-LABEL: @mul_splat_fold_known_zeros(
+; CHECK-NEXT:    [[XX:%.*]] = and i32 [[X:%.*]], 360
+; CHECK-NEXT:    ret i32 [[XX]]
+;
+  %xx = and i32 %x, 360
+  %m = mul nuw i32 %xx, 65537
+  %t = lshr i32 %m, 16
+  ret i32 %t
+}
+
 ; Vector type, extra use, weird types are all ok.
 
 define <3 x i14> @mul_splat_fold_vec(<3 x i14> %x) {
 ; CHECK-LABEL: @mul_splat_fold_vec(
 ; CHECK-NEXT:    [[M:%.*]] = mul nuw <3 x i14> [[X:%.*]], <i14 129, i14 129, i14 129>
 ; CHECK-NEXT:    call void @usevec(<3 x i14> [[M]])
-; CHECK-NEXT:    [[T:%.*]] = and <3 x i14> [[X]], <i14 127, i14 127, i14 127>
-; CHECK-NEXT:    ret <3 x i14> [[T]]
+; CHECK-NEXT:    ret <3 x i14> [[X]]
 ;
   %m = mul nuw <3 x i14> %x, <i14 129, i14 129, i14 129>
   call void @usevec(<3 x i14> %m)
@@ -628,8 +637,6 @@ define i32 @mul_splat_fold_wrong_lshr_const(i32 %x) {
   ret i32 %t
 }
 
-; Negative test (but simplifies into a different transform)
-
 define i32 @mul_splat_fold_no_nuw(i32 %x) {
 ; CHECK-LABEL: @mul_splat_fold_no_nuw(
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 16
@@ -641,7 +648,7 @@ define i32 @mul_splat_fold_no_nuw(i32 %x) {
   ret i32 %t
 }
 
-; Negative test 
+; Negative test
 
 define i32 @mul_splat_fold_no_flags(i32 %x) {
 ; CHECK-LABEL: @mul_splat_fold_no_flags(