[llvm] [Transforms] Add X / (Y << Z) --> (X >> Z) / Y fold (PR #87122)

Sat Mar 30 07:38:25 PDT 2024

https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/87122

>From 80ab7fab632ca8353d2f53c39d1444e647d96dc8 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Fri, 29 Mar 2024 18:17:46 -0400
Subject: [PATCH] [Transforms] Add X / (Y << Z) --> (X >> Z) / Y fold

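For an unsigned division whose divisor is a one-use `shl nuw`, shift the
dividend right by the same amount and divide by the unshifted value instead.
The `exact` flag of the original udiv is propagated to both new instructions.

Alive2 Proof: https://alive2.llvm.org/ce/z/FjoN_A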
---
 .../InstCombine/InstCombineMulDivRem.cpp      | 10 ++++
 llvm/test/Transforms/InstCombine/div-shift.ll | 51 +++++++++----------
 llvm/test/Transforms/InstCombine/exact.ll     |  4 +-
 .../Transforms/InstCombine/shift-shift.ll     |  4 +-
 llvm/test/Transforms/InstCombine/shift.ll     |  4 +-
 .../Transforms/InstCombine/vector-udiv.ll     |  8 +--
 6 files changed, 45 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 8c698e52b5a0e6..3766b81d3e4d5c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1080,6 +1080,16 @@ static Value *foldIDivShl(BinaryOperator &I, InstCombiner::BuilderTy &Builder) {
     }
   }
 
+  // X /u (Y << Z) --> (X >>u Z) /u Y, if the shl is nuw and has one use
+  if (match(Op0, m_Value(X)) &&
+      match(Op1, m_OneUse(m_Shl(m_Value(Y), m_Value(Z))))) {
+    auto *Shl1 = cast<OverflowingBinaryOperator>(Op1);
+    if (!IsSigned && Shl1->hasNoUnsignedWrap()) {
+      Value *NewShift = Builder.CreateLShr(X, Z, "", I.isExact());
+      return Builder.CreateUDiv(NewShift, Y, "", I.isExact());
+    }
+  }
+
   return nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/div-shift.ll b/llvm/test/Transforms/InstCombine/div-shift.ll
index 9610746811a43a..aaaaf92db65812 100644
--- a/llvm/test/Transforms/InstCombine/div-shift.ll
+++ b/llvm/test/Transforms/InstCombine/div-shift.ll
@@ -12,8 +12,8 @@ declare i8 @llvm.smax.i8(i8, i8)
 define i32 @t1(i16 zeroext %x, i32 %y) {
 ; CHECK-LABEL: @t1(
 ; CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[X:%.*]] to i32
-; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[Y:%.*]], 1
-; CHECK-NEXT:    [[D1:%.*]] = lshr i32 [[CONV]], [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[CONV]], [[Y:%.*]]
+; CHECK-NEXT:    [[D1:%.*]] = lshr i32 [[TMP1]], 1
 ; CHECK-NEXT:    ret i32 [[D1]]
 ;
   %conv = zext i16 %x to i32
@@ -25,8 +25,8 @@ define i32 @t1(i16 zeroext %x, i32 %y) {
 define <2 x i32> @t1vec(<2 x i16> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @t1vec(
 ; CHECK-NEXT:    [[CONV:%.*]] = zext <2 x i16> [[X:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[Y:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[D1:%.*]] = lshr <2 x i32> [[CONV]], [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[CONV]], [[Y:%.*]]
+; CHECK-NEXT:    [[D1:%.*]] = lshr <2 x i32> [[TMP1]], <i32 1, i32 1>
 ; CHECK-NEXT:    ret <2 x i32> [[D1]]
 ;
   %conv = zext <2 x i16> %x to <2 x i32>
@@ -466,9 +466,9 @@ define i5 @udiv_mul_shl_nuw_exact_commute1(i5 %x, i5 %y, i5 %z) {
 
 define i5 @udiv_mul_shl_nuw_commute2(i5 %x, i5 %y, i5 %z) {
 ; CHECK-LABEL: @udiv_mul_shl_nuw_commute2(
-; CHECK-NEXT:    [[M1:%.*]] = mul nuw i5 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[M2:%.*]] = shl nuw i5 [[Z:%.*]], [[X]]
-; CHECK-NEXT:    [[D:%.*]] = udiv i5 [[M1]], [[M2]]
+; CHECK-NEXT:    [[M2:%.*]] = mul nuw i5 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[M1:%.*]] = lshr i5 [[M2]], [[X]]
+; CHECK-NEXT:    [[D:%.*]] = udiv i5 [[M1]], [[Z:%.*]]
 ; CHECK-NEXT:    ret i5 [[D]]
 ;
   %m1 = mul nuw i5 %x, %y
@@ -646,9 +646,9 @@ define i5 @sdiv_shl_mul_nuw(i5 %x, i5 %y, i5 %z) {
 
 define i5 @udiv_mul_shl_missing_nsw1(i5 %x, i5 %y, i5 %z) {
 ; CHECK-LABEL: @udiv_mul_shl_missing_nsw1(
-; CHECK-NEXT:    [[M1:%.*]] = mul nsw i5 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[M2:%.*]] = shl nuw i5 [[Y]], [[Z:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = udiv i5 [[M1]], [[M2]]
+; CHECK-NEXT:    [[M2:%.*]] = mul nsw i5 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[M1:%.*]] = lshr i5 [[M2]], [[Z:%.*]]
+; CHECK-NEXT:    [[D:%.*]] = udiv i5 [[M1]], [[Y]]
 ; CHECK-NEXT:    ret i5 [[D]]
 ;
   %m1 = mul nsw i5 %x, %y
@@ -674,8 +674,8 @@ define i5 @udiv_mul_shl_missing_nsw2(i5 %x, i5 %y, i5 %z) {
 
 define i8 @udiv_shl_nuw(i8 %x, i8 %y, i8 %z) {
 ; CHECK-LABEL: @udiv_shl_nuw(
-; CHECK-NEXT:    [[S:%.*]] = shl nuw i8 [[Y:%.*]], [[Z:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = udiv i8 [[X:%.*]], [[S]]
+; CHECK-NEXT:    [[X:%.*]] = lshr i8 [[S:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[D:%.*]] = udiv i8 [[X]], [[Y:%.*]]
 ; CHECK-NEXT:    ret i8 [[D]]
 ;
   %s = shl nuw i8 %y, %z
@@ -685,8 +685,8 @@ define i8 @udiv_shl_nuw(i8 %x, i8 %y, i8 %z) {
 
 define <2 x i4> @udiv_shl_nuw_exact(<2 x i4> %x, <2 x i4> %y, <2 x i4> %z) {
 ; CHECK-LABEL: @udiv_shl_nuw_exact(
-; CHECK-NEXT:    [[S:%.*]] = shl nuw <2 x i4> [[Y:%.*]], [[Z:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = udiv exact <2 x i4> [[X:%.*]], [[S]]
+; CHECK-NEXT:    [[X:%.*]] = lshr exact <2 x i4> [[S:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[D:%.*]] = udiv exact <2 x i4> [[X]], [[Y:%.*]]
 ; CHECK-NEXT:    ret <2 x i4> [[D]]
 ;
   %s = shl nuw <2 x i4> %y, %z
@@ -966,9 +966,9 @@ define i8 @udiv_shl_shl_nuw_nsw(i8 %x, i8 %y, i8 %z) {
 
 define i8 @udiv_shl_shl_nsw_nuw(i8 %x, i8 %y, i8 %z) {
 ; CHECK-LABEL: @udiv_shl_shl_nsw_nuw(
-; CHECK-NEXT:    [[XZ:%.*]] = shl nsw i8 [[X:%.*]], [[Z:%.*]]
-; CHECK-NEXT:    [[YZ:%.*]] = shl nuw i8 [[Y:%.*]], [[Z]]
-; CHECK-NEXT:    [[D:%.*]] = udiv i8 [[XZ]], [[YZ]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i8 -1, [[Z:%.*]]
+; CHECK-NEXT:    [[XZ:%.*]] = and i8 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[D:%.*]] = udiv i8 [[XZ]], [[YZ:%.*]]
 ; CHECK-NEXT:    ret i8 [[D]]
 ;
   %xz = shl nsw i8 %x, %z
@@ -988,13 +988,12 @@ define i8 @udiv_shl_shl_nuw_nsw2(i8 %x, i8 %y, i8 %z) {
   ret i8 %d
 }
 
-; TODO: X / (Y << Z) --> (X >> Z) / Y
-; https://alive2.llvm.org/ce/z/FjoN_A
+; X / (Y << Z) --> (X >> Z) / Y
 
 define i8 @udiv_shl_nuw_divisor(i8 %x, i8 %y, i8 %z) {
 ; CHECK-LABEL: @udiv_shl_nuw_divisor(
-; CHECK-NEXT:    [[S:%.*]] = shl nuw i8 [[Y:%.*]], [[Z:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = udiv i8 [[X:%.*]], [[S]]
+; CHECK-NEXT:    [[X:%.*]] = lshr i8 [[S:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[D:%.*]] = udiv i8 [[X]], [[Y:%.*]]
 ; CHECK-NEXT:    ret i8 [[D]]
 ;
   %s = shl nuw i8 %y, %z
@@ -1017,8 +1016,8 @@ define i8 @udiv_fail_shl_overflow(i8 %x, i8 %y) {
 
 define i8 @udiv_shl_no_overflow(i8 %x, i8 %y) {
 ; CHECK-LABEL: @udiv_shl_no_overflow(
-; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[Y:%.*]], 1
-; CHECK-NEXT:    [[MUL1:%.*]] = lshr i8 [[X:%.*]], [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[MUL1:%.*]] = lshr i8 [[TMP1]], 1
 ; CHECK-NEXT:    ret i8 [[MUL1]]
 ;
   %shl = shl nuw i8 2, %y
@@ -1185,9 +1184,9 @@ entry:
 define i32 @udiv_shl_pair_overflow_fail1(i32 %a, i32 %x, i32 %y) {
 ; CHECK-LABEL: @udiv_shl_pair_overflow_fail1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[LHS:%.*]] = shl nsw i32 [[A:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[RHS:%.*]] = shl nuw i32 [[A]], [[Y:%.*]]
-; CHECK-NEXT:    [[DIV:%.*]] = udiv i32 [[LHS]], [[RHS]]
+; CHECK-NEXT:    [[RHS:%.*]] = shl nsw i32 [[A:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[LHS:%.*]] = lshr i32 [[RHS]], [[Y:%.*]]
+; CHECK-NEXT:    [[DIV:%.*]] = udiv i32 [[LHS]], [[A]]
 ; CHECK-NEXT:    ret i32 [[DIV]]
 ;
 entry:
diff --git a/llvm/test/Transforms/InstCombine/exact.ll b/llvm/test/Transforms/InstCombine/exact.ll
index 10d46e7b2dfb3d..d730bb25f9c234 100644
--- a/llvm/test/Transforms/InstCombine/exact.ll
+++ b/llvm/test/Transforms/InstCombine/exact.ll
@@ -82,8 +82,8 @@ define i32 @udiv1(i32 %x, i32 %w) {
 
 define i32 @udiv2(i32 %x, i32 %w) {
 ; CHECK-LABEL: @udiv2(
-; CHECK-NEXT:    [[Z1:%.*]] = lshr exact i32 [[X:%.*]], [[W:%.*]]
-; CHECK-NEXT:    ret i32 [[Z1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], [[W:%.*]]
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %y = shl i32 1, %w
   %z = udiv exact i32 %x, %y
diff --git a/llvm/test/Transforms/InstCombine/shift-shift.ll b/llvm/test/Transforms/InstCombine/shift-shift.ll
index 8a40863300d45f..7de88fdd7cecf6 100644
--- a/llvm/test/Transforms/InstCombine/shift-shift.ll
+++ b/llvm/test/Transforms/InstCombine/shift-shift.ll
@@ -345,8 +345,8 @@ define <3 x i8> @shl_shl_constants_vec(<3 x i8> %x) {
 ; PR9809
 define i32 @shl_shl_constants_div(i32 %a, i32 %b) {
 ; CHECK-LABEL: @shl_shl_constants_div(
-; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[B:%.*]], 2
-; CHECK-NEXT:    [[DIV1:%.*]] = lshr i32 [[A:%.*]], [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[DIV1:%.*]] = lshr i32 [[TMP1]], 2
 ; CHECK-NEXT:    ret i32 [[DIV1]]
 ;
   %shl1 = shl i32 1, %b
diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll
index bef7fc81a7d1f9..ea273bb683c63c 100644
--- a/llvm/test/Transforms/InstCombine/shift.ll
+++ b/llvm/test/Transforms/InstCombine/shift.ll
@@ -699,8 +699,8 @@ define <2 x i32> @test42vec(<2 x i32> %a, <2 x i32> %b) {
 
 define i32 @test43(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: @test43(
-; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[B:%.*]], 12
-; CHECK-NEXT:    [[DIV21:%.*]] = lshr i32 [[A:%.*]], [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[DIV21:%.*]] = lshr i32 [[TMP1]], 12
 ; CHECK-NEXT:    ret i32 [[DIV21]]
 ;
   %div = shl i32 4096, %b    ; must be exact otherwise we'd divide by zero
diff --git a/llvm/test/Transforms/InstCombine/vector-udiv.ll b/llvm/test/Transforms/InstCombine/vector-udiv.ll
index c817b3a1ac5a0a..6e664e8c0ff9a2 100644
--- a/llvm/test/Transforms/InstCombine/vector-udiv.ll
+++ b/llvm/test/Transforms/InstCombine/vector-udiv.ll
@@ -51,8 +51,8 @@ define <4 x i32> @test_v4i32_negconst_undef(<4 x i32> %a0) {
 ; X udiv (C1 << N), where C1 is "1<<C2"  -->  X >> (N+C2)
 define <4 x i32> @test_v4i32_shl_splatconst_pow2(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-LABEL: @test_v4i32_shl_splatconst_pow2(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[A1:%.*]], <i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr <4 x i32> [[A0:%.*]], [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], [[A1:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
   %1 = shl <4 x i32> <i32 4, i32 4, i32 4, i32 4>, %a1
@@ -62,8 +62,8 @@ define <4 x i32> @test_v4i32_shl_splatconst_pow2(<4 x i32> %a0, <4 x i32> %a1) {
 
 define <4 x i32> @test_v4i32_shl_const_pow2(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-LABEL: @test_v4i32_shl_const_pow2(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[A1:%.*]], <i32 2, i32 3, i32 4, i32 5>
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr <4 x i32> [[A0:%.*]], [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], [[A1:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 2, i32 3, i32 4, i32 5>
 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
   %1 = shl <4 x i32> <i32 4, i32 8, i32 16, i32 32>, %a1