[llvm] [InstCombine] Resolve TODO: Remove one-use check if the other logic operand (Y) is constant (PR #77973)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 18 17:56:39 PST 2024


https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/77973

From 9ed961b0d694c428856667a794eb35f5c08b3cd3 Mon Sep 17 00:00:00 2001
From: Rose <83477269+AtariDreams at users.noreply.github.com>
Date: Sun, 14 Jan 2024 12:53:37 -0500
Subject: [PATCH 1/2] [InstCombine] Add pre-commit tests [NFC]

---
 .../Transforms/InstCombine/shift-logic.ll     | 127 ++++++++++++++-
 .../LoopVectorize/AArch64/sve-widen-phi.ll    | 154 +++++++++---------
 2 files changed, 202 insertions(+), 79 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/shift-logic.ll b/llvm/test/Transforms/InstCombine/shift-logic.ll
index 544694d398431e..d4d6273331ca59 100644
--- a/llvm/test/Transforms/InstCombine/shift-logic.ll
+++ b/llvm/test/Transforms/InstCombine/shift-logic.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s
 
-declare void @use(i64)
-
 define i8 @shl_and(i8 %x, i8 %y) {
 ; CHECK-LABEL: @shl_and(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 [[X:%.*]], 5
@@ -16,6 +14,8 @@ define i8 @shl_and(i8 %x, i8 %y) {
   ret i8 %sh1
 }
 
+declare void @use(i8)
+
 define <2 x i8> @shl_and_nonuniform(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @shl_and_nonuniform(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 4>
@@ -29,6 +29,23 @@ define <2 x i8> @shl_and_nonuniform(<2 x i8> %x, <2 x i8> %y) {
   ret <2 x i8> %sh1
 }
 
+define <2 x i8> @shl_and_nonuniform_multiuse(<2 x i8> %x) {
+; CHECK-LABEL: @shl_and_nonuniform_multiuse(
+; CHECK-NEXT:    [[SH0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 3, i8 4>
+; CHECK-NEXT:    call void @use1(<2 x i8> [[SH0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> [[X]], <i8 5, i8 4>
+; CHECK-NEXT:    [[SH1:%.*]] = and <2 x i8> [[TMP1]], <i8 -88, i8 42>
+; CHECK-NEXT:    ret <2 x i8> [[SH1]]
+;
+  %sh0 = shl <2 x i8> %x, <i8 3, i8 4>
+  %r = and <2 x i8> %sh0, <i8 42, i8 42> ; constant operand on the 'and'
+  call void @use1(<2 x i8> %sh0)
+  %sh1 = shl <2 x i8> %r, <i8 2, i8 0>
+  ret <2 x i8> %sh1
+}
+
+declare void @use1(<2 x i8>)
+
 define i16 @shl_or(i16 %x, i16 %py) {
 ; CHECK-LABEL: @shl_or(
 ; CHECK-NEXT:    [[Y:%.*]] = srem i16 [[PY:%.*]], 42
@@ -59,6 +76,23 @@ define <2 x i16> @shl_or_undef(<2 x i16> %x, <2 x i16> %py) {
   ret <2 x i16> %sh1
 }
 
+define <2 x i16> @shl_or_undef_multiuse(<2 x i16> %x) {
+; CHECK-LABEL: @shl_or_undef_multiuse(
+; CHECK-NEXT:    [[SH0:%.*]] = shl <2 x i16> [[X:%.*]], <i16 5, i16 undef>
+; CHECK-NEXT:    call void @use2(<2 x i16> [[SH0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i16> [[X]], <i16 12, i16 undef>
+; CHECK-NEXT:    [[SH1:%.*]] = or <2 x i16> [[TMP1]], <i16 5376, i16 poison>
+; CHECK-NEXT:    ret <2 x i16> [[SH1]]
+;
+  %sh0 = shl <2 x i16> %x, <i16 5, i16 undef>
+  %r = or <2 x i16> <i16 42, i16 42>, %sh0 ; constant operand on the 'or'
+  call void @use2(<2 x i16> %sh0)
+  %sh1 = shl <2 x i16> %r, <i16 7, i16 undef>
+  ret <2 x i16> %sh1
+}
+
+declare void @use2(<2 x i16>)
+
 define i32 @shl_xor(i32 %x, i32 %y) {
 ; CHECK-LABEL: @shl_xor(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[X:%.*]], 12
@@ -85,6 +119,23 @@ define <2 x i32> @shl_xor_nonuniform(<2 x i32> %x, <2 x i32> %y) {
   ret <2 x i32> %sh1
 }
 
+declare void @use3(<2 x i32>)
+
+define <2 x i32> @shl_xor_nonuniform_multiuse(<2 x i32> %x) {
+; CHECK-LABEL: @shl_xor_nonuniform_multiuse(
+; CHECK-NEXT:    [[SH0:%.*]] = shl <2 x i32> [[X:%.*]], <i32 5, i32 6>
+; CHECK-NEXT:    call void @use3(<2 x i32> [[SH0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[X]], <i32 12, i32 14>
+; CHECK-NEXT:    [[SH1:%.*]] = xor <2 x i32> [[TMP1]], <i32 5376, i32 10752>
+; CHECK-NEXT:    ret <2 x i32> [[SH1]]
+;
+  %sh0 = shl <2 x i32> %x, <i32 5, i32 6>
+  %r = xor <2 x i32> <i32 42, i32 42>, %sh0 ; constant operand on the 'xor'
+  call void @use3(<2 x i32> %sh0)
+  %sh1 = shl <2 x i32> %r, <i32 7, i32 8>
+  ret <2 x i32> %sh1
+}
+
 define i64 @lshr_and(i64 %x, i64 %py) {
 ; CHECK-LABEL: @lshr_and(
 ; CHECK-NEXT:    [[Y:%.*]] = srem i64 [[PY:%.*]], 42
@@ -115,6 +166,21 @@ define <2 x i64> @lshr_and_undef(<2 x i64> %x, <2 x i64> %py) {
   ret <2 x i64> %sh1
 }
 
+define <2 x i64> @lshr_and_undef_multiuse(<2 x i64> %x) {
+; CHECK-LABEL: @lshr_and_undef_multiuse(
+; CHECK-NEXT:    [[SH0:%.*]] = lshr <2 x i64> [[X:%.*]], <i64 5, i64 undef>
+; CHECK-NEXT:    call void @use4(<2 x i64> [[SH0]])
+; CHECK-NEXT:    ret <2 x i64> zeroinitializer
+;
+  %sh0 = lshr <2 x i64> %x, <i64 5, i64 undef>
+  %r = and <2 x i64> <i64 42, i64 42>, %sh0 ; constant operand on the 'and'
+  call void @use4(<2 x i64> %sh0)
+  %sh1 = lshr <2 x i64> %r, <i64 7, i64 undef>
+  ret <2 x i64> %sh1
+}
+
+declare void @use4(<2 x i64>)
+
 define <4 x i32> @lshr_or(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @lshr_or(
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[X:%.*]], <i32 12, i32 12, i32 12, i32 12>
@@ -359,6 +425,21 @@ define <2 x i8> @shl_add_nonuniform(<2 x i8> %x, <2 x i8> %y) {
   ret <2 x i8> %sh1
 }
 
+define <2 x i8> @shl_add_nonuniform_multiuse(<2 x i8> %x) {
+; CHECK-LABEL: @shl_add_nonuniform_multiuse(
+; CHECK-NEXT:    [[SH0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 3, i8 4>
+; CHECK-NEXT:    call void @use(<2 x i8> [[SH0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> [[X]], <i8 5, i8 4>
+; CHECK-NEXT:    [[SH1:%.*]] = add <2 x i8> [[TMP1]], <i8 -88, i8 42>
+; CHECK-NEXT:    ret <2 x i8> [[SH1]]
+;
+  %sh0 = shl <2 x i8> %x, <i8 3, i8 4>
+  %r = add <2 x i8> %sh0, <i8 42, i8 42> ; constant operand on the 'add'
+  call void @use(<2 x i8> %sh0)
+  %sh1 = shl <2 x i8> %r, <i8 2, i8 0>
+  ret <2 x i8> %sh1
+}
+
 
 define <2 x i64> @shl_add_undef(<2 x i64> %x, <2 x i64> %py) {
 ; CHECK-LABEL: @shl_add_undef(
@@ -375,6 +456,20 @@ define <2 x i64> @shl_add_undef(<2 x i64> %x, <2 x i64> %py) {
   ret <2 x i64> %sh1
 }
 
+define <2 x i64> @shl_add_undef_multiuse(<2 x i64> %x) {
+; CHECK-LABEL: @shl_add_undef_multiuse(
+; CHECK-NEXT:    [[SH0:%.*]] = shl <2 x i64> [[X:%.*]], <i64 5, i64 undef>
+; CHECK-NEXT:    call void @use4(<2 x i64> [[SH0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[X]], <i64 12, i64 undef>
+; CHECK-NEXT:    [[SH1:%.*]] = add <2 x i64> [[TMP1]], <i64 5376, i64 poison>
+; CHECK-NEXT:    ret <2 x i64> [[SH1]]
+;
+  %sh0 = shl <2 x i64> %x, <i64 5, i64 undef>
+  %r = add <2 x i64> <i64 42, i64 42>, %sh0 ; constant operand on the 'add'
+  call void @use4(<2 x i64> %sh0)
+  %sh1 = shl <2 x i64> %r, <i64 7, i64 undef>
+  ret <2 x i64> %sh1
+}
 
 define i8 @lshr_add(i8 %x, i8 %y) {
 ; CHECK-LABEL: @lshr_add(
@@ -457,6 +552,20 @@ define <2 x i8> @shl_sub_nonuniform(<2 x i8> %x, <2 x i8> %y) {
   ret <2 x i8> %sh1
 }
 
+define <2 x i8> @shl_sub_nonuniform_multiuse(<2 x i8> %x) {
+; CHECK-LABEL: @shl_sub_nonuniform_multiuse(
+; CHECK-NEXT:    [[SH0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 3, i8 4>
+; CHECK-NEXT:    call void @use1(<2 x i8> [[SH0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> [[X]], <i8 5, i8 4>
+; CHECK-NEXT:    [[SH1:%.*]] = add <2 x i8> [[TMP1]], <i8 88, i8 -42>
+; CHECK-NEXT:    ret <2 x i8> [[SH1]]
+;
+  %sh0 = shl <2 x i8> %x, <i8 3, i8 4>
+  %r = sub <2 x i8> %sh0, <i8 42, i8 42>
+  call void @use1(<2 x i8> %sh0)
+  %sh1 = shl <2 x i8> %r, <i8 2, i8 0>
+  ret <2 x i8> %sh1
+}
 
 define <2 x i64> @shl_sub_undef(<2 x i64> %x, <2 x i64> %py) {
 ; CHECK-LABEL: @shl_sub_undef(
@@ -473,6 +582,20 @@ define <2 x i64> @shl_sub_undef(<2 x i64> %x, <2 x i64> %py) {
   ret <2 x i64> %sh1
 }
 
+define <2 x i64> @shl_sub_undef_multiuse(<2 x i64> %x) {
+; CHECK-LABEL: @shl_sub_undef_multiuse(
+; CHECK-NEXT:    [[SH0:%.*]] = shl <2 x i64> [[X:%.*]], <i64 5, i64 undef>
+; CHECK-NEXT:    call void @use4(<2 x i64> [[SH0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[X]], <i64 12, i64 undef>
+; CHECK-NEXT:    [[SH1:%.*]] = sub <2 x i64> <i64 5376, i64 poison>, [[TMP1]]
+; CHECK-NEXT:    ret <2 x i64> [[SH1]]
+;
+  %sh0 = shl <2 x i64> %x, <i64 5, i64 undef>
+  %r = sub <2 x i64> <i64 42, i64 42>, %sh0 ; constant operand on the 'sub'
+  call void @use4(<2 x i64> %sh0)
+  %sh1 = shl <2 x i64> %r, <i64 7, i64 undef>
+  ret <2 x i64> %sh1
+}
 
 define i8 @lshr_sub(i8 %x, i8 %y) {
 ; CHECK-LABEL: @lshr_sub(
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
index 1682d0740d0f5b..c2e613e0ad41f3 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
@@ -27,43 +27,43 @@ define void @widen_ptr_phi_unrolled(ptr noalias nocapture %a, ptr noalias nocapt
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[DOTNEG]], [[N]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[N_VEC]], 3
 ; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[TMP3]]
-; CHECK-NEXT:    [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP26:%.*]] = shl nuw nsw i64 [[TMP25]], 3
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 3
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = shl i64 [[INDEX]], 3
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[C]], i64 [[TMP4]]
-; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 5
-; CHECK-NEXT:    [[TMP7:%.*]] = shl i64 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[C]], i64 [[TMP6]]
-; CHECK-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[TMP8]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[INDEX]], 3
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[C]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 5
+; CHECK-NEXT:    [[TMP9:%.*]] = shl i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[C]], i64 [[TMP8]]
+; CHECK-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[TMP10]], i64 [[TMP9]]
 ; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[NEXT_GEP]], align 4
 ; CHECK-NEXT:    [[WIDE_VEC3:%.*]] = load <vscale x 8 x i32>, ptr [[NEXT_GEP2]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
-; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
-; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
+; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
+; CHECK-NEXT:    [[TMP12:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
 ; CHECK-NEXT:    [[STRIDED_VEC4:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC3]])
-; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC4]], 0
-; CHECK-NEXT:    [[TMP12:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC4]], 1
-; CHECK-NEXT:    [[TMP13:%.*]] = add nsw <vscale x 4 x i32> [[TMP9]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP14:%.*]] = add nsw <vscale x 4 x i32> [[TMP11]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP17:%.*]] = shl nuw nsw i64 [[TMP16]], 2
-; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 [[TMP17]]
-; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP13]], ptr [[TMP15]], align 4
-; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP14]], ptr [[TMP18]], align 4
-; CHECK-NEXT:    [[TMP19:%.*]] = add nsw <vscale x 4 x i32> [[TMP10]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP20:%.*]] = add nsw <vscale x 4 x i32> [[TMP12]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP23:%.*]] = shl nuw nsw i64 [[TMP22]], 2
-; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP23]]
-; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP19]], ptr [[TMP21]], align 4
-; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP20]], ptr [[TMP24]], align 4
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP26]]
+; CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC4]], 0
+; CHECK-NEXT:    [[TMP14:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC4]], 1
+; CHECK-NEXT:    [[TMP15:%.*]] = add nsw <vscale x 4 x i32> [[TMP11]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP16:%.*]] = add nsw <vscale x 4 x i32> [[TMP13]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP19:%.*]] = shl nuw nsw i64 [[TMP18]], 2
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[TMP19]]
+; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP15]], ptr [[TMP17]], align 4
+; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP16]], ptr [[TMP20]], align 4
+; CHECK-NEXT:    [[TMP21:%.*]] = add nsw <vscale x 4 x i32> [[TMP12]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP22:%.*]] = add nsw <vscale x 4 x i32> [[TMP14]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP25:%.*]] = shl nuw nsw i64 [[TMP24]], 2
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[TMP25]]
+; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP21]], ptr [[TMP23]], align 4
+; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP22]], ptr [[TMP26]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -143,28 +143,28 @@ define void @widen_2ptrs_phi_unrolled(ptr noalias nocapture %dst, ptr noalias no
 ; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = shl i64 [[N_VEC]], 2
 ; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP4]]
-; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP16:%.*]] = shl nuw nsw i64 [[TMP15]], 3
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 3
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[INDEX]], 2
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP5]]
-; CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[INDEX]], 2
-; CHECK-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP6]]
-; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP7:%.*]] = shl i64 [[INDEX]], 2
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[INDEX]], 2
+; CHECK-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 [[TMP10]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[NEXT_GEP]], align 4
-; CHECK-NEXT:    [[WIDE_LOAD7:%.*]] = load <vscale x 4 x i32>, ptr [[TMP9]], align 4
-; CHECK-NEXT:    [[TMP10:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP11:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD7]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP13:%.*]] = shl nuw nsw i64 [[TMP12]], 2
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i32, ptr [[NEXT_GEP5]], i64 [[TMP13]]
-; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP10]], ptr [[NEXT_GEP5]], align 4
-; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP11]], ptr [[TMP14]], align 4
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]]
+; CHECK-NEXT:    [[WIDE_LOAD7:%.*]] = load <vscale x 4 x i32>, ptr [[TMP11]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP13:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD7]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 2
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i32, ptr [[NEXT_GEP5]], i64 [[TMP15]]
+; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP12]], ptr [[NEXT_GEP5]], align 4
+; CHECK-NEXT:    store <vscale x 4 x i32> [[TMP13]], ptr [[TMP16]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       middle.block:
@@ -234,30 +234,30 @@ define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 {
 ; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = shl i64 [[N_VEC]], 3
 ; CHECK-NEXT:    [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP4]]
-; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP13:%.*]] = shl nuw nsw i64 [[TMP12]], 1
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 1
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 3
-; CHECK-NEXT:    [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
-; CHECK-NEXT:    [[VECTOR_GEP:%.*]] = shl <vscale x 2 x i64> [[TMP7]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 2, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
-; CHECK-NEXT:    [[TMP9:%.*]] = shl i64 [[INDEX]], 3
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]]
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <vscale x 2 x ptr> [[TMP8]], i64 0
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[TMP10]], align 8
-; CHECK-NEXT:    [[TMP11]] = add <vscale x 2 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
-; CHECK-NEXT:    store <vscale x 2 x ptr> [[TMP8]], ptr [[NEXT_GEP]], align 8
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]]
-; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP6]]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 3
+; CHECK-NEXT:    [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT:    [[VECTOR_GEP:%.*]] = shl <vscale x 2 x i64> [[TMP9]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 2, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
+; CHECK-NEXT:    [[TMP11:%.*]] = shl i64 [[INDEX]], 3
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <vscale x 2 x ptr> [[TMP10]], i64 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[TMP12]], align 8
+; CHECK-NEXT:    [[TMP13]] = add <vscale x 2 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT:    store <vscale x 2 x ptr> [[TMP10]], ptr [[NEXT_GEP]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
+; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP8]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[TMP11]])
+; CHECK-NEXT:    [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[TMP13]])
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
@@ -310,22 +310,22 @@ define void @phi_used_in_vector_compare_and_scalar_indvar_update_and_store(ptr %
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 1
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 1
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[PTR:%.*]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
-; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
-; CHECK-NEXT:    [[VECTOR_GEP:%.*]] = shl <vscale x 2 x i64> [[TMP2]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <vscale x 2 x ptr> [[TMP3]], zeroinitializer
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <vscale x 2 x ptr> [[TMP3]], i64 0
-; CHECK-NEXT:    call void @llvm.masked.store.nxv2i16.p0(<vscale x 2 x i16> zeroinitializer, ptr [[TMP5]], i32 2, <vscale x 2 x i1> [[TMP4]])
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
-; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2
+; CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT:    [[VECTOR_GEP:%.*]] = shl <vscale x 2 x i64> [[TMP4]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ne <vscale x 2 x ptr> [[TMP5]], zeroinitializer
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <vscale x 2 x ptr> [[TMP5]], i64 0
+; CHECK-NEXT:    call void @llvm.masked.store.nxv2i16.p0(<vscale x 2 x i16> zeroinitializer, ptr [[TMP7]], i32 2, <vscale x 2 x i1> [[TMP6]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
+; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK:       middle.block:

From 203c4135e6a844981cb7b6dca57947c6ec9dea78 Mon Sep 17 00:00:00 2001
From: Rose <83477269+AtariDreams at users.noreply.github.com>
Date: Fri, 12 Jan 2024 15:15:27 -0500
Subject: [PATCH 2/2] [Transforms] Remove one-use check if the other logic
 operand (Y) is constant

By checking the other operand with match(W, m_ImmConstant()), we no longer need to restrict the first shift to a single use.
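
A minimal scalar sketch of the transform this enables, adapted from the new vector tests in the first patch (the function name and the @use declaration are illustrative, not part of the patch): because the 'and' operand is an immediate constant, the fold can now fire even though the inner shift has a second use.

declare void @use(i8)

define i8 @shl_and_multiuse(i8 %x) {
  %sh0 = shl i8 %x, 3
  call void @use(i8 %sh0)        ; extra use keeps %sh0 alive
  %r = and i8 %sh0, 42           ; constant operand on the 'and'
  %sh1 = shl i8 %r, 2
  ret i8 %sh1
}

; Expected result: %sh0 is preserved for @use, while %sh1 is rewritten to
;   %tmp = shl i8 %x, 5          ; shift amounts combined: 3 + 2
;   %sh1 = and i8 %tmp, -88      ; 42 << 2 == 168 == -88 as i8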
---
 .../InstCombine/InstCombineShifts.cpp         | 33 +++++++++++--------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index b7958978c450c9..70b49bb42f9065 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -368,23 +368,24 @@ static Instruction *foldShiftOfShiftedBinOp(BinaryOperator &I,
 
   // Find a matching one-use shift by constant. The fold is not valid if the sum
   // of the shift values equals or exceeds bitwidth.
-  // TODO: Remove the one-use check if the other logic operand (Y) is constant.
   Value *X, *Y;
-  auto matchFirstShift = [&](Value *V) {
+  auto matchFirstShift = [&](Value *V, Value *W) {
     APInt Threshold(Ty->getScalarSizeInBits(), Ty->getScalarSizeInBits());
-    return match(V,
-                 m_OneUse(m_BinOp(ShiftOpcode, m_Value(X), m_Constant(C0)))) &&
-           match(ConstantExpr::getAdd(C0, C1),
-                 m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold));
+    auto binOp = m_BinOp(ShiftOpcode, m_Value(X), m_Constant(C0));
+    return (match(W, m_ImmConstant()))
+               ? match(V, binOp)
+               : match(V, m_OneUse(binOp)) &&
+                     match(ConstantExpr::getAdd(C0, C1),
+                           m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold));
   };
 
   // Logic ops and Add are commutative, so check each operand for a match. Sub
   // is not, so we cannot reorder if we match operand(1) and need to keep the
   // operands in their original positions.
   bool FirstShiftIsOp1 = false;
-  if (matchFirstShift(BinInst->getOperand(0)))
+  if (matchFirstShift(BinInst->getOperand(0), BinInst->getOperand(1)))
     Y = BinInst->getOperand(1);
-  else if (matchFirstShift(BinInst->getOperand(1))) {
+  else if (matchFirstShift(BinInst->getOperand(1), BinInst->getOperand(0))) {
     Y = BinInst->getOperand(0);
     FirstShiftIsOp1 = BinInst->getOpcode() == Instruction::Sub;
   } else
@@ -565,14 +566,17 @@ static bool canEvaluateShifted(Value *V, unsigned NumBits, bool IsLeftShift,
     return true;
 
   Instruction *I = dyn_cast<Instruction>(V);
-  if (!I) return false;
+  if (!I)
+    return false;
 
   // We can't mutate something that has multiple uses: doing so would
   // require duplicating the instruction in general, which isn't profitable.
-  if (!I->hasOneUse()) return false;
+  if (!I->hasOneUse())
+    return false;
 
   switch (I->getOpcode()) {
-  default: return false;
+  default:
+    return false;
   case Instruction::And:
   case Instruction::Or:
   case Instruction::Xor:
@@ -689,7 +693,8 @@ static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
   IC.addToWorklist(I);
 
   switch (I->getOpcode()) {
-  default: llvm_unreachable("Inconsistency with CanEvaluateShifted");
+  default:
+    llvm_unreachable("Inconsistency with CanEvaluateShifted");
   case Instruction::And:
   case Instruction::Or:
   case Instruction::Xor:
@@ -727,8 +732,8 @@ static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
     IC.InsertNewInstWith(Neg, I->getIterator());
     unsigned TypeWidth = I->getType()->getScalarSizeInBits();
     APInt Mask = APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits);
-    auto *And = BinaryOperator::CreateAnd(Neg,
-                                          ConstantInt::get(I->getType(), Mask));
+    auto *And =
+        BinaryOperator::CreateAnd(Neg, ConstantInt::get(I->getType(), Mask));
     And->takeName(I);
     return IC.InsertNewInstWith(And, I->getIterator());
   }


