[llvm] [InstCombine] Resolve TODO: Remove one-use check if other logic operand (Y) is constant (PR #77973)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 18 17:56:39 PST 2024
https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/77973
From 9ed961b0d694c428856667a794eb35f5c08b3cd3 Mon Sep 17 00:00:00 2001
From: Rose <83477269+AtariDreams at users.noreply.github.com>
Date: Sun, 14 Jan 2024 12:53:37 -0500
Subject: [PATCH 1/2] [InstCombine] Add pre-commit tests [NFC]
---
.../Transforms/InstCombine/shift-logic.ll | 127 ++++++++++++++-
.../LoopVectorize/AArch64/sve-widen-phi.ll | 154 +++++++++---------
2 files changed, 202 insertions(+), 79 deletions(-)
diff --git a/llvm/test/Transforms/InstCombine/shift-logic.ll b/llvm/test/Transforms/InstCombine/shift-logic.ll
index 544694d398431e..d4d6273331ca59 100644
--- a/llvm/test/Transforms/InstCombine/shift-logic.ll
+++ b/llvm/test/Transforms/InstCombine/shift-logic.ll
@@ -1,8 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
-declare void @use(i64)
-
define i8 @shl_and(i8 %x, i8 %y) {
; CHECK-LABEL: @shl_and(
; CHECK-NEXT: [[TMP1:%.*]] = shl i8 [[X:%.*]], 5
@@ -16,6 +14,8 @@ define i8 @shl_and(i8 %x, i8 %y) {
ret i8 %sh1
}
+declare void @use(<2 x i8>)
+
define <2 x i8> @shl_and_nonuniform(<2 x i8> %x, <2 x i8> %y) {
; CHECK-LABEL: @shl_and_nonuniform(
; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 4>
@@ -29,6 +29,23 @@ define <2 x i8> @shl_and_nonuniform(<2 x i8> %x, <2 x i8> %y) {
ret <2 x i8> %sh1
}
+define <2 x i8> @shl_and_nonuniform_multiuse(<2 x i8> %x) {
+; CHECK-LABEL: @shl_and_nonuniform_multiuse(
+; CHECK-NEXT: [[SH0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 3, i8 4>
+; CHECK-NEXT: call void @use1(<2 x i8> [[SH0]])
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X]], <i8 5, i8 4>
+; CHECK-NEXT: [[SH1:%.*]] = and <2 x i8> [[TMP1]], <i8 -88, i8 42>
+; CHECK-NEXT: ret <2 x i8> [[SH1]]
+;
+ %sh0 = shl <2 x i8> %x, <i8 3, i8 4>
+ %r = and <2 x i8> %sh0, <i8 42, i8 42> ; constant operand on the 'and'
+ call void @use1(<2 x i8> %sh0)
+ %sh1 = shl <2 x i8> %r, <i8 2, i8 0>
+ ret <2 x i8> %sh1
+}
+
+declare void @use1(<2 x i8>)
+
define i16 @shl_or(i16 %x, i16 %py) {
; CHECK-LABEL: @shl_or(
; CHECK-NEXT: [[Y:%.*]] = srem i16 [[PY:%.*]], 42
@@ -59,6 +76,23 @@ define <2 x i16> @shl_or_undef(<2 x i16> %x, <2 x i16> %py) {
ret <2 x i16> %sh1
}
+define <2 x i16> @shl_or_undef_multiuse(<2 x i16> %x) {
+; CHECK-LABEL: @shl_or_undef_multiuse(
+; CHECK-NEXT: [[SH0:%.*]] = shl <2 x i16> [[X:%.*]], <i16 5, i16 undef>
+; CHECK-NEXT: call void @use2(<2 x i16> [[SH0]])
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i16> [[X]], <i16 12, i16 undef>
+; CHECK-NEXT: [[SH1:%.*]] = or <2 x i16> [[TMP1]], <i16 5376, i16 poison>
+; CHECK-NEXT: ret <2 x i16> [[SH1]]
+;
+ %sh0 = shl <2 x i16> %x, <i16 5, i16 undef>
+ %r = or <2 x i16> <i16 42, i16 42>, %sh0 ; constant operand on the 'or'
+ call void @use2(<2 x i16> %sh0)
+ %sh1 = shl <2 x i16> %r, <i16 7, i16 undef>
+ ret <2 x i16> %sh1
+}
+
+declare void @use2(<2 x i16>)
+
define i32 @shl_xor(i32 %x, i32 %y) {
; CHECK-LABEL: @shl_xor(
; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 12
@@ -85,6 +119,23 @@ define <2 x i32> @shl_xor_nonuniform(<2 x i32> %x, <2 x i32> %y) {
ret <2 x i32> %sh1
}
+declare void @use3(<2 x i32>)
+
+define <2 x i32> @shl_xor_nonuniform_multiuse(<2 x i32> %x) {
+; CHECK-LABEL: @shl_xor_nonuniform_multiuse(
+; CHECK-NEXT: [[SH0:%.*]] = shl <2 x i32> [[X:%.*]], <i32 5, i32 6>
+; CHECK-NEXT: call void @use3(<2 x i32> [[SH0]])
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X]], <i32 12, i32 14>
+; CHECK-NEXT: [[SH1:%.*]] = xor <2 x i32> [[TMP1]], <i32 5376, i32 10752>
+; CHECK-NEXT: ret <2 x i32> [[SH1]]
+;
+ %sh0 = shl <2 x i32> %x, <i32 5, i32 6>
+ %r = xor <2 x i32> <i32 42, i32 42>, %sh0 ; constant operand on the 'xor'
+ call void @use3(<2 x i32> %sh0)
+ %sh1 = shl <2 x i32> %r, <i32 7, i32 8>
+ ret <2 x i32> %sh1
+}
+
define i64 @lshr_and(i64 %x, i64 %py) {
; CHECK-LABEL: @lshr_and(
; CHECK-NEXT: [[Y:%.*]] = srem i64 [[PY:%.*]], 42
@@ -115,6 +166,21 @@ define <2 x i64> @lshr_and_undef(<2 x i64> %x, <2 x i64> %py) {
ret <2 x i64> %sh1
}
+define <2 x i64> @lshr_and_undef_multiuse(<2 x i64> %x) {
+; CHECK-LABEL: @lshr_and_undef_multiuse(
+; CHECK-NEXT: [[SH0:%.*]] = lshr <2 x i64> [[X:%.*]], <i64 5, i64 undef>
+; CHECK-NEXT: call void @use4(<2 x i64> [[SH0]])
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+;
+ %sh0 = lshr <2 x i64> %x, <i64 5, i64 undef>
+ %r = and <2 x i64> <i64 42, i64 42>, %sh0 ; constant operand on the 'and'
+ call void @use4(<2 x i64> %sh0)
+ %sh1 = lshr <2 x i64> %r, <i64 7, i64 undef>
+ ret <2 x i64> %sh1
+}
+
+declare void @use4(<2 x i64>)
+
define <4 x i32> @lshr_or(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @lshr_or(
; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[X:%.*]], <i32 12, i32 12, i32 12, i32 12>
@@ -359,6 +425,21 @@ define <2 x i8> @shl_add_nonuniform(<2 x i8> %x, <2 x i8> %y) {
ret <2 x i8> %sh1
}
+define <2 x i8> @shl_add_nonuniform_multiuse(<2 x i8> %x) {
+; CHECK-LABEL: @shl_add_nonuniform_multiuse(
+; CHECK-NEXT: [[SH0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 3, i8 4>
+; CHECK-NEXT: call void @use(<2 x i8> [[SH0]])
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X]], <i8 5, i8 4>
+; CHECK-NEXT: [[SH1:%.*]] = add <2 x i8> [[TMP1]], <i8 -88, i8 42>
+; CHECK-NEXT: ret <2 x i8> [[SH1]]
+;
+ %sh0 = shl <2 x i8> %x, <i8 3, i8 4>
+ %r = add <2 x i8> %sh0, <i8 42, i8 42> ; constant operand on the 'add'
+ call void @use(<2 x i8> %sh0)
+ %sh1 = shl <2 x i8> %r, <i8 2, i8 0>
+ ret <2 x i8> %sh1
+}
+
define <2 x i64> @shl_add_undef(<2 x i64> %x, <2 x i64> %py) {
; CHECK-LABEL: @shl_add_undef(
@@ -375,6 +456,20 @@ define <2 x i64> @shl_add_undef(<2 x i64> %x, <2 x i64> %py) {
ret <2 x i64> %sh1
}
+define <2 x i64> @shl_add_undef_multiuse(<2 x i64> %x) {
+; CHECK-LABEL: @shl_add_undef_multiuse(
+; CHECK-NEXT: [[SH0:%.*]] = shl <2 x i64> [[X:%.*]], <i64 5, i64 undef>
+; CHECK-NEXT: call void @use4(<2 x i64> [[SH0]])
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[X]], <i64 12, i64 undef>
+; CHECK-NEXT: [[SH1:%.*]] = add <2 x i64> [[TMP1]], <i64 5376, i64 poison>
+; CHECK-NEXT: ret <2 x i64> [[SH1]]
+;
+ %sh0 = shl <2 x i64> %x, <i64 5, i64 undef>
+ %r = add <2 x i64> <i64 42, i64 42>, %sh0 ; constant operand on the 'add'
+ call void @use4(<2 x i64> %sh0)
+ %sh1 = shl <2 x i64> %r, <i64 7, i64 undef>
+ ret <2 x i64> %sh1
+}
define i8 @lshr_add(i8 %x, i8 %y) {
; CHECK-LABEL: @lshr_add(
@@ -457,6 +552,20 @@ define <2 x i8> @shl_sub_nonuniform(<2 x i8> %x, <2 x i8> %y) {
ret <2 x i8> %sh1
}
+define <2 x i8> @shl_sub_nonuniform_multiuse(<2 x i8> %x) {
+; CHECK-LABEL: @shl_sub_nonuniform_multiuse(
+; CHECK-NEXT: [[SH0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 3, i8 4>
+; CHECK-NEXT: call void @use1(<2 x i8> [[SH0]])
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X]], <i8 5, i8 4>
+; CHECK-NEXT: [[SH1:%.*]] = add <2 x i8> [[TMP1]], <i8 88, i8 -42>
+; CHECK-NEXT: ret <2 x i8> [[SH1]]
+;
+ %sh0 = shl <2 x i8> %x, <i8 3, i8 4>
+ %r = sub <2 x i8> %sh0, <i8 42, i8 42>
+ call void @use1(<2 x i8> %sh0)
+ %sh1 = shl <2 x i8> %r, <i8 2, i8 0>
+ ret <2 x i8> %sh1
+}
define <2 x i64> @shl_sub_undef(<2 x i64> %x, <2 x i64> %py) {
; CHECK-LABEL: @shl_sub_undef(
@@ -473,6 +582,20 @@ define <2 x i64> @shl_sub_undef(<2 x i64> %x, <2 x i64> %py) {
ret <2 x i64> %sh1
}
+define <2 x i64> @shl_sub_undef_multiuse(<2 x i64> %x) {
+; CHECK-LABEL: @shl_sub_undef_multiuse(
+; CHECK-NEXT: [[SH0:%.*]] = shl <2 x i64> [[X:%.*]], <i64 5, i64 undef>
+; CHECK-NEXT: call void @use4(<2 x i64> [[SH0]])
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[X]], <i64 12, i64 undef>
+; CHECK-NEXT: [[SH1:%.*]] = sub <2 x i64> <i64 5376, i64 poison>, [[TMP1]]
+; CHECK-NEXT: ret <2 x i64> [[SH1]]
+;
+ %sh0 = shl <2 x i64> %x, <i64 5, i64 undef>
+ %r = sub <2 x i64> <i64 42, i64 42>, %sh0 ; constant operand on the 'sub'
+ call void @use4(<2 x i64> %sh0)
+ %sh1 = shl <2 x i64> %r, <i64 7, i64 undef>
+ ret <2 x i64> %sh1
+}
define i8 @lshr_sub(i8 %x, i8 %y) {
; CHECK-LABEL: @lshr_sub(
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
index 1682d0740d0f5b..c2e613e0ad41f3 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
@@ -27,43 +27,43 @@ define void @widen_ptr_phi_unrolled(ptr noalias nocapture %a, ptr noalias nocapt
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[DOTNEG]], [[N]]
; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[N_VEC]], 3
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP26:%.*]] = shl nuw nsw i64 [[TMP25]], 3
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 3
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[INDEX]], 3
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[C]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 5
-; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[INDEX]], 3
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[C]], i64 [[TMP6]]
-; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[TMP8]], i64 [[TMP7]]
+; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[INDEX]], 3
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[C]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 5
+; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[C]], i64 [[TMP8]]
+; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[TMP10]], i64 [[TMP9]]
; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[NEXT_GEP]], align 4
; CHECK-NEXT: [[WIDE_VEC3:%.*]] = load <vscale x 8 x i32>, ptr [[NEXT_GEP2]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
-; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
+; CHECK-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC3]])
-; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC4]], 0
-; CHECK-NEXT: [[TMP12:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC4]], 1
-; CHECK-NEXT: [[TMP13:%.*]] = add nsw <vscale x 4 x i32> [[TMP9]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP14:%.*]] = add nsw <vscale x 4 x i32> [[TMP11]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP16]], 2
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 [[TMP17]]
-; CHECK-NEXT: store <vscale x 4 x i32> [[TMP13]], ptr [[TMP15]], align 4
-; CHECK-NEXT: store <vscale x 4 x i32> [[TMP14]], ptr [[TMP18]], align 4
-; CHECK-NEXT: [[TMP19:%.*]] = add nsw <vscale x 4 x i32> [[TMP10]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP20:%.*]] = add nsw <vscale x 4 x i32> [[TMP12]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP23:%.*]] = shl nuw nsw i64 [[TMP22]], 2
-; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP23]]
-; CHECK-NEXT: store <vscale x 4 x i32> [[TMP19]], ptr [[TMP21]], align 4
-; CHECK-NEXT: store <vscale x 4 x i32> [[TMP20]], ptr [[TMP24]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP26]]
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC4]], 0
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC4]], 1
+; CHECK-NEXT: [[TMP15:%.*]] = add nsw <vscale x 4 x i32> [[TMP11]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP16:%.*]] = add nsw <vscale x 4 x i32> [[TMP13]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP19:%.*]] = shl nuw nsw i64 [[TMP18]], 2
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[TMP19]]
+; CHECK-NEXT: store <vscale x 4 x i32> [[TMP15]], ptr [[TMP17]], align 4
+; CHECK-NEXT: store <vscale x 4 x i32> [[TMP16]], ptr [[TMP20]], align 4
+; CHECK-NEXT: [[TMP21:%.*]] = add nsw <vscale x 4 x i32> [[TMP12]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP22:%.*]] = add nsw <vscale x 4 x i32> [[TMP14]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP25:%.*]] = shl nuw nsw i64 [[TMP24]], 2
+; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[TMP25]]
+; CHECK-NEXT: store <vscale x 4 x i32> [[TMP21]], ptr [[TMP23]], align 4
+; CHECK-NEXT: store <vscale x 4 x i32> [[TMP22]], ptr [[TMP26]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
@@ -143,28 +143,28 @@ define void @widen_2ptrs_phi_unrolled(ptr noalias nocapture %dst, ptr noalias no
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[N_VEC]], 2
; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i64 [[TMP15]], 3
+; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 3
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP5:%.*]] = shl i64 [[INDEX]], 2
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[INDEX]], 2
-; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP6]]
-; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP7:%.*]] = shl i64 [[INDEX]], 2
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP7]]
+; CHECK-NEXT: [[TMP8:%.*]] = shl i64 [[INDEX]], 2
+; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 [[TMP10]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[NEXT_GEP]], align 4
-; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 4 x i32>, ptr [[TMP9]], align 4
-; CHECK-NEXT: [[TMP10:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP11:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD7]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP13:%.*]] = shl nuw nsw i64 [[TMP12]], 2
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[NEXT_GEP5]], i64 [[TMP13]]
-; CHECK-NEXT: store <vscale x 4 x i32> [[TMP10]], ptr [[NEXT_GEP5]], align 4
-; CHECK-NEXT: store <vscale x 4 x i32> [[TMP11]], ptr [[TMP14]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]]
+; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <vscale x 4 x i32>, ptr [[TMP11]], align 4
+; CHECK-NEXT: [[TMP12:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP13:%.*]] = shl nsw <vscale x 4 x i32> [[WIDE_LOAD7]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[NEXT_GEP5]], i64 [[TMP15]]
+; CHECK-NEXT: store <vscale x 4 x i32> [[TMP12]], ptr [[NEXT_GEP5]], align 4
+; CHECK-NEXT: store <vscale x 4 x i32> [[TMP13]], ptr [[TMP16]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
@@ -234,30 +234,30 @@ define i32 @pointer_iv_mixed(ptr noalias %a, ptr noalias %b, i64 %n) #0 {
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[N_VEC]], 3
; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP13:%.*]] = shl nuw nsw i64 [[TMP12]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 1
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[A]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 3
-; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
-; CHECK-NEXT: [[VECTOR_GEP:%.*]] = shl <vscale x 2 x i64> [[TMP7]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 2, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
-; CHECK-NEXT: [[TMP9:%.*]] = shl i64 [[INDEX]], 3
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP9]]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <vscale x 2 x ptr> [[TMP8]], i64 0
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[TMP10]], align 8
-; CHECK-NEXT: [[TMP11]] = add <vscale x 2 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
-; CHECK-NEXT: store <vscale x 2 x ptr> [[TMP8]], ptr [[NEXT_GEP]], align 8
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]]
-; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP6]]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 3
+; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT: [[VECTOR_GEP:%.*]] = shl <vscale x 2 x i64> [[TMP9]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 2, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
+; CHECK-NEXT: [[TMP11:%.*]] = shl i64 [[INDEX]], 3
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <vscale x 2 x ptr> [[TMP10]], i64 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[TMP13]] = add <vscale x 2 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: store <vscale x 2 x ptr> [[TMP10]], ptr [[NEXT_GEP]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
+; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP8]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[TMP11]])
+; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[TMP13]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
@@ -310,22 +310,22 @@ define void @phi_used_in_vector_compare_and_scalar_indvar_update_and_store(ptr %
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 1
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[PTR:%.*]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
-; CHECK-NEXT: [[VECTOR_GEP:%.*]] = shl <vscale x 2 x i64> [[TMP2]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <vscale x 2 x ptr> [[TMP3]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <vscale x 2 x ptr> [[TMP3]], i64 0
-; CHECK-NEXT: call void @llvm.masked.store.nxv2i16.p0(<vscale x 2 x i16> zeroinitializer, ptr [[TMP5]], i32 2, <vscale x 2 x i1> [[TMP4]])
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
-; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT: [[VECTOR_GEP:%.*]] = shl <vscale x 2 x i64> [[TMP4]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 2 x i64> [[VECTOR_GEP]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <vscale x 2 x ptr> [[TMP5]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <vscale x 2 x ptr> [[TMP5]], i64 0
+; CHECK-NEXT: call void @llvm.masked.store.nxv2i16.p0(<vscale x 2 x i16> zeroinitializer, ptr [[TMP7]], i32 2, <vscale x 2 x i1> [[TMP6]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
+; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: middle.block:
From 203c4135e6a844981cb7b6dca57947c6ec9dea78 Mon Sep 17 00:00:00 2001
From: Rose <83477269+AtariDreams at users.noreply.github.com>
Date: Fri, 12 Jan 2024 15:15:27 -0500
Subject: [PATCH 2/2] [InstCombine] Remove one-use check if other logic operand
(Y) is constant
By matching the other binop operand with match(W, m_ImmConstant()), we no longer need the one-use restriction on the inner shift in that case: the shifted constant folds away, so the transform cannot increase the instruction count even when the inner shift has other uses.
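For illustration, a minimal scalar sketch of the case this enables (mirroring
the new *_multiuse tests in the first patch; @use stands in for an arbitrary
extra user of the inner shift):

  ; Before: %sh0 has a second use, but the 'and' operand is an immediate.
  %sh0 = shl i8 %x, 3
  call void @use(i8 %sh0)
  %r   = and i8 %sh0, 42
  %sh1 = shl i8 %r, 2

  ; After: the shift amounts combine (3 + 2 = 5) and the constant is
  ; pre-shifted (42 << 2 == -88 as i8); %sh0 survives for its other use.
  %sh0 = shl i8 %x, 3
  call void @use(i8 %sh0)
  %t   = shl i8 %x, 5
  %sh1 = and i8 %t, -88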
---
.../InstCombine/InstCombineShifts.cpp | 33 +++++++++++--------
1 file changed, 19 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index b7958978c450c9..70b49bb42f9065 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -368,23 +368,24 @@ static Instruction *foldShiftOfShiftedBinOp(BinaryOperator &I,
// Find a matching one-use shift by constant. The fold is not valid if the sum
// of the shift values equals or exceeds bitwidth.
- // TODO: Remove the one-use check if the other logic operand (Y) is constant.
Value *X, *Y;
- auto matchFirstShift = [&](Value *V) {
+ auto matchFirstShift = [&](Value *V, Value *W) {
APInt Threshold(Ty->getScalarSizeInBits(), Ty->getScalarSizeInBits());
- return match(V,
- m_OneUse(m_BinOp(ShiftOpcode, m_Value(X), m_Constant(C0)))) &&
- match(ConstantExpr::getAdd(C0, C1),
- m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold));
+    auto BinOp = m_BinOp(ShiftOpcode, m_Value(X), m_Constant(C0));
+    // A multi-use inner shift is acceptable when W is an immediate constant,
+    // but the bitwidth check on the summed shift amounts must hold either way.
+    return (match(W, m_ImmConstant()) ? match(V, BinOp)
+                                      : match(V, m_OneUse(BinOp))) &&
+           match(ConstantExpr::getAdd(C0, C1),
+                 m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold));
};
// Logic ops and Add are commutative, so check each operand for a match. Sub
// is not, so we cannot reorder if we match operand(1) and need to keep the
// operands in their original positions.
bool FirstShiftIsOp1 = false;
- if (matchFirstShift(BinInst->getOperand(0)))
+ if (matchFirstShift(BinInst->getOperand(0), BinInst->getOperand(1)))
Y = BinInst->getOperand(1);
- else if (matchFirstShift(BinInst->getOperand(1))) {
+ else if (matchFirstShift(BinInst->getOperand(1), BinInst->getOperand(0))) {
Y = BinInst->getOperand(0);
FirstShiftIsOp1 = BinInst->getOpcode() == Instruction::Sub;
} else
@@ -565,14 +566,17 @@ static bool canEvaluateShifted(Value *V, unsigned NumBits, bool IsLeftShift,
return true;
Instruction *I = dyn_cast<Instruction>(V);
- if (!I) return false;
+ if (!I)
+ return false;
// We can't mutate something that has multiple uses: doing so would
// require duplicating the instruction in general, which isn't profitable.
- if (!I->hasOneUse()) return false;
+ if (!I->hasOneUse())
+ return false;
switch (I->getOpcode()) {
- default: return false;
+ default:
+ return false;
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
@@ -689,7 +693,8 @@ static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
IC.addToWorklist(I);
switch (I->getOpcode()) {
- default: llvm_unreachable("Inconsistency with CanEvaluateShifted");
+ default:
+ llvm_unreachable("Inconsistency with CanEvaluateShifted");
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
@@ -727,8 +732,8 @@ static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
IC.InsertNewInstWith(Neg, I->getIterator());
unsigned TypeWidth = I->getType()->getScalarSizeInBits();
APInt Mask = APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits);
- auto *And = BinaryOperator::CreateAnd(Neg,
- ConstantInt::get(I->getType(), Mask));
+ auto *And =
+ BinaryOperator::CreateAnd(Neg, ConstantInt::get(I->getType(), Mask));
And->takeName(I);
return IC.InsertNewInstWith(And, I->getIterator());
}