[llvm] a66051c - [InstCombine] Add oneuse checks to shr + cmp constant folds.

Thu Oct 26 11:36:17 PDT 2023

Author: Amara Emerson
Date: 2023-10-26T11:36:10-07:00
New Revision: a66051c68a43af39f9fd962f71d58ae0efcf860d

URL: https://github.com/llvm/llvm-project/commit/a66051c68a43af39f9fd962f71d58ae0efcf860d
DIFF: https://github.com/llvm/llvm-project/commit/a66051c68a43af39f9fd962f71d58ae0efcf860d.diff

LOG: [InstCombine] Add oneuse checks to shr + cmp constant folds.

This change has virtually no code size regressions on the llvm test suite (+ SPECs)
while having these improvements (measured with -Os on Darwin arm64):

External/S.../CFP2006/450.soplex/450.soplex    214024.00      213920.00     -0.0%
External/S...7speed/641.leela_s/641.leela_s     93412.00       93348.00     -0.1%
External/S...17rate/541.leela_r/541.leela_r     93412.00       93348.00     -0.1%
MultiSourc.../Applications/JM/lencod/lencod    426044.00      425748.00     -0.1%
MultiSourc...rks/mediabench/gsm/toast/toast     20436.00       20416.00     -0.1%
MultiSourc...ench/telecomm-gsm/telecomm-gsm     20436.00       20416.00     -0.1%
MultiSourc...Prolangs-C/assembler/assembler     16172.00       16156.00     -0.1%
MultiSourc...nch/mpeg2/mpeg2dec/mpeg2decode     35332.00       35256.00     -0.2%
SingleSour...Adobe-C++/stepanov_abstraction      6904.00        6888.00     -0.2%
External/SPEC/CINT2000/254.gap/254.gap         366060.00      365132.00     -0.3%
MultiSourc...-ProxyApps-C++/PENNANT/PENNANT     79688.00       79484.00     -0.3%
External/S...NT2006/464.h264ref/464.h264ref    352044.00      351132.00     -0.3%
SingleSour...arks/Adobe-C++/functionobjects     15524.00       15480.00     -0.3%
SingleSour...arks/Adobe-C++/stepanov_vector     10728.00       10696.00     -0.3%
SingleSour...ks/Misc-C++/stepanov_container     16900.00       16848.00     -0.3%
MultiSource/Applications/oggenc/oggenc         124184.00      123780.00     -0.3%
SingleSour...tout-C++/Shootout-C++-wordfreq      7060.00        7036.00     -0.3%
MultiSourc...ity-rijndael/security-rijndael      8976.00        8936.00     -0.4%
MultiSource/Benchmarks/McCat/18-imp/imp          9816.00        9772.00     -0.4%
SingleSour...chmarks/Misc-C++/stepanov_v1p2      1772.00        1764.00     -0.5%
MultiSourc...iabench/g721/g721encode/encode      5492.00        5464.00     -0.5%
MultiSourc...rks/McCat/03-testtrie/testtrie      1364.00        1344.00     -1.5%
SingleSour.../execute/GCC-C-execute-pr42833       400.00         364.00     -9.0%

Doing so also prevents a regression described in https://reviews.llvm.org/D143624

Differential Revision: https://reviews.llvm.org/D149918

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
    llvm/test/Transforms/InstCombine/ashr-icmp-minmax-idiom-break.ll
    llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll
    llvm/test/Transforms/PhaseOrdering/icmp-ashr-breaking-select-idiom.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index a09c9b48be9d5b2..fd12b01736f3f84 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2451,7 +2451,7 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
   // constant-value-based preconditions in the folds below, then we could assert
   // those conditions rather than checking them. This is 
diff icult because of
   // undef/poison (PR34838).
-  if (IsAShr) {
+  if (IsAShr && Shr->hasOneUse()) {
     if (IsExact || Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_ULT) {
       // When ShAmtC can be shifted losslessly:
       // icmp PRED (ashr exact X, ShAmtC), C --> icmp PRED X, (C << ShAmtC)

diff  --git a/llvm/test/Transforms/InstCombine/ashr-icmp-minmax-idiom-break.ll b/llvm/test/Transforms/InstCombine/ashr-icmp-minmax-idiom-break.ll
index a539b9136689682..c6d6e916b2c7867 100644
--- a/llvm/test/Transforms/InstCombine/ashr-icmp-minmax-idiom-break.ll
+++ b/llvm/test/Transforms/InstCombine/ashr-icmp-minmax-idiom-break.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s
 
-; This test is pre-committed to show sub-optimal codegen due to
-; min/max idiom breakage. On AArch64, these constants are also expensive to materialize,
+; Check we don't have sub-optimal codegen due to min/max idiom breakage.
+; On AArch64, these constants are also expensive to materialize,
 ; and therefore generate poor code vs maintaining the min/max idiom.
 
 define i64 @dont_break_minmax_i64(i64 %conv, i64 %conv2) {
@@ -10,8 +10,7 @@ define i64 @dont_break_minmax_i64(i64 %conv, i64 %conv2) {
 ; CHECK-SAME: (i64 [[CONV:%.*]], i64 [[CONV2:%.*]]) {
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV2]]
 ; CHECK-NEXT:    [[SHR:%.*]] = ashr i64 [[MUL]], 4
-; CHECK-NEXT:    [[CMP4_I:%.*]] = icmp slt i64 [[MUL]], 5579712
-; CHECK-NEXT:    [[SPEC_SELECT_I:%.*]] = select i1 [[CMP4_I]], i64 [[SHR]], i64 348731
+; CHECK-NEXT:    [[SPEC_SELECT_I:%.*]] = call i64 @llvm.smin.i64(i64 [[SHR]], i64 348731)
 ; CHECK-NEXT:    ret i64 [[SPEC_SELECT_I]]
 ;
   %mul = mul nsw i64 %conv, %conv2

diff  --git a/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll b/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll
index 08a763a50bf958d..538590e188c443f 100644
--- a/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll
@@ -900,7 +900,7 @@ define i1 @ashrsgt_01_00(i4 %x) {
 define i1 @ashrsgt_01_00_multiuse(i4 %x, ptr %p) {
 ; CHECK-LABEL: @ashrsgt_01_00_multiuse(
 ; CHECK-NEXT:    [[S:%.*]] = ashr i4 [[X:%.*]], 1
-; CHECK-NEXT:    [[C:%.*]] = icmp sgt i4 [[X]], 1
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i4 [[S]], 0
 ; CHECK-NEXT:    store i4 [[S]], ptr [[P:%.*]], align 1
 ; CHECK-NEXT:    ret i1 [[C]]
 ;

diff  --git a/llvm/test/Transforms/PhaseOrdering/icmp-ashr-breaking-select-idiom.ll b/llvm/test/Transforms/PhaseOrdering/icmp-ashr-breaking-select-idiom.ll
index 8559f973f281ace..67d721b23d6f008 100644
--- a/llvm/test/Transforms/PhaseOrdering/icmp-ashr-breaking-select-idiom.ll
+++ b/llvm/test/Transforms/PhaseOrdering/icmp-ashr-breaking-select-idiom.ll
@@ -5,8 +5,7 @@ define i32 @testa(i32 %mul) {
 ; CHECK-LABEL: define i32 @testa(
 ; CHECK-SAME: i32 [[MUL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:    [[SHR:%.*]] = ashr i32 [[MUL]], 15
-; CHECK-NEXT:    [[CMP4_I:%.*]] = icmp slt i32 [[MUL]], 1073741824
-; CHECK-NEXT:    [[SPEC_SELECT_I:%.*]] = select i1 [[CMP4_I]], i32 [[SHR]], i32 32767
+; CHECK-NEXT:    [[SPEC_SELECT_I:%.*]] = tail call i32 @llvm.smin.i32(i32 [[SHR]], i32 32767)
 ; CHECK-NEXT:    ret i32 [[SPEC_SELECT_I]]
 ;
   %shr = ashr i32 %mul, 15
@@ -20,11 +19,8 @@ define i32 @testb(i32 %mul) {
 ; CHECK-LABEL: define i32 @testb(
 ; CHECK-SAME: i32 [[MUL:%.*]]) local_unnamed_addr #[[ATTR0]] {
 ; CHECK-NEXT:    [[SHR102:%.*]] = ashr i32 [[MUL]], 7
-; CHECK-NEXT:    [[CMP4_I:%.*]] = icmp sgt i32 [[MUL]], 16383
-; CHECK-NEXT:    [[RETVAL_0_I:%.*]] = select i1 [[CMP4_I]], i32 127, i32 -128
-; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[MUL]], 16384
-; CHECK-NEXT:    [[CLEANUP_DEST_SLOT_0_I:%.*]] = icmp ult i32 [[TMP1]], 32768
-; CHECK-NEXT:    [[SPEC_SELECT_I:%.*]] = select i1 [[CLEANUP_DEST_SLOT_0_I]], i32 [[SHR102]], i32 [[RETVAL_0_I]]
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.smax.i32(i32 [[SHR102]], i32 -128)
+; CHECK-NEXT:    [[SPEC_SELECT_I:%.*]] = tail call i32 @llvm.smin.i32(i32 [[TMP1]], i32 127)
 ; CHECK-NEXT:    ret i32 [[SPEC_SELECT_I]]
 ;
   %shr102 = ashr i32 %mul, 7