[clang] [SCCP] [Transform] Adding ICMP folding for zext and sext in SCCPSolver (PR #67594)

LĂ©onard Oest O'Leary via cfe-commits cfe-commits at lists.llvm.org
Wed Sep 27 12:15:38 PDT 2023


https://github.com/leo-ard created https://github.com/llvm/llvm-project/pull/67594

This PR fixes #55013 : the max intrinsics is not generated for this simple loop case : https://godbolt.org/z/hxz1xhMPh. This is caused by a ICMP not being folded into a select, thus not generating the max intrinsics. 

Since LLVM 14, SCCP pass got smarter by folding sext into zext for positive ranges : https://reviews.llvm.org/D81756. After this change, InstCombine was sometimes unable to fold icmp correctly as both of the arguments pointed to mismatched zext/sext. To fix this, @rotateright implemented this fix : https://reviews.llvm.org/D124419 that tries to resolve the mismatch by knowing if the argument of a zext is positive (in which case, it is like a sext) by using ValueTracking. However, ValueTracking seems to be not smart enough for this case and cannot accurately know that the value is positive or not. This PR implements the folding in SCCP directly, where we have the knowledge that the value are positive or not.

This PR also contains test cases for sext/zext folding with SCCP as well as a x86 regression tests for the max/min case. 

>From 399f9d64cfde0761ac8278dd05ba704d879b1f5a Mon Sep 17 00:00:00 2001
From: leo-ard <lool4516 at gmail.com>
Date: Wed, 27 Sep 2023 13:35:53 -0400
Subject: [PATCH] Adding ICMP folding for SCCPSolver

---
 clang/test/CodeGen/X86/min_max.c              |  19 ++
 llvm/lib/Transforms/Utils/SCCPSolver.cpp      |  54 +++++
 .../Transforms/SCCP/icmp-fold-with-cast.ll    | 185 ++++++++++++++++++
 llvm/test/Transforms/SCCP/widening.ll         |   4 +-
 4 files changed, 260 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/CodeGen/X86/min_max.c
 create mode 100644 llvm/test/Transforms/SCCP/icmp-fold-with-cast.ll

diff --git a/clang/test/CodeGen/X86/min_max.c b/clang/test/CodeGen/X86/min_max.c
new file mode 100644
index 000000000000000..7af8181cc9ff367
--- /dev/null
+++ b/clang/test/CodeGen/X86/min_max.c
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 %s -O2 -triple=x86_64-apple-darwin -emit-llvm -o - | FileCheck %s
+
+short vecreduce_smax_v2i16(int n, short* v)
+{
+  // CHECK: @llvm.smax
+  short p = 0;
+  for (int i = 0; i < n; ++i)
+    p = p < v[i] ? v[i] : p;
+  return p;
+}
+
+short vecreduce_smin_v2i16(int n, short* v)
+{
+  // CHECK: @llvm.smin
+  short p = 0;
+  for (int i = 0; i < n; ++i)
+    p = p > v[i] ? v[i] : p;
+  return p;
+}
\ No newline at end of file
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index 8a67fda7c98e787..09f64a925ab1b7c 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -193,6 +193,60 @@ static bool replaceSignedInst(SCCPSolver &Solver,
     NewInst = BinaryOperator::Create(NewOpcode, Op0, Op1, "", &Inst);
     break;
   }
+  case Instruction::ICmp: {
+    ICmpInst &ICmp = cast<ICmpInst>(Inst);
+  
+    ZExtInst *Op0_zext = dyn_cast<ZExtInst>(ICmp.getOperand(0));
+    SExtInst *Op0_sext = dyn_cast<SExtInst>(ICmp.getOperand(0));
+  
+    ZExtInst *Op1_zext = dyn_cast<ZExtInst>(ICmp.getOperand(1));
+    SExtInst *Op1_sext = dyn_cast<SExtInst>(ICmp.getOperand(1));
+  
+    CastInst *Op0;
+    CastInst *Op1;
+  
+    if (Op0_zext) Op0 = Op0_zext; else Op0 = Op0_sext;
+    if (Op1_zext) Op1 = Op1_zext; else Op1 = Op1_sext;
+  
+    bool reversed = false;
+  
+    if (!Op0 || !Op1){
+      // Op0 and Op1 must be defined
+      return false;
+    } 
+  
+    if (Op1_zext && (! Op0_zext)){
+      // We force Op0 to be a zext and reverse the arguments
+      //   at the end if we swap
+      reversed = true; 
+  
+      std::swap(Op0_zext, Op1_zext);
+      std::swap(Op0_sext, Op1_sext);
+      std::swap(Op0, Op1);
+    }
+  
+  
+    if(Op0->getType() != Op1->getType()){
+      // extensions are not of the same type
+      // This optimization is done in InstCombine
+      return false;
+    }
+  
+    // ICMP (sext X) (sext y) => ICMP X, Y
+    // ICMP (zext X) (zext y) => ICMP X, Y
+    // ICMP (zext X) (sext Y) => ICMP X, Y  if X >= 0 and ICMP signed
+    if((Op0_zext && Op1_zext)
+       || (Op0_sext && Op1_sext) 
+       || (ICmp.isSigned() && Op0_zext && Op1_sext && isNonNegative(Op0_zext->getOperand(0)))){
+  
+      Value *X = Op0->getOperand(0);
+      Value *Y = Op1->getOperand(0);
+      NewInst = CmpInst::Create(ICmp.getOpcode(), ICmp.getPredicate(), reversed ? Y : X, reversed ? X : Y, "", &Inst);
+      break;
+    }
+  
+    return false;
+  }
   default:
     return false;
   }
diff --git a/llvm/test/Transforms/SCCP/icmp-fold-with-cast.ll b/llvm/test/Transforms/SCCP/icmp-fold-with-cast.ll
new file mode 100644
index 000000000000000..90b2c123081fb49
--- /dev/null
+++ b/llvm/test/Transforms/SCCP/icmp-fold-with-cast.ll
@@ -0,0 +1,185 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool ./bin/opt --version 3
+; See PRXXX for more details
+; RUN: opt < %s -S -passes=ipsccp | FileCheck %s
+
+
+define signext i32 @sext_sext(i16 %x, i16 %y) {
+; CHECK-LABEL: define signext i32 @sext_sext(
+; CHECK-SAME: i16 [[X:%.*]], i16 [[Y:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[X]] to i32
+; CHECK-NEXT:    [[CONV1:%.*]] = sext i16 [[Y]] to i32
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i16 [[X]], [[Y]]
+; CHECK-NEXT:    br i1 [[CMP2]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+; CHECK:       cond.true:
+; CHECK-NEXT:    br label [[COND_END:%.*]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    br label [[COND_END]]
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ 1, [[COND_FALSE]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+entry:
+  %conv = sext i16 %x to i32
+  %conv1 = sext i16 %y to i32
+  %cmp2 = icmp sgt i32 %conv, %conv1
+  br i1 %cmp2, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %for.body
+  br label %cond.end
+
+cond.false:                                       ; preds = %for.body
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ 0, %cond.true ], [ 1, %cond.false ]
+  ret i32 %cond
+}
+
+
+define signext i32 @zext_zext(i16 %x, i16 %y) {
+; CHECK-LABEL: define signext i32 @zext_zext(
+; CHECK-SAME: i16 [[X:%.*]], i16 [[Y:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[X]] to i32
+; CHECK-NEXT:    [[CONV1:%.*]] = zext i16 [[Y]] to i32
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i16 [[X]], [[Y]]
+; CHECK-NEXT:    br i1 [[CMP2]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+; CHECK:       cond.true:
+; CHECK-NEXT:    br label [[COND_END:%.*]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    br label [[COND_END]]
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ 1, [[COND_FALSE]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+entry:
+  %conv = zext i16 %x to i32
+  %conv1 = zext i16 %y to i32
+  %cmp2 = icmp sgt i32 %conv, %conv1
+  br i1 %cmp2, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %for.body
+  br label %cond.end
+
+cond.false:                                       ; preds = %for.body
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ 0, %cond.true ], [ 1, %cond.false ]
+  ret i32 %cond
+}
+
+
+define signext i16 @zext_positive_and_sext(i32 noundef %n, ptr noundef %v) {
+; CHECK-LABEL: define signext i16 @zext_positive_and_sext(
+; CHECK-SAME: i32 noundef [[N:%.*]], ptr noundef [[V:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[P_0:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[CONV8:%.*]], [[COND_END:%.*]] ]
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[COND_END]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[P_0]] to i32
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[I_0]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr i16, ptr [[V]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
+; CHECK-NEXT:    [[CONV1:%.*]] = sext i16 [[TMP0]] to i32
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i16 [[P_0]], [[TMP0]]
+; CHECK-NEXT:    br i1 [[CMP2]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+; CHECK:       cond.true:
+; CHECK-NEXT:    br label [[COND_END]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    br label [[COND_END]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret i16 [[P_0]]
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[CONV1]], [[COND_TRUE]] ], [ [[CONV]], [[COND_FALSE]] ]
+; CHECK-NEXT:    [[CONV8]] = trunc i32 [[COND]] to i16
+; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT:    br label [[FOR_COND]]
+;
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %cond.end, %entry
+  %p.0 = phi i16 [ 0, %entry ], [ %conv8, %cond.end ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
+  %cmp = icmp slt i32 %i.0, %n
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.body:                                         ; preds = %for.cond
+  %conv = zext i16 %p.0 to i32                    ;; %p.0 is always positive here
+  %idxprom = sext i32 %i.0 to i64
+  %arrayidx = getelementptr i16, ptr %v, i64 %idxprom
+  %0 = load i16, ptr %arrayidx, align 2
+  %conv1 = sext i16 %0 to i32
+  %cmp2 = icmp slt i32 %conv, %conv1
+  br i1 %cmp2, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %for.body
+  br label %cond.end
+
+cond.false:                                       ; preds = %for.body
+  br label %cond.end
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  ret i16 %p.0
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ %conv1, %cond.true ], [ %conv, %cond.false ]
+  %conv8 = trunc i32 %cond to i16
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+}
+
+
+
+define signext i16 @sext_and_zext_positive(i16 %x) {
+; CHECK-LABEL: define signext i16 @sext_and_zext_positive(
+; CHECK-SAME: i16 [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[V:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[COND_END:%.*]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[V]] to i32
+; CHECK-NEXT:    [[CONV1:%.*]] = sext i16 [[X]] to i32
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i16 [[X]], [[V]]
+; CHECK-NEXT:    br i1 [[CMP2]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+; CHECK:       cond.true:
+; CHECK-NEXT:    br label [[COND_END]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    br label [[COND_END]]
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[A:%.*]] = phi i16 [ 10, [[COND_TRUE]] ], [ 20, [[COND_FALSE]] ]
+; CHECK-NEXT:    [[INC]] = add nuw nsw i16 [[A]], 1
+; CHECK-NEXT:    br label [[FOR_COND]]
+;
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %cond.end, %entry
+  %v = phi i16 [ 0, %entry ], [ %inc, %cond.end ] ;; always positive
+  br label %for.body
+
+for.body:                                         ; preds = %for.cond
+  %conv = zext i16 %v to i32                    ;; %p.0 is always positive here
+  %conv1 = sext i16 %x to i32                    ;; %p.0 is always positive here
+  %cmp2 = icmp slt i32 %conv1, %conv ;; positive/positive
+  br i1 %cmp2, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %for.body
+  br label %cond.end
+
+cond.false:                                       ; preds = %for.body
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %a = phi i16 [ 10, %cond.true ],  [ 20, %cond.false ]
+  %inc = add i16 %a, 1
+  br label %for.cond
+}
diff --git a/llvm/test/Transforms/SCCP/widening.ll b/llvm/test/Transforms/SCCP/widening.ll
index f482ed3a4e7f655..84adf77c621e1ff 100644
--- a/llvm/test/Transforms/SCCP/widening.ll
+++ b/llvm/test/Transforms/SCCP/widening.ll
@@ -653,7 +653,7 @@ define ptr @wobble(ptr %arg, i32 %arg1) align 2 {
 ; SCCP-NEXT:    [[TMP52:%.*]] = call dereferenceable(1) ptr @spam(ptr [[ARG]], i32 [[TMP51]])
 ; SCCP-NEXT:    [[TMP53:%.*]] = load i8, ptr [[TMP52]], align 1
 ; SCCP-NEXT:    [[TMP54:%.*]] = zext i8 [[TMP53]] to i32
-; SCCP-NEXT:    [[TMP55:%.*]] = icmp sgt i32 [[TMP48]], [[TMP54]]
+; SCCP-NEXT:    [[TMP55:%.*]] = icmp sgt i8 [[TMP47]], [[TMP53]]
 ; SCCP-NEXT:    br i1 [[TMP55]], label [[BB56:%.*]], label [[BB60:%.*]]
 ; SCCP:       bb56:
 ; SCCP-NEXT:    [[TMP57:%.*]] = add nsw i32 [[TMP40]], -1
@@ -734,7 +734,7 @@ define ptr @wobble(ptr %arg, i32 %arg1) align 2 {
 ; IPSCCP-NEXT:    [[TMP52:%.*]] = call dereferenceable(1) ptr @spam(ptr [[ARG]], i32 [[TMP51]])
 ; IPSCCP-NEXT:    [[TMP53:%.*]] = load i8, ptr [[TMP52]], align 1
 ; IPSCCP-NEXT:    [[TMP54:%.*]] = zext i8 [[TMP53]] to i32
-; IPSCCP-NEXT:    [[TMP55:%.*]] = icmp sgt i32 [[TMP48]], [[TMP54]]
+; IPSCCP-NEXT:    [[TMP55:%.*]] = icmp sgt i8 [[TMP47]], [[TMP53]]
 ; IPSCCP-NEXT:    br i1 [[TMP55]], label [[BB56:%.*]], label [[BB60:%.*]]
 ; IPSCCP:       bb56:
 ; IPSCCP-NEXT:    br label [[BB60]]



More information about the cfe-commits mailing list