[llvm] [SCCP] [Transform] Adding ICMP folding for zext and sext in SCCPSolver (PR #67594)

LĂ©onard Oest O'Leary via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 27 12:43:10 PDT 2023


https://github.com/leo-ard updated https://github.com/llvm/llvm-project/pull/67594

>From 399f9d64cfde0761ac8278dd05ba704d879b1f5a Mon Sep 17 00:00:00 2001
From: leo-ard <lool4516 at gmail.com>
Date: Wed, 27 Sep 2023 13:35:53 -0400
Subject: [PATCH 1/2] Adding ICMP folding for SCCPSolver

---
 clang/test/CodeGen/X86/min_max.c              |  19 ++
 llvm/lib/Transforms/Utils/SCCPSolver.cpp      |  54 +++++
 .../Transforms/SCCP/icmp-fold-with-cast.ll    | 185 ++++++++++++++++++
 llvm/test/Transforms/SCCP/widening.ll         |   4 +-
 4 files changed, 260 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/CodeGen/X86/min_max.c
 create mode 100644 llvm/test/Transforms/SCCP/icmp-fold-with-cast.ll

diff --git a/clang/test/CodeGen/X86/min_max.c b/clang/test/CodeGen/X86/min_max.c
new file mode 100644
index 000000000000000..7af8181cc9ff367
--- /dev/null
+++ b/clang/test/CodeGen/X86/min_max.c
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 %s -O2 -triple=x86_64-apple-darwin -emit-llvm -o - | FileCheck %s
+
+short vecreduce_smax_v2i16(int n, short* v)
+{
+  // CHECK: @llvm.smax
+  short p = 0;
+  for (int i = 0; i < n; ++i)
+    p = p < v[i] ? v[i] : p;
+  return p;
+}
+
+short vecreduce_smin_v2i16(int n, short* v)
+{
+  // CHECK: @llvm.smin
+  short p = 0;
+  for (int i = 0; i < n; ++i)
+    p = p > v[i] ? v[i] : p;
+  return p;
+}
\ No newline at end of file
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index 8a67fda7c98e787..09f64a925ab1b7c 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -193,6 +193,60 @@ static bool replaceSignedInst(SCCPSolver &Solver,
     NewInst = BinaryOperator::Create(NewOpcode, Op0, Op1, "", &Inst);
     break;
   }
+  case Instruction::ICmp: {
+    ICmpInst &ICmp = cast<ICmpInst>(Inst);
+  
+    ZExtInst *Op0_zext = dyn_cast<ZExtInst>(ICmp.getOperand(0));
+    SExtInst *Op0_sext = dyn_cast<SExtInst>(ICmp.getOperand(0));
+  
+    ZExtInst *Op1_zext = dyn_cast<ZExtInst>(ICmp.getOperand(1));
+    SExtInst *Op1_sext = dyn_cast<SExtInst>(ICmp.getOperand(1));
+  
+    CastInst *Op0;
+    CastInst *Op1;
+  
+    if (Op0_zext) Op0 = Op0_zext; else Op0 = Op0_sext;
+    if (Op1_zext) Op1 = Op1_zext; else Op1 = Op1_sext;
+  
+    bool reversed = false;
+  
+    if (!Op0 || !Op1){
+      // Op0 and Op1 must be defined
+      return false;
+    } 
+  
+    if (Op1_zext && (! Op0_zext)){
+      // We force Op0 to be a zext and reverse the arguments
+      //   at the end if we swap
+      reversed = true; 
+  
+      std::swap(Op0_zext, Op1_zext);
+      std::swap(Op0_sext, Op1_sext);
+      std::swap(Op0, Op1);
+    }
+  
+  
+    if(Op0->getType() != Op1->getType()){
+      // extensions are not of the same type
+      // This optimization is done in InstCombine
+      return false;
+    }
+  
+    // ICMP (sext X) (sext y) => ICMP X, Y
+    // ICMP (zext X) (zext y) => ICMP X, Y
+    // ICMP (zext X) (sext Y) => ICMP X, Y  if X >= 0 and ICMP signed
+    if((Op0_zext && Op1_zext)
+       || (Op0_sext && Op1_sext) 
+       || (ICmp.isSigned() && Op0_zext && Op1_sext && isNonNegative(Op0_zext->getOperand(0)))){
+  
+      Value *X = Op0->getOperand(0);
+      Value *Y = Op1->getOperand(0);
+      NewInst = CmpInst::Create(ICmp.getOpcode(), ICmp.getPredicate(), reversed ? Y : X, reversed ? X : Y, "", &Inst);
+      break;
+    }
+  
+    return false;
+  }
   default:
     return false;
   }
diff --git a/llvm/test/Transforms/SCCP/icmp-fold-with-cast.ll b/llvm/test/Transforms/SCCP/icmp-fold-with-cast.ll
new file mode 100644
index 000000000000000..90b2c123081fb49
--- /dev/null
+++ b/llvm/test/Transforms/SCCP/icmp-fold-with-cast.ll
@@ -0,0 +1,185 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool ./bin/opt --version 3
+; See PRXXX for more details
+; RUN: opt < %s -S -passes=ipsccp | FileCheck %s
+
+
+define signext i32 @sext_sext(i16 %x, i16 %y) {
+; CHECK-LABEL: define signext i32 @sext_sext(
+; CHECK-SAME: i16 [[X:%.*]], i16 [[Y:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[X]] to i32
+; CHECK-NEXT:    [[CONV1:%.*]] = sext i16 [[Y]] to i32
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i16 [[X]], [[Y]]
+; CHECK-NEXT:    br i1 [[CMP2]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+; CHECK:       cond.true:
+; CHECK-NEXT:    br label [[COND_END:%.*]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    br label [[COND_END]]
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ 1, [[COND_FALSE]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+entry:
+  %conv = sext i16 %x to i32
+  %conv1 = sext i16 %y to i32
+  %cmp2 = icmp sgt i32 %conv, %conv1
+  br i1 %cmp2, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %for.body
+  br label %cond.end
+
+cond.false:                                       ; preds = %for.body
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ 0, %cond.true ], [ 1, %cond.false ]
+  ret i32 %cond
+}
+
+
+define signext i32 @zext_zext(i16 %x, i16 %y) {
+; CHECK-LABEL: define signext i32 @zext_zext(
+; CHECK-SAME: i16 [[X:%.*]], i16 [[Y:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[X]] to i32
+; CHECK-NEXT:    [[CONV1:%.*]] = zext i16 [[Y]] to i32
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i16 [[X]], [[Y]]
+; CHECK-NEXT:    br i1 [[CMP2]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+; CHECK:       cond.true:
+; CHECK-NEXT:    br label [[COND_END:%.*]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    br label [[COND_END]]
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ 1, [[COND_FALSE]] ]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+entry:
+  %conv = zext i16 %x to i32
+  %conv1 = zext i16 %y to i32
+  %cmp2 = icmp sgt i32 %conv, %conv1
+  br i1 %cmp2, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %for.body
+  br label %cond.end
+
+cond.false:                                       ; preds = %for.body
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ 0, %cond.true ], [ 1, %cond.false ]
+  ret i32 %cond
+}
+
+
+define signext i16 @zext_positive_and_sext(i32 noundef %n, ptr noundef %v) {
+; CHECK-LABEL: define signext i16 @zext_positive_and_sext(
+; CHECK-SAME: i32 noundef [[N:%.*]], ptr noundef [[V:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[P_0:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[CONV8:%.*]], [[COND_END:%.*]] ]
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[COND_END]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[P_0]] to i32
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[I_0]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr i16, ptr [[V]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
+; CHECK-NEXT:    [[CONV1:%.*]] = sext i16 [[TMP0]] to i32
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i16 [[P_0]], [[TMP0]]
+; CHECK-NEXT:    br i1 [[CMP2]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+; CHECK:       cond.true:
+; CHECK-NEXT:    br label [[COND_END]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    br label [[COND_END]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret i16 [[P_0]]
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[CONV1]], [[COND_TRUE]] ], [ [[CONV]], [[COND_FALSE]] ]
+; CHECK-NEXT:    [[CONV8]] = trunc i32 [[COND]] to i16
+; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT:    br label [[FOR_COND]]
+;
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %cond.end, %entry
+  %p.0 = phi i16 [ 0, %entry ], [ %conv8, %cond.end ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
+  %cmp = icmp slt i32 %i.0, %n
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.body:                                         ; preds = %for.cond
+  %conv = zext i16 %p.0 to i32                    ;; %p.0 is always positive here
+  %idxprom = sext i32 %i.0 to i64
+  %arrayidx = getelementptr i16, ptr %v, i64 %idxprom
+  %0 = load i16, ptr %arrayidx, align 2
+  %conv1 = sext i16 %0 to i32
+  %cmp2 = icmp slt i32 %conv, %conv1
+  br i1 %cmp2, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %for.body
+  br label %cond.end
+
+cond.false:                                       ; preds = %for.body
+  br label %cond.end
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  ret i16 %p.0
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %cond = phi i32 [ %conv1, %cond.true ], [ %conv, %cond.false ]
+  %conv8 = trunc i32 %cond to i16
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+}
+
+
+
+define signext i16 @sext_and_zext_positive(i16 %x) {
+; CHECK-LABEL: define signext i16 @sext_and_zext_positive(
+; CHECK-SAME: i16 [[X:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[V:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[COND_END:%.*]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[V]] to i32
+; CHECK-NEXT:    [[CONV1:%.*]] = sext i16 [[X]] to i32
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i16 [[X]], [[V]]
+; CHECK-NEXT:    br i1 [[CMP2]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+; CHECK:       cond.true:
+; CHECK-NEXT:    br label [[COND_END]]
+; CHECK:       cond.false:
+; CHECK-NEXT:    br label [[COND_END]]
+; CHECK:       cond.end:
+; CHECK-NEXT:    [[A:%.*]] = phi i16 [ 10, [[COND_TRUE]] ], [ 20, [[COND_FALSE]] ]
+; CHECK-NEXT:    [[INC]] = add nuw nsw i16 [[A]], 1
+; CHECK-NEXT:    br label [[FOR_COND]]
+;
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %cond.end, %entry
+  %v = phi i16 [ 0, %entry ], [ %inc, %cond.end ] ;; always positive
+  br label %for.body
+
+for.body:                                         ; preds = %for.cond
+  %conv = zext i16 %v to i32                    ;; %p.0 is always positive here
+  %conv1 = sext i16 %x to i32                    ;; %p.0 is always positive here
+  %cmp2 = icmp slt i32 %conv1, %conv ;; positive/positive
+  br i1 %cmp2, label %cond.true, label %cond.false
+
+cond.true:                                        ; preds = %for.body
+  br label %cond.end
+
+cond.false:                                       ; preds = %for.body
+  br label %cond.end
+
+cond.end:                                         ; preds = %cond.false, %cond.true
+  %a = phi i16 [ 10, %cond.true ],  [ 20, %cond.false ]
+  %inc = add i16 %a, 1
+  br label %for.cond
+}
diff --git a/llvm/test/Transforms/SCCP/widening.ll b/llvm/test/Transforms/SCCP/widening.ll
index f482ed3a4e7f655..84adf77c621e1ff 100644
--- a/llvm/test/Transforms/SCCP/widening.ll
+++ b/llvm/test/Transforms/SCCP/widening.ll
@@ -653,7 +653,7 @@ define ptr @wobble(ptr %arg, i32 %arg1) align 2 {
 ; SCCP-NEXT:    [[TMP52:%.*]] = call dereferenceable(1) ptr @spam(ptr [[ARG]], i32 [[TMP51]])
 ; SCCP-NEXT:    [[TMP53:%.*]] = load i8, ptr [[TMP52]], align 1
 ; SCCP-NEXT:    [[TMP54:%.*]] = zext i8 [[TMP53]] to i32
-; SCCP-NEXT:    [[TMP55:%.*]] = icmp sgt i32 [[TMP48]], [[TMP54]]
+; SCCP-NEXT:    [[TMP55:%.*]] = icmp sgt i8 [[TMP47]], [[TMP53]]
 ; SCCP-NEXT:    br i1 [[TMP55]], label [[BB56:%.*]], label [[BB60:%.*]]
 ; SCCP:       bb56:
 ; SCCP-NEXT:    [[TMP57:%.*]] = add nsw i32 [[TMP40]], -1
@@ -734,7 +734,7 @@ define ptr @wobble(ptr %arg, i32 %arg1) align 2 {
 ; IPSCCP-NEXT:    [[TMP52:%.*]] = call dereferenceable(1) ptr @spam(ptr [[ARG]], i32 [[TMP51]])
 ; IPSCCP-NEXT:    [[TMP53:%.*]] = load i8, ptr [[TMP52]], align 1
 ; IPSCCP-NEXT:    [[TMP54:%.*]] = zext i8 [[TMP53]] to i32
-; IPSCCP-NEXT:    [[TMP55:%.*]] = icmp sgt i32 [[TMP48]], [[TMP54]]
+; IPSCCP-NEXT:    [[TMP55:%.*]] = icmp sgt i8 [[TMP47]], [[TMP53]]
 ; IPSCCP-NEXT:    br i1 [[TMP55]], label [[BB56:%.*]], label [[BB60:%.*]]
 ; IPSCCP:       bb56:
 ; IPSCCP-NEXT:    br label [[BB60]]

>From 6e972aa4b4dd7cd6b2d8f2af95333f67f820df35 Mon Sep 17 00:00:00 2001
From: leo-ard <lool4516 at gmail.com>
Date: Wed, 27 Sep 2023 15:39:49 -0400
Subject: [PATCH 2/2] Format SCCPSolver according to clang-format

---
 llvm/lib/Transforms/Utils/SCCPSolver.cpp | 54 +++++++++++++-----------
 1 file changed, 30 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index 09f64a925ab1b7c..0038e8866c2ce62 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -195,56 +195,62 @@ static bool replaceSignedInst(SCCPSolver &Solver,
   }
   case Instruction::ICmp: {
     ICmpInst &ICmp = cast<ICmpInst>(Inst);
-  
+
     ZExtInst *Op0_zext = dyn_cast<ZExtInst>(ICmp.getOperand(0));
     SExtInst *Op0_sext = dyn_cast<SExtInst>(ICmp.getOperand(0));
-  
+
     ZExtInst *Op1_zext = dyn_cast<ZExtInst>(ICmp.getOperand(1));
     SExtInst *Op1_sext = dyn_cast<SExtInst>(ICmp.getOperand(1));
-  
+
     CastInst *Op0;
     CastInst *Op1;
-  
-    if (Op0_zext) Op0 = Op0_zext; else Op0 = Op0_sext;
-    if (Op1_zext) Op1 = Op1_zext; else Op1 = Op1_sext;
-  
+
+    if (Op0_zext)
+      Op0 = Op0_zext;
+    else
+      Op0 = Op0_sext;
+    if (Op1_zext)
+      Op1 = Op1_zext;
+    else
+      Op1 = Op1_sext;
+
     bool reversed = false;
-  
-    if (!Op0 || !Op1){
+
+    if (!Op0 || !Op1) {
       // Op0 and Op1 must be defined
       return false;
-    } 
-  
-    if (Op1_zext && (! Op0_zext)){
+    }
+
+    if (Op1_zext && (!Op0_zext)) {
       // We force Op0 to be a zext and reverse the arguments
       //   at the end if we swap
-      reversed = true; 
-  
+      reversed = true;
+
       std::swap(Op0_zext, Op1_zext);
       std::swap(Op0_sext, Op1_sext);
       std::swap(Op0, Op1);
     }
-  
-  
-    if(Op0->getType() != Op1->getType()){
+
+    if (Op0->getType() != Op1->getType()) {
       // extensions are not of the same type
       // This optimization is done in InstCombine
       return false;
     }
-  
+
     // ICMP (sext X) (sext y) => ICMP X, Y
     // ICMP (zext X) (zext y) => ICMP X, Y
     // ICMP (zext X) (sext Y) => ICMP X, Y  if X >= 0 and ICMP signed
-    if((Op0_zext && Op1_zext)
-       || (Op0_sext && Op1_sext) 
-       || (ICmp.isSigned() && Op0_zext && Op1_sext && isNonNegative(Op0_zext->getOperand(0)))){
-  
+    if ((Op0_zext && Op1_zext) || (Op0_sext && Op1_sext) ||
+        (ICmp.isSigned() && Op0_zext && Op1_sext &&
+         isNonNegative(Op0_zext->getOperand(0)))) {
+
       Value *X = Op0->getOperand(0);
       Value *Y = Op1->getOperand(0);
-      NewInst = CmpInst::Create(ICmp.getOpcode(), ICmp.getPredicate(), reversed ? Y : X, reversed ? X : Y, "", &Inst);
+      NewInst = CmpInst::Create(ICmp.getOpcode(), ICmp.getPredicate(),
+                                reversed ? Y : X, reversed ? X : Y, "", &Inst);
       break;
     }
-  
+
     return false;
   }
   default:



More information about the llvm-commits mailing list