[llvm] [SCCP] [Transform] Adding ICMP folding for zext and sext in SCCPSolver (PR #67594)
LĂ©onard Oest O'Leary via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 27 12:43:10 PDT 2023
https://github.com/leo-ard updated https://github.com/llvm/llvm-project/pull/67594
>From 399f9d64cfde0761ac8278dd05ba704d879b1f5a Mon Sep 17 00:00:00 2001
From: leo-ard <lool4516 at gmail.com>
Date: Wed, 27 Sep 2023 13:35:53 -0400
Subject: [PATCH 1/2] Adding ICMP folding for SCCPSolver
---
clang/test/CodeGen/X86/min_max.c | 19 ++
llvm/lib/Transforms/Utils/SCCPSolver.cpp | 54 +++++
.../Transforms/SCCP/icmp-fold-with-cast.ll | 185 ++++++++++++++++++
llvm/test/Transforms/SCCP/widening.ll | 4 +-
4 files changed, 260 insertions(+), 2 deletions(-)
create mode 100644 clang/test/CodeGen/X86/min_max.c
create mode 100644 llvm/test/Transforms/SCCP/icmp-fold-with-cast.ll
diff --git a/clang/test/CodeGen/X86/min_max.c b/clang/test/CodeGen/X86/min_max.c
new file mode 100644
index 000000000000000..7af8181cc9ff367
--- /dev/null
+++ b/clang/test/CodeGen/X86/min_max.c
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 %s -O2 -triple=x86_64-apple-darwin -emit-llvm -o - | FileCheck %s
+
+short vecreduce_smax_v2i16(int n, short* v)
+{
+ // CHECK: @llvm.smax
+ short p = 0;
+ for (int i = 0; i < n; ++i)
+ p = p < v[i] ? v[i] : p;
+ return p;
+}
+
+short vecreduce_smin_v2i16(int n, short* v)
+{
+ // CHECK: @llvm.smin
+ short p = 0;
+ for (int i = 0; i < n; ++i)
+ p = p > v[i] ? v[i] : p;
+ return p;
+}
\ No newline at end of file
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index 8a67fda7c98e787..09f64a925ab1b7c 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -193,6 +193,60 @@ static bool replaceSignedInst(SCCPSolver &Solver,
NewInst = BinaryOperator::Create(NewOpcode, Op0, Op1, "", &Inst);
break;
}
+ case Instruction::ICmp: {
+ ICmpInst &ICmp = cast<ICmpInst>(Inst);
+
+ ZExtInst *Op0_zext = dyn_cast<ZExtInst>(ICmp.getOperand(0));
+ SExtInst *Op0_sext = dyn_cast<SExtInst>(ICmp.getOperand(0));
+
+ ZExtInst *Op1_zext = dyn_cast<ZExtInst>(ICmp.getOperand(1));
+ SExtInst *Op1_sext = dyn_cast<SExtInst>(ICmp.getOperand(1));
+
+ CastInst *Op0;
+ CastInst *Op1;
+
+ if (Op0_zext) Op0 = Op0_zext; else Op0 = Op0_sext;
+ if (Op1_zext) Op1 = Op1_zext; else Op1 = Op1_sext;
+
+ bool reversed = false;
+
+ if (!Op0 || !Op1){
+ // Op0 and Op1 must be defined
+ return false;
+ }
+
+ if (Op1_zext && (! Op0_zext)){
+ // We force Op0 to be a zext and reverse the arguments
+ // at the end if we swap
+ reversed = true;
+
+ std::swap(Op0_zext, Op1_zext);
+ std::swap(Op0_sext, Op1_sext);
+ std::swap(Op0, Op1);
+ }
+
+
+ if(Op0->getType() != Op1->getType()){
+ // extensions are not of the same type
+ // This optimization is done in InstCombine
+ return false;
+ }
+
+ // ICMP (sext X) (sext y) => ICMP X, Y
+ // ICMP (zext X) (zext y) => ICMP X, Y
+ // ICMP (zext X) (sext Y) => ICMP X, Y if X >= 0 and ICMP signed
+ if((Op0_zext && Op1_zext)
+ || (Op0_sext && Op1_sext)
+ || (ICmp.isSigned() && Op0_zext && Op1_sext && isNonNegative(Op0_zext->getOperand(0)))){
+
+ Value *X = Op0->getOperand(0);
+ Value *Y = Op1->getOperand(0);
+ NewInst = CmpInst::Create(ICmp.getOpcode(), ICmp.getPredicate(), reversed ? Y : X, reversed ? X : Y, "", &Inst);
+ break;
+ }
+
+ return false;
+ }
default:
return false;
}
diff --git a/llvm/test/Transforms/SCCP/icmp-fold-with-cast.ll b/llvm/test/Transforms/SCCP/icmp-fold-with-cast.ll
new file mode 100644
index 000000000000000..90b2c123081fb49
--- /dev/null
+++ b/llvm/test/Transforms/SCCP/icmp-fold-with-cast.ll
@@ -0,0 +1,185 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool ./bin/opt --version 3
+; See PRXXX for more details
+; RUN: opt < %s -S -passes=ipsccp | FileCheck %s
+
+
+define signext i32 @sext_sext(i16 %x, i16 %y) {
+; CHECK-LABEL: define signext i32 @sext_sext(
+; CHECK-SAME: i16 [[X:%.*]], i16 [[Y:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[X]] to i32
+; CHECK-NEXT: [[CONV1:%.*]] = sext i16 [[Y]] to i32
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i16 [[X]], [[Y]]
+; CHECK-NEXT: br i1 [[CMP2]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+; CHECK: cond.true:
+; CHECK-NEXT: br label [[COND_END:%.*]]
+; CHECK: cond.false:
+; CHECK-NEXT: br label [[COND_END]]
+; CHECK: cond.end:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ 1, [[COND_FALSE]] ]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+entry:
+ %conv = sext i16 %x to i32
+ %conv1 = sext i16 %y to i32
+ %cmp2 = icmp sgt i32 %conv, %conv1
+ br i1 %cmp2, label %cond.true, label %cond.false
+
+cond.true: ; preds = %for.body
+ br label %cond.end
+
+cond.false: ; preds = %for.body
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %cond.true
+ %cond = phi i32 [ 0, %cond.true ], [ 1, %cond.false ]
+ ret i32 %cond
+}
+
+
+define signext i32 @zext_zext(i16 %x, i16 %y) {
+; CHECK-LABEL: define signext i32 @zext_zext(
+; CHECK-SAME: i16 [[X:%.*]], i16 [[Y:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[X]] to i32
+; CHECK-NEXT: [[CONV1:%.*]] = zext i16 [[Y]] to i32
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i16 [[X]], [[Y]]
+; CHECK-NEXT: br i1 [[CMP2]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+; CHECK: cond.true:
+; CHECK-NEXT: br label [[COND_END:%.*]]
+; CHECK: cond.false:
+; CHECK-NEXT: br label [[COND_END]]
+; CHECK: cond.end:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ 1, [[COND_FALSE]] ]
+; CHECK-NEXT: ret i32 [[COND]]
+;
+entry:
+ %conv = zext i16 %x to i32
+ %conv1 = zext i16 %y to i32
+ %cmp2 = icmp sgt i32 %conv, %conv1
+ br i1 %cmp2, label %cond.true, label %cond.false
+
+cond.true: ; preds = %for.body
+ br label %cond.end
+
+cond.false: ; preds = %for.body
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %cond.true
+ %cond = phi i32 [ 0, %cond.true ], [ 1, %cond.false ]
+ ret i32 %cond
+}
+
+
+define signext i16 @zext_positive_and_sext(i32 noundef %n, ptr noundef %v) {
+; CHECK-LABEL: define signext i16 @zext_positive_and_sext(
+; CHECK-SAME: i32 noundef [[N:%.*]], ptr noundef [[V:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_COND:%.*]]
+; CHECK: for.cond:
+; CHECK-NEXT: [[P_0:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[CONV8:%.*]], [[COND_END:%.*]] ]
+; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[COND_END]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[P_0]] to i32
+; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[I_0]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i16, ptr [[V]], i64 [[IDXPROM]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
+; CHECK-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32
+; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i16 [[P_0]], [[TMP0]]
+; CHECK-NEXT: br i1 [[CMP2]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+; CHECK: cond.true:
+; CHECK-NEXT: br label [[COND_END]]
+; CHECK: cond.false:
+; CHECK-NEXT: br label [[COND_END]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret i16 [[P_0]]
+; CHECK: cond.end:
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[CONV1]], [[COND_TRUE]] ], [ [[CONV]], [[COND_FALSE]] ]
+; CHECK-NEXT: [[CONV8]] = trunc i32 [[COND]] to i16
+; CHECK-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT: br label [[FOR_COND]]
+;
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %cond.end, %entry
+ %p.0 = phi i16 [ 0, %entry ], [ %conv8, %cond.end ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
+ %cmp = icmp slt i32 %i.0, %n
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.body: ; preds = %for.cond
+ %conv = zext i16 %p.0 to i32 ;; %p.0 is always positive here
+ %idxprom = sext i32 %i.0 to i64
+ %arrayidx = getelementptr i16, ptr %v, i64 %idxprom
+ %0 = load i16, ptr %arrayidx, align 2
+ %conv1 = sext i16 %0 to i32
+ %cmp2 = icmp slt i32 %conv, %conv1
+ br i1 %cmp2, label %cond.true, label %cond.false
+
+cond.true: ; preds = %for.body
+ br label %cond.end
+
+cond.false: ; preds = %for.body
+ br label %cond.end
+
+for.cond.cleanup: ; preds = %for.cond
+ ret i16 %p.0
+
+cond.end: ; preds = %cond.false, %cond.true
+ %cond = phi i32 [ %conv1, %cond.true ], [ %conv, %cond.false ]
+ %conv8 = trunc i32 %cond to i16
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+}
+
+
+
+define signext i16 @sext_and_zext_positive(i16 %x) {
+; CHECK-LABEL: define signext i16 @sext_and_zext_positive(
+; CHECK-SAME: i16 [[X:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_COND:%.*]]
+; CHECK: for.cond:
+; CHECK-NEXT: [[V:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[COND_END:%.*]] ]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[V]] to i32
+; CHECK-NEXT: [[CONV1:%.*]] = sext i16 [[X]] to i32
+; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i16 [[X]], [[V]]
+; CHECK-NEXT: br i1 [[CMP2]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+; CHECK: cond.true:
+; CHECK-NEXT: br label [[COND_END]]
+; CHECK: cond.false:
+; CHECK-NEXT: br label [[COND_END]]
+; CHECK: cond.end:
+; CHECK-NEXT: [[A:%.*]] = phi i16 [ 10, [[COND_TRUE]] ], [ 20, [[COND_FALSE]] ]
+; CHECK-NEXT: [[INC]] = add nuw nsw i16 [[A]], 1
+; CHECK-NEXT: br label [[FOR_COND]]
+;
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %cond.end, %entry
+ %v = phi i16 [ 0, %entry ], [ %inc, %cond.end ] ;; always positive
+ br label %for.body
+
+for.body: ; preds = %for.cond
+ %conv = zext i16 %v to i32 ;; %p.0 is always positive here
+ %conv1 = sext i16 %x to i32 ;; %p.0 is always positive here
+ %cmp2 = icmp slt i32 %conv1, %conv ;; positive/positive
+ br i1 %cmp2, label %cond.true, label %cond.false
+
+cond.true: ; preds = %for.body
+ br label %cond.end
+
+cond.false: ; preds = %for.body
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %cond.true
+ %a = phi i16 [ 10, %cond.true ], [ 20, %cond.false ]
+ %inc = add i16 %a, 1
+ br label %for.cond
+}
diff --git a/llvm/test/Transforms/SCCP/widening.ll b/llvm/test/Transforms/SCCP/widening.ll
index f482ed3a4e7f655..84adf77c621e1ff 100644
--- a/llvm/test/Transforms/SCCP/widening.ll
+++ b/llvm/test/Transforms/SCCP/widening.ll
@@ -653,7 +653,7 @@ define ptr @wobble(ptr %arg, i32 %arg1) align 2 {
; SCCP-NEXT: [[TMP52:%.*]] = call dereferenceable(1) ptr @spam(ptr [[ARG]], i32 [[TMP51]])
; SCCP-NEXT: [[TMP53:%.*]] = load i8, ptr [[TMP52]], align 1
; SCCP-NEXT: [[TMP54:%.*]] = zext i8 [[TMP53]] to i32
-; SCCP-NEXT: [[TMP55:%.*]] = icmp sgt i32 [[TMP48]], [[TMP54]]
+; SCCP-NEXT: [[TMP55:%.*]] = icmp sgt i8 [[TMP47]], [[TMP53]]
; SCCP-NEXT: br i1 [[TMP55]], label [[BB56:%.*]], label [[BB60:%.*]]
; SCCP: bb56:
; SCCP-NEXT: [[TMP57:%.*]] = add nsw i32 [[TMP40]], -1
@@ -734,7 +734,7 @@ define ptr @wobble(ptr %arg, i32 %arg1) align 2 {
; IPSCCP-NEXT: [[TMP52:%.*]] = call dereferenceable(1) ptr @spam(ptr [[ARG]], i32 [[TMP51]])
; IPSCCP-NEXT: [[TMP53:%.*]] = load i8, ptr [[TMP52]], align 1
; IPSCCP-NEXT: [[TMP54:%.*]] = zext i8 [[TMP53]] to i32
-; IPSCCP-NEXT: [[TMP55:%.*]] = icmp sgt i32 [[TMP48]], [[TMP54]]
+; IPSCCP-NEXT: [[TMP55:%.*]] = icmp sgt i8 [[TMP47]], [[TMP53]]
; IPSCCP-NEXT: br i1 [[TMP55]], label [[BB56:%.*]], label [[BB60:%.*]]
; IPSCCP: bb56:
; IPSCCP-NEXT: br label [[BB60]]
>From 6e972aa4b4dd7cd6b2d8f2af95333f67f820df35 Mon Sep 17 00:00:00 2001
From: leo-ard <lool4516 at gmail.com>
Date: Wed, 27 Sep 2023 15:39:49 -0400
Subject: [PATCH 2/2] Format SCCPSolver according to clang-format
---
llvm/lib/Transforms/Utils/SCCPSolver.cpp | 54 +++++++++++++-----------
1 file changed, 30 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index 09f64a925ab1b7c..0038e8866c2ce62 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -195,56 +195,62 @@ static bool replaceSignedInst(SCCPSolver &Solver,
}
case Instruction::ICmp: {
ICmpInst &ICmp = cast<ICmpInst>(Inst);
-
+
ZExtInst *Op0_zext = dyn_cast<ZExtInst>(ICmp.getOperand(0));
SExtInst *Op0_sext = dyn_cast<SExtInst>(ICmp.getOperand(0));
-
+
ZExtInst *Op1_zext = dyn_cast<ZExtInst>(ICmp.getOperand(1));
SExtInst *Op1_sext = dyn_cast<SExtInst>(ICmp.getOperand(1));
-
+
CastInst *Op0;
CastInst *Op1;
-
- if (Op0_zext) Op0 = Op0_zext; else Op0 = Op0_sext;
- if (Op1_zext) Op1 = Op1_zext; else Op1 = Op1_sext;
-
+
+ if (Op0_zext)
+ Op0 = Op0_zext;
+ else
+ Op0 = Op0_sext;
+ if (Op1_zext)
+ Op1 = Op1_zext;
+ else
+ Op1 = Op1_sext;
+
bool reversed = false;
-
- if (!Op0 || !Op1){
+
+ if (!Op0 || !Op1) {
// Op0 and Op1 must be defined
return false;
- }
-
- if (Op1_zext && (! Op0_zext)){
+ }
+
+ if (Op1_zext && (!Op0_zext)) {
// We force Op0 to be a zext and reverse the arguments
// at the end if we swap
- reversed = true;
-
+ reversed = true;
+
std::swap(Op0_zext, Op1_zext);
std::swap(Op0_sext, Op1_sext);
std::swap(Op0, Op1);
}
-
-
- if(Op0->getType() != Op1->getType()){
+
+ if (Op0->getType() != Op1->getType()) {
// extensions are not of the same type
// This optimization is done in InstCombine
return false;
}
-
+
// ICMP (sext X) (sext y) => ICMP X, Y
// ICMP (zext X) (zext y) => ICMP X, Y
// ICMP (zext X) (sext Y) => ICMP X, Y if X >= 0 and ICMP signed
- if((Op0_zext && Op1_zext)
- || (Op0_sext && Op1_sext)
- || (ICmp.isSigned() && Op0_zext && Op1_sext && isNonNegative(Op0_zext->getOperand(0)))){
-
+ if ((Op0_zext && Op1_zext) || (Op0_sext && Op1_sext) ||
+ (ICmp.isSigned() && Op0_zext && Op1_sext &&
+ isNonNegative(Op0_zext->getOperand(0)))) {
+
Value *X = Op0->getOperand(0);
Value *Y = Op1->getOperand(0);
- NewInst = CmpInst::Create(ICmp.getOpcode(), ICmp.getPredicate(), reversed ? Y : X, reversed ? X : Y, "", &Inst);
+ NewInst = CmpInst::Create(ICmp.getOpcode(), ICmp.getPredicate(),
+ reversed ? Y : X, reversed ? X : Y, "", &Inst);
break;
}
-
+
return false;
}
default:
More information about the llvm-commits
mailing list