[llvm] [InstSimplify] Implement simple folds for `ucmp`/`scmp` intrinsics (PR #95601)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 14 14:05:24 PDT 2024
https://github.com/Poseydon42 created https://github.com/llvm/llvm-project/pull/95601
This patch adds folds for the cases where both operands are the same or where it can be established that the first operand is less than, equal to, or greater than the second operand.
>From 2457d69101706e03b65649c25539a117728b9982 Mon Sep 17 00:00:00 2001
From: Poseydon42 <vvmposeydon at gmail.com>
Date: Fri, 14 Jun 2024 21:33:09 +0100
Subject: [PATCH 1/2] [InstSimplify] Add tests for some folds for UCMP/SCMP
intrinsics
This adds tests for the following folds:
- cmp x, x => 0
- cmp x, <something greater than x> => -1
- cmp x, <something equal to x> => 0
- cmp x, <something less than x> => 1
---
llvm/test/Transforms/InstSimplify/uscmp.ll | 208 +++++++++++++++++++++
1 file changed, 208 insertions(+)
diff --git a/llvm/test/Transforms/InstSimplify/uscmp.ll b/llvm/test/Transforms/InstSimplify/uscmp.ll
index adfcc313eff9e..69e630b823756 100644
--- a/llvm/test/Transforms/InstSimplify/uscmp.ll
+++ b/llvm/test/Transforms/InstSimplify/uscmp.ll
@@ -96,3 +96,211 @@ define <4 x i8> @scmp_nonsplat() {
%1 = call <4 x i8> @llvm.scmp(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 -1, i32 1, i32 -2, i32 4>)
ret <4 x i8> %1
}
+
+define i8 @scmp_with_itself(i32 %x) {
+; CHECK-LABEL: define i8 @scmp_with_itself(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[X]], i32 [[X]])
+; CHECK-NEXT: ret i8 [[TMP1]]
+;
+ %1 = call i8 @llvm.scmp(i32 %x, i32 %x)
+ ret i8 %1
+}
+
+define <4 x i8> @ucmp_vec_with_itself(<4 x i32> %x) {
+; CHECK-LABEL: define <4 x i8> @ucmp_vec_with_itself(
+; CHECK-SAME: <4 x i32> [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i8> @llvm.scmp.v4i8.v4i32(<4 x i32> [[X]], <4 x i32> [[X]])
+; CHECK-NEXT: ret <4 x i8> [[TMP1]]
+;
+ %1 = call <4 x i8> @llvm.scmp(<4 x i32> %x, <4 x i32> %x)
+ ret <4 x i8> %1
+}
+
+define i8 @scmp_known_gt(i32 %x, i32 %y) {
+; CHECK-LABEL: define i8 @scmp_known_gt(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X]], [[Y]]
+; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+ %1 = icmp sgt i32 %x, %y
+ call void @llvm.assume(i1 %1)
+
+ %2 = call i8 @llvm.scmp(i32 %x, i32 %y)
+ ret i8 %2
+}
+
+define i8 @scmp_known_eq(i32 %x, i32 %y) {
+; CHECK-LABEL: define i8 @scmp_known_eq(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], [[Y]]
+; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+ %1 = icmp eq i32 %x, %y
+ call void @llvm.assume(i1 %1)
+
+ %2 = call i8 @llvm.scmp(i32 %x, i32 %y)
+ ret i8 %2
+}
+
+define i8 @scmp_known_lt(i32 %x, i32 %y) {
+; CHECK-LABEL: define i8 @scmp_known_lt(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X]], [[Y]]
+; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+ %1 = icmp slt i32 %x, %y
+ call void @llvm.assume(i1 %1)
+
+ %2 = call i8 @llvm.scmp(i32 %x, i32 %y)
+ ret i8 %2
+}
+
+define i8 @ucmp_known_gt(i32 %x, i32 %y) {
+; CHECK-LABEL: define i8 @ucmp_known_gt(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X]], [[Y]]
+; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+ %1 = icmp ugt i32 %x, %y
+ call void @llvm.assume(i1 %1)
+
+ %2 = call i8 @llvm.ucmp(i32 %x, i32 %y)
+ ret i8 %2
+}
+
+define i8 @ucmp_known_eq(i32 %x, i32 %y) {
+; CHECK-LABEL: define i8 @ucmp_known_eq(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], [[Y]]
+; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+ %1 = icmp eq i32 %x, %y
+ call void @llvm.assume(i1 %1)
+
+ %2 = call i8 @llvm.ucmp(i32 %x, i32 %y)
+ ret i8 %2
+}
+
+define i8 @ucmp_known_lt(i32 %x, i32 %y) {
+; CHECK-LABEL: define i8 @ucmp_known_lt(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X]], [[Y]]
+; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+ %1 = icmp ult i32 %x, %y
+ call void @llvm.assume(i1 %1)
+
+ %2 = call i8 @llvm.ucmp(i32 %x, i32 %y)
+ ret i8 %2
+}
+
+define i8 @ucmp_with_addition(i32 %x) {
+; CHECK-LABEL: define i8 @ucmp_with_addition(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[X]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[X]], i32 [[TMP1]])
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+ %1 = add nuw i32 %x, 1
+ %2 = call i8 @llvm.ucmp(i32 %x, i32 %1)
+ ret i8 %2
+}
+
+define i8 @ucmp_with_addition2(i32 %x) {
+; CHECK-LABEL: define i8 @ucmp_with_addition2(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[X]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[TMP1]], i32 [[X]])
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+ %1 = add nuw i32 %x, 1
+ %2 = call i8 @llvm.ucmp(i32 %1, i32 %x)
+ ret i8 %2
+}
+
+; Negative case: mismatched signedness of predicates
+define i8 @scmp_known_ugt(i32 %x, i32 %y) {
+; CHECK-LABEL: define i8 @scmp_known_ugt(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X]], [[Y]]
+; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+ %1 = icmp ugt i32 %x, %y
+ call void @llvm.assume(i1 %1)
+
+ %2 = call i8 @llvm.scmp(i32 %x, i32 %y)
+ ret i8 %2
+}
+
+define i8 @scmp_known_ult(i32 %x, i32 %y) {
+; CHECK-LABEL: define i8 @scmp_known_ult(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X]], [[Y]]
+; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+ %1 = icmp ult i32 %x, %y
+ call void @llvm.assume(i1 %1)
+
+ %2 = call i8 @llvm.scmp(i32 %x, i32 %y)
+ ret i8 %2
+}
+
+define i8 @ucmp_known_sgt(i32 %x, i32 %y) {
+; CHECK-LABEL: define i8 @ucmp_known_sgt(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X]], [[Y]]
+; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+ %1 = icmp sgt i32 %x, %y
+ call void @llvm.assume(i1 %1)
+
+ %2 = call i8 @llvm.ucmp(i32 %x, i32 %y)
+ ret i8 %2
+}
+
+define i8 @ucmp_known_slt(i32 %x, i32 %y) {
+; CHECK-LABEL: define i8 @ucmp_known_slt(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X]], [[Y]]
+; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+ %1 = icmp slt i32 %x, %y
+ call void @llvm.assume(i1 %1)
+
+ %2 = call i8 @llvm.ucmp(i32 %x, i32 %y)
+ ret i8 %2
+}
+
+; Negative case: no nuw flag
+define i8 @ucmp_with_addition_no_nuw(i32 %x) {
+; CHECK-LABEL: define i8 @ucmp_with_addition_no_nuw(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[X]], i32 [[TMP1]])
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
+ %1 = add i32 %x, 1
+ %2 = call i8 @llvm.ucmp(i32 %x, i32 %1)
+ ret i8 %2
+}
>From 97e547a8feb57c7f9536b5af3448928e2cc7c512 Mon Sep 17 00:00:00 2001
From: Poseydon42 <vvmposeydon at gmail.com>
Date: Fri, 14 Jun 2024 21:39:02 +0100
Subject: [PATCH 2/2] [InstSimplify] Implemented folds of UCMP/SCMP with two
equal operands or where comparison between them has a known result
---
llvm/lib/Analysis/InstructionSimplify.cpp | 27 ++++++++++++++++++
llvm/test/Transforms/InstSimplify/uscmp.ll | 32 +++++++---------------
2 files changed, 37 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 8b2aa6b9f18b0..37f2f8c43ca5d 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -6505,6 +6505,33 @@ Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType,
break;
}
+ case Intrinsic::scmp:
+ case Intrinsic::ucmp: {
+ // Fold cmp x, x -> 0
+ if (Op0 == Op1)
+ return Constant::getNullValue(ReturnType);
+
+ // Fold to a constant if the relationship between operands can be
+ // established with certainty
+ if (isICmpTrue(CmpInst::ICMP_EQ, Op0, Op1, Q, RecursionLimit))
+ return Constant::getNullValue(ReturnType);
+
+ // ReturnType may be a vector of integers. Build the results through
+ // ConstantInt, which splats for vector types, instead of querying
+ // Type::getIntegerBitWidth(), which is only valid on scalar integers.
+ ICmpInst::Predicate PredGT =
+ IID == Intrinsic::scmp ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
+ if (isICmpTrue(PredGT, Op0, Op1, Q, RecursionLimit))
+ return ConstantInt::get(ReturnType, 1);
+
+ // Op0 is known to be less than Op1: the result is -1 (all bits set).
+ ICmpInst::Predicate PredLT =
+ IID == Intrinsic::scmp ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
+ if (isICmpTrue(PredLT, Op0, Op1, Q, RecursionLimit))
+ return ConstantInt::getSigned(ReturnType, -1);
+
+ break;
+ }
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
// X - X -> { 0, false }
diff --git a/llvm/test/Transforms/InstSimplify/uscmp.ll b/llvm/test/Transforms/InstSimplify/uscmp.ll
index 69e630b823756..a5805caaa5089 100644
--- a/llvm/test/Transforms/InstSimplify/uscmp.ll
+++ b/llvm/test/Transforms/InstSimplify/uscmp.ll
@@ -100,8 +100,7 @@ define <4 x i8> @scmp_nonsplat() {
define i8 @scmp_with_itself(i32 %x) {
; CHECK-LABEL: define i8 @scmp_with_itself(
; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[X]], i32 [[X]])
-; CHECK-NEXT: ret i8 [[TMP1]]
+; CHECK-NEXT: ret i8 0
;
%1 = call i8 @llvm.scmp(i32 %x, i32 %x)
ret i8 %1
@@ -110,8 +109,7 @@ define i8 @scmp_with_itself(i32 %x) {
define <4 x i8> @ucmp_vec_with_itself(<4 x i32> %x) {
; CHECK-LABEL: define <4 x i8> @ucmp_vec_with_itself(
; CHECK-SAME: <4 x i32> [[X:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i8> @llvm.scmp.v4i8.v4i32(<4 x i32> [[X]], <4 x i32> [[X]])
-; CHECK-NEXT: ret <4 x i8> [[TMP1]]
+; CHECK-NEXT: ret <4 x i8> zeroinitializer
;
%1 = call <4 x i8> @llvm.scmp(<4 x i32> %x, <4 x i32> %x)
ret <4 x i8> %1
@@ -122,8 +120,7 @@ define i8 @scmp_known_gt(i32 %x, i32 %y) {
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X]], [[Y]]
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]])
-; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[X]], i32 [[Y]])
-; CHECK-NEXT: ret i8 [[TMP2]]
+; CHECK-NEXT: ret i8 1
;
%1 = icmp sgt i32 %x, %y
call void @llvm.assume(i1 %1)
@@ -137,8 +134,7 @@ define i8 @scmp_known_eq(i32 %x, i32 %y) {
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], [[Y]]
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]])
-; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[X]], i32 [[Y]])
-; CHECK-NEXT: ret i8 [[TMP2]]
+; CHECK-NEXT: ret i8 0
;
%1 = icmp eq i32 %x, %y
call void @llvm.assume(i1 %1)
@@ -152,8 +148,7 @@ define i8 @scmp_known_lt(i32 %x, i32 %y) {
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X]], [[Y]]
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]])
-; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.scmp.i8.i32(i32 [[X]], i32 [[Y]])
-; CHECK-NEXT: ret i8 [[TMP2]]
+; CHECK-NEXT: ret i8 -1
;
%1 = icmp slt i32 %x, %y
call void @llvm.assume(i1 %1)
@@ -167,8 +162,7 @@ define i8 @ucmp_known_gt(i32 %x, i32 %y) {
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X]], [[Y]]
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]])
-; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[X]], i32 [[Y]])
-; CHECK-NEXT: ret i8 [[TMP2]]
+; CHECK-NEXT: ret i8 1
;
%1 = icmp ugt i32 %x, %y
call void @llvm.assume(i1 %1)
@@ -182,8 +176,7 @@ define i8 @ucmp_known_eq(i32 %x, i32 %y) {
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X]], [[Y]]
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]])
-; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[X]], i32 [[Y]])
-; CHECK-NEXT: ret i8 [[TMP2]]
+; CHECK-NEXT: ret i8 0
;
%1 = icmp eq i32 %x, %y
call void @llvm.assume(i1 %1)
@@ -197,8 +190,7 @@ define i8 @ucmp_known_lt(i32 %x, i32 %y) {
; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X]], [[Y]]
; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]])
-; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[X]], i32 [[Y]])
-; CHECK-NEXT: ret i8 [[TMP2]]
+; CHECK-NEXT: ret i8 -1
;
%1 = icmp ult i32 %x, %y
call void @llvm.assume(i1 %1)
@@ -210,9 +202,7 @@ define i8 @ucmp_known_lt(i32 %x, i32 %y) {
define i8 @ucmp_with_addition(i32 %x) {
; CHECK-LABEL: define i8 @ucmp_with_addition(
; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[X]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[X]], i32 [[TMP1]])
-; CHECK-NEXT: ret i8 [[TMP2]]
+; CHECK-NEXT: ret i8 -1
;
%1 = add nuw i32 %x, 1
%2 = call i8 @llvm.ucmp(i32 %x, i32 %1)
@@ -222,9 +212,7 @@ define i8 @ucmp_with_addition(i32 %x) {
define i8 @ucmp_with_addition2(i32 %x) {
; CHECK-LABEL: define i8 @ucmp_with_addition2(
; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[X]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 [[TMP1]], i32 [[X]])
-; CHECK-NEXT: ret i8 [[TMP2]]
+; CHECK-NEXT: ret i8 1
;
%1 = add nuw i32 %x, 1
%2 = call i8 @llvm.ucmp(i32 %1, i32 %x)
More information about the llvm-commits
mailing list