[llvm] [InstSimplify] Add constant folding support for `ucmp`/`scmp` intrinsics (PR #93730)

via llvm-commits llvm-commits at lists.llvm.org
Thu May 30 08:06:51 PDT 2024


https://github.com/Poseydon42 updated https://github.com/llvm/llvm-project/pull/93730

>From f1a2aad373069d8fb439d67f7d68a46b93000132 Mon Sep 17 00:00:00 2001
From: Poseydon42 <vvmposeydon at gmail.com>
Date: Wed, 29 May 2024 20:36:09 +0100
Subject: [PATCH 1/2] [InstSimplify] Add constant folding tests for UCMP/SCMP
 intrinsics

---
 llvm/test/Transforms/InstSimplify/uscmp.ll | 110 +++++++++++++++++++++
 1 file changed, 110 insertions(+)
 create mode 100644 llvm/test/Transforms/InstSimplify/uscmp.ll

diff --git a/llvm/test/Transforms/InstSimplify/uscmp.ll b/llvm/test/Transforms/InstSimplify/uscmp.ll
new file mode 100644
index 0000000000000..91b33946f1c92
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/uscmp.ll
@@ -0,0 +1,110 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instsimplify -S | FileCheck %s
+
+define i8 @scmp_lt() {
+; CHECK-LABEL: define i8 @scmp_lt() {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.scmp.i8.i32(i32 -7, i32 3)
+; CHECK-NEXT:    ret i8 [[TMP1]]
+;
+  %1 = call i8 @llvm.scmp(i32 -7, i32 3)
+  ret i8 %1
+}
+
+define i8 @scmp_eq() {
+; CHECK-LABEL: define i8 @scmp_eq() {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.scmp.i8.i32(i32 2, i32 2)
+; CHECK-NEXT:    ret i8 [[TMP1]]
+;
+  %1 = call i8 @llvm.scmp(i32 2, i32 2)
+  ret i8 %1
+}
+
+define i8 @scmp_gt() {
+; CHECK-LABEL: define i8 @scmp_gt() {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.scmp.i8.i32(i32 2, i32 -7)
+; CHECK-NEXT:    ret i8 [[TMP1]]
+;
+  %1 = call i8 @llvm.scmp(i32 2, i32 -7)
+  ret i8 %1
+}
+
+define i8 @ucmp_lt() {
+; CHECK-LABEL: define i8 @ucmp_lt() {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 7, i32 12)
+; CHECK-NEXT:    ret i8 [[TMP1]]
+;
+  %1 = call i8 @llvm.ucmp(i32 7, i32 12)
+  ret i8 %1
+}
+
+define i2 @ucmp_eq() {
+; CHECK-LABEL: define i2 @ucmp_eq() {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i2 @llvm.ucmp.i2.i32(i32 12, i32 12)
+; CHECK-NEXT:    ret i2 [[TMP1]]
+;
+  %1 = call i2 @llvm.ucmp(i32 12, i32 12)
+  ret i2 %1
+}
+
+define i100 @ucmp_gt() {
+; CHECK-LABEL: define i100 @ucmp_gt() {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i100 @llvm.ucmp.i100.i32(i32 7, i32 3)
+; CHECK-NEXT:    ret i100 [[TMP1]]
+;
+  %1 = call i100 @llvm.ucmp(i32 7, i32 3)
+  ret i100 %1
+}
+
+define i8 @ucmp_poison() {
+; CHECK-LABEL: define i8 @ucmp_poison() {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 poison, i32 5)
+; CHECK-NEXT:    ret i8 [[TMP1]]
+;
+  %1 = call i8 @llvm.ucmp(i32 poison, i32 5)
+  ret i8 %1
+}
+
+define i8 @scmp_poison() {
+; CHECK-LABEL: define i8 @scmp_poison() {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.scmp.i8.i32(i32 0, i32 poison)
+; CHECK-NEXT:    ret i8 [[TMP1]]
+;
+  %1 = call i8 @llvm.scmp(i32 0, i32 poison)
+  ret i8 %1
+}
+
+define i8 @scmp_undef() {
+; CHECK-LABEL: define i8 @scmp_undef() {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.scmp.i8.i32(i32 undef, i32 -12)
+; CHECK-NEXT:    ret i8 [[TMP1]]
+;
+  %1 = call i8 @llvm.scmp(i32 undef, i32 -12)
+  ret i8 %1
+}
+
+define i8 @ucmp_undef() {
+; CHECK-LABEL: define i8 @ucmp_undef() {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 2, i32 undef)
+; CHECK-NEXT:    ret i8 [[TMP1]]
+;
+  %1 = call i8 @llvm.ucmp(i32 2, i32 undef)
+  ret i8 %1
+}
+
+define <4 x i8> @ucmp_lt_splat() {
+; CHECK-LABEL: define <4 x i8> @ucmp_lt_splat() {
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i8> @llvm.ucmp.v4i8.v4i32(<4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
+; CHECK-NEXT:    ret <4 x i8> [[TMP1]]
+;
+  %1 = call <4 x i8> @llvm.ucmp(<4 x i32> splat(i32 1), <4 x i32> splat(i32 3))
+  ret <4 x i8> %1
+}
+
+define <4 x i8> @scmp_nonsplat() {
+; CHECK-LABEL: define <4 x i8> @scmp_nonsplat() {
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i8> @llvm.scmp.v4i8.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 -1, i32 1, i32 -2, i32 4>)
+; CHECK-NEXT:    ret <4 x i8> [[TMP1]]
+;
+  %1 = call <4 x i8> @llvm.scmp(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 -1, i32 1, i32 -2, i32 4>)
+  ret <4 x i8> %1
+}

>From 0da0092681ab4964fb3b0823c163e3dc8c071c39 Mon Sep 17 00:00:00 2001
From: Poseydon42 <vvmposeydon at gmail.com>
Date: Thu, 30 May 2024 16:04:38 +0100
Subject: [PATCH 2/2] [InstSimplify] Implement constant folding for UCMP/SCMP
 intrinsics

---
 llvm/lib/Analysis/ConstantFolding.cpp      | 17 ++++++++++
 llvm/test/Transforms/InstSimplify/uscmp.ll | 36 ++++++++--------------
 2 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 046a769453808..e377eac0a4fe6 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1506,6 +1506,8 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::smin:
   case Intrinsic::umax:
   case Intrinsic::umin:
+  case Intrinsic::scmp:
+  case Intrinsic::ucmp:
   case Intrinsic::sadd_with_overflow:
   case Intrinsic::uadd_with_overflow:
   case Intrinsic::ssub_with_overflow:
@@ -2764,6 +2766,21 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
                   ? *C0
                   : *C1);
 
+    case Intrinsic::scmp:
+    case Intrinsic::ucmp:
+      if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))
+        return PoisonValue::get(Ty);
+
+      if (!C0 || !C1)
+        return ConstantInt::get(Ty, 0);
+
+      int Res;
+      if (IntrinsicID == Intrinsic::scmp)
+        Res = C0->sgt(*C1) ? 1 : C0->slt(*C1) ? -1 : 0;
+      else
+        Res = C0->ugt(*C1) ? 1 : C0->ult(*C1) ? -1 : 0;
+      return ConstantInt::get(Ty, Res, /*IsSigned=*/true);
+
     case Intrinsic::usub_with_overflow:
     case Intrinsic::ssub_with_overflow:
       // X - undef -> { 0, false }
diff --git a/llvm/test/Transforms/InstSimplify/uscmp.ll b/llvm/test/Transforms/InstSimplify/uscmp.ll
index 91b33946f1c92..adfcc313eff9e 100644
--- a/llvm/test/Transforms/InstSimplify/uscmp.ll
+++ b/llvm/test/Transforms/InstSimplify/uscmp.ll
@@ -3,8 +3,7 @@
 
 define i8 @scmp_lt() {
 ; CHECK-LABEL: define i8 @scmp_lt() {
-; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.scmp.i8.i32(i32 -7, i32 3)
-; CHECK-NEXT:    ret i8 [[TMP1]]
+; CHECK-NEXT:    ret i8 -1
 ;
   %1 = call i8 @llvm.scmp(i32 -7, i32 3)
   ret i8 %1
@@ -12,8 +11,7 @@ define i8 @scmp_lt() {
 
 define i8 @scmp_eq() {
 ; CHECK-LABEL: define i8 @scmp_eq() {
-; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.scmp.i8.i32(i32 2, i32 2)
-; CHECK-NEXT:    ret i8 [[TMP1]]
+; CHECK-NEXT:    ret i8 0
 ;
   %1 = call i8 @llvm.scmp(i32 2, i32 2)
   ret i8 %1
@@ -21,8 +19,7 @@ define i8 @scmp_eq() {
 
 define i8 @scmp_gt() {
 ; CHECK-LABEL: define i8 @scmp_gt() {
-; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.scmp.i8.i32(i32 2, i32 -7)
-; CHECK-NEXT:    ret i8 [[TMP1]]
+; CHECK-NEXT:    ret i8 1
 ;
   %1 = call i8 @llvm.scmp(i32 2, i32 -7)
   ret i8 %1
@@ -30,8 +27,7 @@ define i8 @scmp_gt() {
 
 define i8 @ucmp_lt() {
 ; CHECK-LABEL: define i8 @ucmp_lt() {
-; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 7, i32 12)
-; CHECK-NEXT:    ret i8 [[TMP1]]
+; CHECK-NEXT:    ret i8 -1
 ;
   %1 = call i8 @llvm.ucmp(i32 7, i32 12)
   ret i8 %1
@@ -39,8 +35,7 @@ define i8 @ucmp_lt() {
 
 define i2 @ucmp_eq() {
 ; CHECK-LABEL: define i2 @ucmp_eq() {
-; CHECK-NEXT:    [[TMP1:%.*]] = call i2 @llvm.ucmp.i2.i32(i32 12, i32 12)
-; CHECK-NEXT:    ret i2 [[TMP1]]
+; CHECK-NEXT:    ret i2 0
 ;
   %1 = call i2 @llvm.ucmp(i32 12, i32 12)
   ret i2 %1
@@ -48,8 +43,7 @@ define i2 @ucmp_eq() {
 
 define i100 @ucmp_gt() {
 ; CHECK-LABEL: define i100 @ucmp_gt() {
-; CHECK-NEXT:    [[TMP1:%.*]] = call i100 @llvm.ucmp.i100.i32(i32 7, i32 3)
-; CHECK-NEXT:    ret i100 [[TMP1]]
+; CHECK-NEXT:    ret i100 1
 ;
   %1 = call i100 @llvm.ucmp(i32 7, i32 3)
   ret i100 %1
@@ -57,8 +51,7 @@ define i100 @ucmp_gt() {
 
 define i8 @ucmp_poison() {
 ; CHECK-LABEL: define i8 @ucmp_poison() {
-; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 poison, i32 5)
-; CHECK-NEXT:    ret i8 [[TMP1]]
+; CHECK-NEXT:    ret i8 poison
 ;
   %1 = call i8 @llvm.ucmp(i32 poison, i32 5)
   ret i8 %1
@@ -66,8 +59,7 @@ define i8 @ucmp_poison() {
 
 define i8 @scmp_poison() {
 ; CHECK-LABEL: define i8 @scmp_poison() {
-; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.scmp.i8.i32(i32 0, i32 poison)
-; CHECK-NEXT:    ret i8 [[TMP1]]
+; CHECK-NEXT:    ret i8 poison
 ;
   %1 = call i8 @llvm.scmp(i32 0, i32 poison)
   ret i8 %1
@@ -75,8 +67,7 @@ define i8 @scmp_poison() {
 
 define i8 @scmp_undef() {
 ; CHECK-LABEL: define i8 @scmp_undef() {
-; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.scmp.i8.i32(i32 undef, i32 -12)
-; CHECK-NEXT:    ret i8 [[TMP1]]
+; CHECK-NEXT:    ret i8 0
 ;
   %1 = call i8 @llvm.scmp(i32 undef, i32 -12)
   ret i8 %1
@@ -84,8 +75,7 @@ define i8 @scmp_undef() {
 
 define i8 @ucmp_undef() {
 ; CHECK-LABEL: define i8 @ucmp_undef() {
-; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.ucmp.i8.i32(i32 2, i32 undef)
-; CHECK-NEXT:    ret i8 [[TMP1]]
+; CHECK-NEXT:    ret i8 0
 ;
   %1 = call i8 @llvm.ucmp(i32 2, i32 undef)
   ret i8 %1
@@ -93,8 +83,7 @@ define i8 @ucmp_undef() {
 
 define <4 x i8> @ucmp_lt_splat() {
 ; CHECK-LABEL: define <4 x i8> @ucmp_lt_splat() {
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i8> @llvm.ucmp.v4i8.v4i32(<4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
-; CHECK-NEXT:    ret <4 x i8> [[TMP1]]
+; CHECK-NEXT:    ret <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>
 ;
   %1 = call <4 x i8> @llvm.ucmp(<4 x i32> splat(i32 1), <4 x i32> splat(i32 3))
   ret <4 x i8> %1
@@ -102,8 +91,7 @@ define <4 x i8> @ucmp_lt_splat() {
 
 define <4 x i8> @scmp_nonsplat() {
 ; CHECK-LABEL: define <4 x i8> @scmp_nonsplat() {
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i8> @llvm.scmp.v4i8.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 -1, i32 1, i32 -2, i32 4>)
-; CHECK-NEXT:    ret <4 x i8> [[TMP1]]
+; CHECK-NEXT:    ret <4 x i8> <i8 1, i8 0, i8 1, i8 -1>
 ;
   %1 = call <4 x i8> @llvm.scmp(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 -1, i32 1, i32 -2, i32 4>)
   ret <4 x i8> %1



More information about the llvm-commits mailing list