[llvm] [InstCombine] fold unsigned predicates on srem result (PR #122520)
Jacob Young via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 10 12:05:16 PST 2025
https://github.com/jacobly0 created https://github.com/llvm/llvm-project/pull/122520
This allows more signed floor division implementations to be optimized to an arithmetic shift when the divisor is a known power of two.
Proof for the implemented optimizations:
https://alive2.llvm.org/ce/z/UtdPgv
Proof for the test cases:
https://alive2.llvm.org/ce/z/M_PBjw
>From 712a5bff44e96ce34d0c42d8699721aa09bdc30c Mon Sep 17 00:00:00 2001
From: Jacob Young <jacobly0 at users.noreply.github.com>
Date: Fri, 10 Jan 2025 14:00:49 -0500
Subject: [PATCH 1/2] [InstCombine][NFC] precommit tests for signed floor
division
---
llvm/test/Transforms/InstCombine/add.ll | 34 +++++++++++++++++++++++++
1 file changed, 34 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll
index 222f87fa3a5f18..5684bee06b9164 100644
--- a/llvm/test/Transforms/InstCombine/add.ll
+++ b/llvm/test/Transforms/InstCombine/add.ll
@@ -3018,6 +3018,40 @@ define i32 @floor_sdiv_wrong_op(i32 %x, i32 %y) {
ret i32 %r
}
+define i32 @floor_sdiv_using_srem_by_8(i32 %x) {
+; CHECK-LABEL: @floor_sdiv_using_srem_by_8(
+; CHECK-NEXT: [[D:%.*]] = sdiv i32 [[X:%.*]], 8
+; CHECK-NEXT: [[R:%.*]] = srem i32 [[X]], 8
+; CHECK-NEXT: [[I:%.*]] = icmp ugt i32 [[R]], -2147483648
+; CHECK-NEXT: [[S:%.*]] = sext i1 [[I]] to i32
+; CHECK-NEXT: [[F:%.*]] = add nsw i32 [[D]], [[S]]
+; CHECK-NEXT: ret i32 [[F]]
+;
+ %d = sdiv i32 %x, 8
+ %r = srem i32 %x, 8
+ %i = icmp ugt i32 %r, -2147483648
+ %s = sext i1 %i to i32
+ %f = add i32 %d, %s
+ ret i32 %f
+}
+
+define i32 @floor_sdiv_using_srem_by_2(i32 %x) {
+; CHECK-LABEL: @floor_sdiv_using_srem_by_2(
+; CHECK-NEXT: [[D:%.*]] = sdiv i32 [[X:%.*]], 2
+; CHECK-NEXT: [[R:%.*]] = srem i32 [[X]], 2
+; CHECK-NEXT: [[I:%.*]] = icmp ugt i32 [[R]], -2147483648
+; CHECK-NEXT: [[S:%.*]] = sext i1 [[I]] to i32
+; CHECK-NEXT: [[F:%.*]] = add nsw i32 [[D]], [[S]]
+; CHECK-NEXT: ret i32 [[F]]
+;
+ %d = sdiv i32 %x, 2
+ %r = srem i32 %x, 2
+ %i = icmp ugt i32 %r, -2147483648
+ %s = sext i1 %i to i32
+ %f = add i32 %d, %s
+ ret i32 %f
+}
+
; (X s>> (BW - 1)) + (zext (X s> 0)) --> (X s>> (BW - 1)) | (zext (X != 0))
define i8 @signum_i8_i8(i8 %x) {
>From 5646920eade839e18fd392f7ab29c711884cd4ae Mon Sep 17 00:00:00 2001
From: Jacob Young <jacobly0 at users.noreply.github.com>
Date: Fri, 10 Jan 2025 14:58:10 -0500
Subject: [PATCH 2/2] [InstCombine] fold unsigned predicates on srem result
This allows more signed floor division implementations to be optimized to
an arithmetic shift when the divisor is a known power of two.
Proof for the implemented optimizations:
https://alive2.llvm.org/ce/z/UtdPgv
Proof for the test cases:
https://alive2.llvm.org/ce/z/M_PBjw
---
.../InstCombine/InstCombineCompares.cpp | 22 +++++++++++++++----
llvm/test/Transforms/InstCombine/add.ll | 12 ++--------
2 files changed, 20 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 2e457257599493..16cd4e847a3ad4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2679,7 +2679,8 @@ Instruction *InstCombinerImpl::foldICmpSRemConstant(ICmpInst &Cmp,
// (X % pow2C) sgt/slt 0
const ICmpInst::Predicate Pred = Cmp.getPredicate();
if (Pred != ICmpInst::ICMP_SGT && Pred != ICmpInst::ICMP_SLT &&
- Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
+ Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE &&
+ Pred != ICmpInst::ICMP_UGT && Pred != ICmpInst::ICMP_ULT)
return nullptr;
// TODO: The one-use check is standard because we do not typically want to
@@ -2692,23 +2693,36 @@ Instruction *InstCombinerImpl::foldICmpSRemConstant(ICmpInst &Cmp,
if (!match(SRem->getOperand(1), m_Power2(DivisorC)))
return nullptr;
+ Type *Ty = SRem->getType();
+ APInt SignMask = APInt::getSignMask(Ty->getScalarSizeInBits());
+ // Setting the lsb instead of adding one properly handles i1.
+ APInt SignMaskOrOne = SignMask | 1;
+
// For cmp_sgt/cmp_slt only zero valued C is handled.
// For cmp_eq/cmp_ne only positive valued C is handled.
+ // For cmp_ugt only C equal to signed min or signed max is handled.
+ // For cmp_ult only C equal to signed min or (signed min | 1) is handled.
if (((Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT) &&
!C.isZero()) ||
((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
- !C.isStrictlyPositive()))
+ !C.isStrictlyPositive()) ||
+ (Pred == ICmpInst::ICMP_UGT && !C.isMinSignedValue() &&
+ !C.isMaxSignedValue()) ||
+ (Pred == ICmpInst::ICMP_ULT && !C.isMinSignedValue() &&
+ C != SignMaskOrOne))
return nullptr;
// Mask off the sign bit and the modulo bits (low-bits).
- Type *Ty = SRem->getType();
- APInt SignMask = APInt::getSignMask(Ty->getScalarSizeInBits());
Constant *MaskC = ConstantInt::get(Ty, SignMask | (*DivisorC - 1));
Value *And = Builder.CreateAnd(SRem->getOperand(0), MaskC);
if (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE)
return new ICmpInst(Pred, And, ConstantInt::get(Ty, C));
+ if (Pred == ICmpInst::ICMP_ULT)
+ return new ICmpInst(ICmpInst::ICMP_ULT, And,
+ ConstantInt::get(Ty, SignMaskOrOne));
+
// For 'is positive?' check that the sign-bit is clear and at least 1 masked
// bit is set. Example:
// (i8 X % 32) s> 0 --> (X & 159) s> 0
diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll
index 5684bee06b9164..495f99824652d6 100644
--- a/llvm/test/Transforms/InstCombine/add.ll
+++ b/llvm/test/Transforms/InstCombine/add.ll
@@ -3020,11 +3020,7 @@ define i32 @floor_sdiv_wrong_op(i32 %x, i32 %y) {
define i32 @floor_sdiv_using_srem_by_8(i32 %x) {
; CHECK-LABEL: @floor_sdiv_using_srem_by_8(
-; CHECK-NEXT: [[D:%.*]] = sdiv i32 [[X:%.*]], 8
-; CHECK-NEXT: [[R:%.*]] = srem i32 [[X]], 8
-; CHECK-NEXT: [[I:%.*]] = icmp ugt i32 [[R]], -2147483648
-; CHECK-NEXT: [[S:%.*]] = sext i1 [[I]] to i32
-; CHECK-NEXT: [[F:%.*]] = add nsw i32 [[D]], [[S]]
+; CHECK-NEXT: [[F:%.*]] = ashr i32 [[X:%.*]], 3
; CHECK-NEXT: ret i32 [[F]]
;
%d = sdiv i32 %x, 8
@@ -3037,11 +3033,7 @@ define i32 @floor_sdiv_using_srem_by_8(i32 %x) {
define i32 @floor_sdiv_using_srem_by_2(i32 %x) {
; CHECK-LABEL: @floor_sdiv_using_srem_by_2(
-; CHECK-NEXT: [[D:%.*]] = sdiv i32 [[X:%.*]], 2
-; CHECK-NEXT: [[R:%.*]] = srem i32 [[X]], 2
-; CHECK-NEXT: [[I:%.*]] = icmp ugt i32 [[R]], -2147483648
-; CHECK-NEXT: [[S:%.*]] = sext i1 [[I]] to i32
-; CHECK-NEXT: [[F:%.*]] = add nsw i32 [[D]], [[S]]
+; CHECK-NEXT: [[F:%.*]] = ashr i32 [[X:%.*]], 1
; CHECK-NEXT: ret i32 [[F]]
;
%d = sdiv i32 %x, 2
More information about the llvm-commits
mailing list