[llvm] [InstCombine] Optimize redundant floating point comparisons in `or`/`and` inst's (PR #158097)
Rajveer Singh Bharadwaj via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 14 05:38:32 PDT 2025
https://github.com/Rajveer100 updated https://github.com/llvm/llvm-project/pull/158097
>From 52c9e4f807d6e028abbff79eec94c1cc6c02870b Mon Sep 17 00:00:00 2001
From: Rajveer <rajveer.developer at icloud.com>
Date: Thu, 11 Sep 2025 20:52:21 +0530
Subject: [PATCH] [InstCombine] Optimize redundant floating point comparisons
in `or`/`and` inst's
Resolves #157371
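When both operands of an `or`/`and` are `fcmp olt` (or both are `fcmp ogt`)
comparisons of the same value against different constants, one of the two
`fcmp` instructions is redundant and can be eliminated during simplification.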
---
llvm/lib/Analysis/InstructionSimplify.cpp | 29 +++++
llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll | 5 +-
llvm/test/Transforms/InstCombine/or-fcmp.ll | 104 +++++++++++++++++
.../Transforms/InstCombine/redundant-fcmp.ll | 106 ++++++++++++++++++
4 files changed, 240 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/Transforms/InstCombine/redundant-fcmp.ll
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 5907e21065331..13b5e3ed4fdbb 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -1850,6 +1850,35 @@ static Value *simplifyAndOrOfFCmps(const SimplifyQuery &Q, FCmpInst *LHS,
: ConstantInt::getBool(LHS->getType(), !IsAnd);
}
+ Value *V0;
+ const APFloat *V0Op1, *V1Op1;
+ // (fcmp olt V0, V0Op1) || (fcmp olt V0, V1Op1)
+ // --> fcmp olt V0, max(V0Op1, V1Op1)
+ // (fcmp ogt V0, V0Op1) || (fcmp ogt V0, V1Op1)
+ // --> fcmp ogt V0, min(V0Op1, V1Op1)
+ //
+ // (fcmp olt V0, V0Op1) && (fcmp olt V0, V1Op1)
+ // --> fcmp olt V0, min(V0Op1, V1Op1)
+ // (fcmp ogt V0, V0Op1) && (fcmp ogt V0, V1Op1)
+ // --> fcmp ogt V0, max(V0Op1, V1Op1)
+ if (match(LHS, m_SpecificFCmp(FCmpInst::FCMP_OLT, m_Value(V0),
+ m_APFloat(V0Op1))) &&
+ match(RHS, m_SpecificFCmp(FCmpInst::FCMP_OLT, m_Specific(V0),
+ m_APFloat(V1Op1)))) {
+ if (*V0Op1 > *V1Op1)
+ return !IsAnd ? LHS : RHS;
+ if (*V1Op1 > *V0Op1)
+ return !IsAnd ? RHS : LHS;
+ } else if (match(LHS, m_SpecificFCmp(FCmpInst::FCMP_OGT, m_Value(V0),
+ m_APFloat(V0Op1))) &&
+ match(RHS, m_SpecificFCmp(FCmpInst::FCMP_OGT, m_Specific(V0),
+ m_APFloat(V1Op1)))) {
+ if (*V0Op1 < *V1Op1)
+ return !IsAnd ? LHS : RHS;
+ if (*V1Op1 < *V0Op1)
+ return !IsAnd ? RHS : LHS;
+ }
+
return nullptr;
}
diff --git a/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll b/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll
index 3059b5b445958..f76c68919d8e1 100644
--- a/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll
+++ b/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll
@@ -5,10 +5,7 @@
define amdgpu_ps float @xor3_i1_const(float inreg %arg1, i32 inreg %arg2) {
; GCN-LABEL: xor3_i1_const:
; GCN: ; %bb.0: ; %main_body
-; GCN-NEXT: v_mov_b32_e32 v0, 0x42640000
-; GCN-NEXT: v_cmp_lt_f32_e64 s[2:3], s0, 0
-; GCN-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
-; GCN-NEXT: s_and_b64 s[0:1], s[2:3], vcc
+; GCN-NEXT: v_cmp_lt_f32_e64 s[0:1], s0, 0
; GCN-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s[0:1]
; GCN-NEXT: ; return to shader part epilog
main_body:
diff --git a/llvm/test/Transforms/InstCombine/or-fcmp.ll b/llvm/test/Transforms/InstCombine/or-fcmp.ll
index 193fe4b5cc722..cf08807696fa2 100644
--- a/llvm/test/Transforms/InstCombine/or-fcmp.ll
+++ b/llvm/test/Transforms/InstCombine/or-fcmp.ll
@@ -4657,3 +4657,107 @@ define i1 @or_fcmp_reassoc4(i1 %x, double %a, double %b) {
%retval = or i1 %cmp1, %or
ret i1 %retval
}
+
+define i1 @or_fcmp_redundant_or1(ptr %arg0) {
+; CHECK-LABEL: @or_fcmp_redundant_or1(
+; CHECK-NEXT: [[V0:%.*]] = load double, ptr [[ARG0:%.*]], align 8
+; CHECK-NEXT: [[V2:%.*]] = fcmp nsz olt double [[V0]], 1.990000e+00
+; CHECK-NEXT: ret i1 [[V2]]
+;
+ %v0 = load double, ptr %arg0, align 8
+ %v1 = fcmp nsz olt double %v0, 1.000000e-02
+ %v2 = fcmp nsz olt double %v0, 1.990000e+00
+ %v3 = or i1 %v1, %v2
+ ret i1 %v3
+}
+
+define i1 @or_fcmp_redundant_or2(ptr %arg0) {
+; CHECK-LABEL: @or_fcmp_redundant_or2(
+; CHECK-NEXT: [[V0:%.*]] = load double, ptr [[ARG0:%.*]], align 8
+; CHECK-NEXT: [[V1:%.*]] = fcmp nsz olt double [[V0]], 2.300000e+00
+; CHECK-NEXT: ret i1 [[V1]]
+;
+ %v0 = load double, ptr %arg0, align 8
+ %v1 = fcmp nsz olt double %v0, 2.300000e+00
+ %v2 = fcmp nsz olt double %v0, 1.990000e+00
+ %v3 = or i1 %v1, %v2
+ ret i1 %v3
+}
+
+define i1 @or_fcmp_redundant_or3(ptr %arg0) {
+; CHECK-LABEL: @or_fcmp_redundant_or3(
+; CHECK-NEXT: [[V0:%.*]] = load double, ptr [[ARG0:%.*]], align 8
+; CHECK-NEXT: [[V1:%.*]] = fcmp nsz ogt double [[V0]], 1.000000e-02
+; CHECK-NEXT: ret i1 [[V1]]
+;
+ %v0 = load double, ptr %arg0, align 8
+ %v1 = fcmp nsz ogt double %v0, 1.000000e-02
+ %v2 = fcmp nsz ogt double %v0, 1.990000e+00
+ %v3 = or i1 %v1, %v2
+ ret i1 %v3
+}
+
+define i1 @or_fcmp_redundant_or4(ptr %arg0) {
+; CHECK-LABEL: @or_fcmp_redundant_or4(
+; CHECK-NEXT: [[V0:%.*]] = load double, ptr [[ARG0:%.*]], align 8
+; CHECK-NEXT: [[V2:%.*]] = fcmp nsz ogt double [[V0]], 1.990000e+00
+; CHECK-NEXT: ret i1 [[V2]]
+;
+ %v0 = load double, ptr %arg0, align 8
+ %v1 = fcmp nsz ogt double %v0, 2.300000e+00
+ %v2 = fcmp nsz ogt double %v0, 1.990000e+00
+ %v3 = or i1 %v1, %v2
+ ret i1 %v3
+}
+
+define i1 @or_fcmp_redundant_and1(ptr %arg0) {
+; CHECK-LABEL: @or_fcmp_redundant_and1(
+; CHECK-NEXT: [[V0:%.*]] = load double, ptr [[ARG0:%.*]], align 8
+; CHECK-NEXT: [[V1:%.*]] = fcmp nsz olt double [[V0]], 1.000000e-02
+; CHECK-NEXT: ret i1 [[V1]]
+;
+ %v0 = load double, ptr %arg0, align 8
+ %v1 = fcmp nsz olt double %v0, 1.000000e-02
+ %v2 = fcmp nsz olt double %v0, 1.990000e+00
+ %v3 = and i1 %v1, %v2
+ ret i1 %v3
+}
+
+define i1 @or_fcmp_redundant_and2(ptr %arg0) {
+; CHECK-LABEL: @or_fcmp_redundant_and2(
+; CHECK-NEXT: [[V0:%.*]] = load double, ptr [[ARG0:%.*]], align 8
+; CHECK-NEXT: [[V2:%.*]] = fcmp nsz olt double [[V0]], 1.990000e+00
+; CHECK-NEXT: ret i1 [[V2]]
+;
+ %v0 = load double, ptr %arg0, align 8
+ %v1 = fcmp nsz olt double %v0, 2.300000e+00
+ %v2 = fcmp nsz olt double %v0, 1.990000e+00
+ %v3 = and i1 %v1, %v2
+ ret i1 %v3
+}
+
+define i1 @or_fcmp_redundant_and3(ptr %arg0) {
+; CHECK-LABEL: @or_fcmp_redundant_and3(
+; CHECK-NEXT: [[V0:%.*]] = load double, ptr [[ARG0:%.*]], align 8
+; CHECK-NEXT: [[V2:%.*]] = fcmp nsz ogt double [[V0]], 1.990000e+00
+; CHECK-NEXT: ret i1 [[V2]]
+;
+ %v0 = load double, ptr %arg0, align 8
+ %v1 = fcmp nsz ogt double %v0, 1.000000e-02
+ %v2 = fcmp nsz ogt double %v0, 1.990000e+00
+ %v3 = and i1 %v1, %v2
+ ret i1 %v3
+}
+
+define i1 @or_fcmp_redundant_and4(ptr %arg0) {
+; CHECK-LABEL: @or_fcmp_redundant_and4(
+; CHECK-NEXT: [[V0:%.*]] = load double, ptr [[ARG0:%.*]], align 8
+; CHECK-NEXT: [[V1:%.*]] = fcmp nsz ogt double [[V0]], 2.300000e+00
+; CHECK-NEXT: ret i1 [[V1]]
+;
+ %v0 = load double, ptr %arg0, align 8
+ %v1 = fcmp nsz ogt double %v0, 2.300000e+00
+ %v2 = fcmp nsz ogt double %v0, 1.990000e+00
+ %v3 = and i1 %v1, %v2
+ ret i1 %v3
+}
diff --git a/llvm/test/Transforms/InstCombine/redundant-fcmp.ll b/llvm/test/Transforms/InstCombine/redundant-fcmp.ll
new file mode 100644
index 0000000000000..c2c39d94206b0
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/redundant-fcmp.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -O3 -S | FileCheck %s
+
+define i1 @or_fcmp_redundant_or1(ptr %arg0) {
+; CHECK-LABEL: @or_fcmp_redundant_or1(
+; CHECK-NEXT: [[V0:%.*]] = load double, ptr [[ARG0:%.*]], align 8
+; CHECK-NEXT: [[V2:%.*]] = fcmp nsz olt double [[V0]], 1.990000e+00
+; CHECK-NEXT: ret i1 [[V2]]
+;
+ %v0 = load double, ptr %arg0, align 8
+ %v1 = fcmp nsz olt double %v0, 1.000000e-02
+ %v2 = fcmp nsz olt double %v0, 1.990000e+00
+ %v3 = or i1 %v1, %v2
+ ret i1 %v3
+}
+
+define i1 @or_fcmp_redundant_or2(ptr %arg0) {
+; CHECK-LABEL: @or_fcmp_redundant_or2(
+; CHECK-NEXT: [[V0:%.*]] = load double, ptr [[ARG0:%.*]], align 8
+; CHECK-NEXT: [[V1:%.*]] = fcmp nsz olt double [[V0]], 2.300000e+00
+; CHECK-NEXT: ret i1 [[V1]]
+;
+ %v0 = load double, ptr %arg0, align 8
+ %v1 = fcmp nsz olt double %v0, 2.300000e+00
+ %v2 = fcmp nsz olt double %v0, 1.990000e+00
+ %v3 = or i1 %v1, %v2
+ ret i1 %v3
+}
+
+define i1 @or_fcmp_redundant_or3(ptr %arg0) {
+; CHECK-LABEL: @or_fcmp_redundant_or3(
+; CHECK-NEXT: [[V0:%.*]] = load double, ptr [[ARG0:%.*]], align 8
+; CHECK-NEXT: [[V1:%.*]] = fcmp nsz ogt double [[V0]], 1.000000e-02
+; CHECK-NEXT: ret i1 [[V1]]
+;
+ %v0 = load double, ptr %arg0, align 8
+ %v1 = fcmp nsz ogt double %v0, 1.000000e-02
+ %v2 = fcmp nsz ogt double %v0, 1.990000e+00
+ %v3 = or i1 %v1, %v2
+ ret i1 %v3
+}
+
+define i1 @or_fcmp_redundant_or4(ptr %arg0) {
+; CHECK-LABEL: @or_fcmp_redundant_or4(
+; CHECK-NEXT: [[V0:%.*]] = load double, ptr [[ARG0:%.*]], align 8
+; CHECK-NEXT: [[V2:%.*]] = fcmp nsz ogt double [[V0]], 1.990000e+00
+; CHECK-NEXT: ret i1 [[V2]]
+;
+ %v0 = load double, ptr %arg0, align 8
+ %v1 = fcmp nsz ogt double %v0, 2.300000e+00
+ %v2 = fcmp nsz ogt double %v0, 1.990000e+00
+ %v3 = or i1 %v1, %v2
+ ret i1 %v3
+}
+
+define i1 @or_fcmp_redundant_and1(ptr %arg0) {
+; CHECK-LABEL: @or_fcmp_redundant_and1(
+; CHECK-NEXT: [[V0:%.*]] = load double, ptr [[ARG0:%.*]], align 8
+; CHECK-NEXT: [[V1:%.*]] = fcmp nsz olt double [[V0]], 1.000000e-02
+; CHECK-NEXT: ret i1 [[V1]]
+;
+ %v0 = load double, ptr %arg0, align 8
+ %v1 = fcmp nsz olt double %v0, 1.000000e-02
+ %v2 = fcmp nsz olt double %v0, 1.990000e+00
+ %v3 = and i1 %v1, %v2
+ ret i1 %v3
+}
+
+define i1 @or_fcmp_redundant_and2(ptr %arg0) {
+; CHECK-LABEL: @or_fcmp_redundant_and2(
+; CHECK-NEXT: [[V0:%.*]] = load double, ptr [[ARG0:%.*]], align 8
+; CHECK-NEXT: [[V2:%.*]] = fcmp nsz olt double [[V0]], 1.990000e+00
+; CHECK-NEXT: ret i1 [[V2]]
+;
+ %v0 = load double, ptr %arg0, align 8
+ %v1 = fcmp nsz olt double %v0, 2.300000e+00
+ %v2 = fcmp nsz olt double %v0, 1.990000e+00
+ %v3 = and i1 %v1, %v2
+ ret i1 %v3
+}
+
+define i1 @or_fcmp_redundant_and3(ptr %arg0) {
+; CHECK-LABEL: @or_fcmp_redundant_and3(
+; CHECK-NEXT: [[V0:%.*]] = load double, ptr [[ARG0:%.*]], align 8
+; CHECK-NEXT: [[V2:%.*]] = fcmp nsz ogt double [[V0]], 1.990000e+00
+; CHECK-NEXT: ret i1 [[V2]]
+;
+ %v0 = load double, ptr %arg0, align 8
+ %v1 = fcmp nsz ogt double %v0, 1.000000e-02
+ %v2 = fcmp nsz ogt double %v0, 1.990000e+00
+ %v3 = and i1 %v1, %v2
+ ret i1 %v3
+}
+
+define i1 @or_fcmp_redundant_and4(ptr %arg0) {
+; CHECK-LABEL: @or_fcmp_redundant_and4(
+; CHECK-NEXT: [[V0:%.*]] = load double, ptr [[ARG0:%.*]], align 8
+; CHECK-NEXT: [[V1:%.*]] = fcmp nsz ogt double [[V0]], 2.300000e+00
+; CHECK-NEXT: ret i1 [[V1]]
+;
+ %v0 = load double, ptr %arg0, align 8
+ %v1 = fcmp nsz ogt double %v0, 2.300000e+00
+ %v2 = fcmp nsz ogt double %v0, 1.990000e+00
+ %v3 = and i1 %v1, %v2
+ ret i1 %v3
+}
More information about the llvm-commits
mailing list