[llvm] [HEXAGON] Fix semantics of ordered FP compares (PR #131089)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 13 00:40:05 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-hexagon
Author: None (aankit-ca)
<details>
<summary>Changes</summary>
For the ordered FP compare bitcode instructions, the Hexagon backend was assuming that no operand could be a NaN. This assumption is flawed. This patch fixes the code generation to produce fpcmp.uo and the appropriate bit comparison operators to account for the case when an operand to an FP compare is a NaN.
---
Full diff: https://github.com/llvm/llvm-project/pull/131089.diff
2 Files Affected:
- (modified) llvm/lib/Target/Hexagon/HexagonPatterns.td (+24-14)
- (added) llvm/test/CodeGen/Hexagon/fcmp-nan.ll (+189)
``````````diff
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index cba5ff1ab0d9b..244f204539c89 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -721,11 +721,6 @@ def: OpR_RR_pat<A2_vcmpwgtu, setugt, v2i1, V2I32>;
def: OpR_RR_pat<F2_sfcmpeq, seteq, i1, F32>;
def: OpR_RR_pat<F2_sfcmpgt, setgt, i1, F32>;
def: OpR_RR_pat<F2_sfcmpge, setge, i1, F32>;
-def: OpR_RR_pat<F2_sfcmpeq, setoeq, i1, F32>;
-def: OpR_RR_pat<F2_sfcmpgt, setogt, i1, F32>;
-def: OpR_RR_pat<F2_sfcmpge, setoge, i1, F32>;
-def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setolt>, i1, F32>;
-def: OpR_RR_pat<F2_sfcmpge, RevCmp<setole>, i1, F32>;
def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setlt>, i1, F32>;
def: OpR_RR_pat<F2_sfcmpge, RevCmp<setle>, i1, F32>;
def: OpR_RR_pat<F2_sfcmpuo, setuo, i1, F32>;
@@ -733,11 +728,6 @@ def: OpR_RR_pat<F2_sfcmpuo, setuo, i1, F32>;
def: OpR_RR_pat<F2_dfcmpeq, seteq, i1, F64>;
def: OpR_RR_pat<F2_dfcmpgt, setgt, i1, F64>;
def: OpR_RR_pat<F2_dfcmpge, setge, i1, F64>;
-def: OpR_RR_pat<F2_dfcmpeq, setoeq, i1, F64>;
-def: OpR_RR_pat<F2_dfcmpgt, setogt, i1, F64>;
-def: OpR_RR_pat<F2_dfcmpge, setoge, i1, F64>;
-def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setolt>, i1, F64>;
-def: OpR_RR_pat<F2_dfcmpge, RevCmp<setole>, i1, F64>;
def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setlt>, i1, F64>;
def: OpR_RR_pat<F2_dfcmpge, RevCmp<setle>, i1, F64>;
def: OpR_RR_pat<F2_dfcmpuo, setuo, i1, F64>;
@@ -900,15 +890,35 @@ def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, RevCmp<setule>, i1, F64>;
def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, RevCmp<setult>, i1, F64>;
def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune, i1, F64>;
-def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>;
-def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>;
+class T4<InstHexagon MI1, InstHexagon MI2, InstHexagon MI3, InstHexagon MI4>
+ : OutPatFrag<(ops node:$Rs, node:$Rt),
+ (MI1 (MI2 (MI3 $Rs, $Rt), (MI4 $Rs, $Rt)))>;
-def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setone, i1, F64>;
-def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne, i1, F64>;
+class Cmpof<InstHexagon MI>: T3<C2_andn, MI, F2_sfcmpuo>;
+class Cmpod<InstHexagon MI>: T3<C2_andn, MI, F2_dfcmpuo>;
+
+class Cmpofn<InstHexagon MI>: T4<C2_not, C2_or, MI, F2_sfcmpuo>;
+class Cmpodn<InstHexagon MI>: T4<C2_not, C2_or, MI, F2_dfcmpuo>;
+
+def: OpmR_RR_pat<Cmpof<F2_sfcmpeq>, setoeq, i1, F32>;
+def: OpmR_RR_pat<Cmpof<F2_sfcmpge>, setoge, i1, F32>;
+def: OpmR_RR_pat<Cmpof<F2_sfcmpgt>, setogt, i1, F32>;
+def: OpmR_RR_pat<Cmpof<F2_sfcmpge>, RevCmp<setole>, i1, F32>;
+def: OpmR_RR_pat<Cmpof<F2_sfcmpgt>, RevCmp<setolt>, i1, F32>;
+def: OpmR_RR_pat<Cmpofn<F2_sfcmpeq>, setone, i1, F32>;
+
+def: OpmR_RR_pat<Cmpod<F2_dfcmpeq>, setoeq, i1, F64>;
+def: OpmR_RR_pat<Cmpod<F2_dfcmpge>, setoge, i1, F64>;
+def: OpmR_RR_pat<Cmpod<F2_dfcmpgt>, setogt, i1, F64>;
+def: OpmR_RR_pat<Cmpod<F2_dfcmpge>, RevCmp<setole>, i1, F64>;
+def: OpmR_RR_pat<Cmpod<F2_dfcmpgt>, RevCmp<setolt>, i1, F64>;
+def: OpmR_RR_pat<Cmpodn<F2_dfcmpeq>, setone, i1, F64>;
def: OpmR_RR_pat<Outn<F2_sfcmpuo>, seto, i1, F32>;
def: OpmR_RR_pat<Outn<F2_dfcmpuo>, seto, i1, F64>;
+def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>;
+def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne, i1, F64>;
// --(6) Select ----------------------------------------------------------
//
diff --git a/llvm/test/CodeGen/Hexagon/fcmp-nan.ll b/llvm/test/CodeGen/Hexagon/fcmp-nan.ll
new file mode 100644
index 0000000000000..1469402911601
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/fcmp-nan.ll
@@ -0,0 +1,189 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+;
+; Test that all FP ordered compare instructions generate the correct
+; post-processing to accommodate NaNs.
+;
+; Specifically for ordered FP compares, we have to check if one of
+; the operands was a NaN to conform to the semantics of the ordered
+; fcmp bitcode instruction
+;
+target triple = "hexagon"
+
+;
+; Functions for float:
+;
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.eq(r0,r1)
+; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r0,r1)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_oeq_f(float %val, float %val2) local_unnamed_addr #0 {
+entry:
+ %cmpinf = fcmp oeq float %val, %val2
+ %0 = zext i1 %cmpinf to i32
+ ret i32 %0
+}
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.eq(r0,r1)
+; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r0,r1)
+; CHECK: [[REG2:p([0-3])]] = or([[REG0]],[[REG1]])
+; CHECK: r0 = mux([[REG2]],#0,#1)
+;
+define i32 @compare_one_f(float %val, float %val2) local_unnamed_addr #0 {
+entry:
+ %cmpinf = fcmp one float %val, %val2
+ %0 = zext i1 %cmpinf to i32
+ ret i32 %0
+}
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.gt(r0,r1)
+; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r0,r1)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_ogt_f(float %val, float %val2) local_unnamed_addr #0 {
+entry:
+ %cmpinf = fcmp ogt float %val, %val2
+ %0 = zext i1 %cmpinf to i32
+ ret i32 %0
+}
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.ge(r1,r0)
+; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r1,r0)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_ole_f(float %val, float %val2) local_unnamed_addr #0 {
+entry:
+ %cmpinf = fcmp ole float %val, %val2
+ %0 = zext i1 %cmpinf to i32
+ ret i32 %0
+}
+
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.ge(r0,r1)
+; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r0,r1)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_oge_f(float %val, float %val2) local_unnamed_addr #0 {
+entry:
+ %cmpinf = fcmp oge float %val, %val2
+ %0 = zext i1 %cmpinf to i32
+ ret i32 %0
+}
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = sfcmp.gt(r1,r0)
+; CHECK-DAG: [[REG1:p([0-3])]] = sfcmp.uo(r1,r0)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_olt_f(float %val, float %val2) local_unnamed_addr #0 {
+entry:
+ %cmpinf = fcmp olt float %val, %val2
+ %0 = zext i1 %cmpinf to i32
+ ret i32 %0
+}
+
+
+
+;
+; Functions for double:
+;
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.eq(r1:0,r3:2)
+; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r1:0,r3:2)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_oeq_d(double %val, double %val2) local_unnamed_addr #0 {
+entry:
+ %cmpinf = fcmp oeq double %val, %val2
+ %0 = zext i1 %cmpinf to i32
+ ret i32 %0
+}
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.eq(r1:0,r3:2)
+; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r1:0,r3:2)
+; CHECK: [[REG2:p([0-3])]] = or([[REG0]],[[REG1]])
+; CHECK: r0 = mux([[REG2]],#0,#1)
+;
+define i32 @compare_one_d(double %val, double %val2) local_unnamed_addr #0 {
+entry:
+ %cmpinf = fcmp one double %val, %val2
+ %0 = zext i1 %cmpinf to i32
+ ret i32 %0
+}
+
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.gt(r1:0,r3:2)
+; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r1:0,r3:2)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_ogt_d(double %val, double %val2) local_unnamed_addr #0 {
+entry:
+ %cmpinf = fcmp ogt double %val, %val2
+ %0 = zext i1 %cmpinf to i32
+ ret i32 %0
+}
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.ge(r3:2,r1:0)
+; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r3:2,r1:0)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_ole_d(double %val, double %val2) local_unnamed_addr #0 {
+entry:
+ %cmpinf = fcmp ole double %val, %val2
+ %0 = zext i1 %cmpinf to i32
+ ret i32 %0
+}
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.ge(r1:0,r3:2)
+; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r1:0,r3:2)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_oge_d(double %val, double %val2) local_unnamed_addr #0 {
+entry:
+ %cmpinf = fcmp oge double %val, %val2
+ %0 = zext i1 %cmpinf to i32
+ ret i32 %0
+}
+
+
+;
+; CHECK-DAG: [[REG0:p([0-3])]] = dfcmp.gt(r3:2,r1:0)
+; CHECK-DAG: [[REG1:p([0-3])]] = dfcmp.uo(r3:2,r1:0)
+; CHECK: [[REG2:p([0-3])]] = and([[REG0]],![[REG1]])
+; CHECK: r0 = mux([[REG2]],#1,#0)
+;
+define i32 @compare_olt_d(double %val, double %val2) local_unnamed_addr #0 {
+entry:
+ %cmpinf = fcmp olt double %val, %val2
+ %0 = zext i1 %cmpinf to i32
+ ret i32 %0
+}
+
``````````
</details>
https://github.com/llvm/llvm-project/pull/131089
More information about the llvm-commits
mailing list