[llvm] [PowerPC] Fix lowering when performing conditional jumps on f128 or f16 (PR #125776)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 4 15:03:08 PST 2025
https://github.com/liushuyu created https://github.com/llvm/llvm-project/pull/125776
This pull request fixes PowerPC instruction-selection lowering when the input IR contains a conditional branch that immediately follows a floating-point comparison on `f128` or `f16` operands, for example:
```llvm
target datalayout = "e-m:e-Fn32-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512"
target triple = "powerpc64le-unknown-linux-gnu"
define fp128 @test_function(fp128 %0) {
%2 = fcmp ogt fp128 %0, 0xL00000000000000000000000000000000
%3 = icmp eq i1 %2, false
br i1 %3, label %5, label %4
4: ; preds = %1
ret fp128 0xL00000000000000000000000000000000
5: ; preds = %1
ret fp128 0xL00000000000000000000000000000000
}
```
>From 4ab3c93da431f9146526831cb0193c40d1919459 Mon Sep 17 00:00:00 2001
From: liushuyu <liushuyu011 at gmail.com>
Date: Tue, 4 Feb 2025 15:55:20 -0700
Subject: [PATCH] [PowerPC] Fix lowering when performing conditional jumps on
f128 or f16 values
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 34 ++++-
llvm/lib/Target/PowerPC/PPCISelLowering.h | 1 +
llvm/test/CodeGen/PowerPC/f128-branch-cond.ll | 55 +++++++
llvm/test/CodeGen/PowerPC/f16-branch-cond.ll | 139 ++++++++++++++++++
4 files changed, 228 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/PowerPC/f16-branch-cond.ll
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index bdc1ac7c7da5891..587ffe0520b05cf 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -223,6 +223,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
}
+ setOperationAction(ISD::BR_CC, MVT::f16, Custom);
if (Subtarget.isISA3_0()) {
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
@@ -1309,7 +1310,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SETCC, MVT::f128, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
- setOperationAction(ISD::BR_CC, MVT::f128, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f128, Custom);
// Lower following f128 select_cc pattern:
// select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
@@ -8218,6 +8219,36 @@ SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
}
+/// LowerBR_CC - Custom-lower a BR_CC whose compared operands are f128 or f16
+/// by re-emitting it as a BR_CC on operands of a different type.
+///
+/// The operands of \p Op are read as (chain, condition code, LHS, RHS,
+/// destination). For f128 the comparison is softened through
+/// softenSetCCOperands; for f16 both operands are first converted to f32.
+/// Any other operand type is not expected to reach this function.
+SDValue PPCTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+  SDValue Chain = Op.getOperand(0);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+  SDValue LHS = Op.getOperand(2);
+  SDValue RHS = Op.getOperand(3);
+  SDValue Dest = Op.getOperand(4);
+  EVT LHSVT = LHS.getValueType();
+  SDLoc dl(Op);
+
+  // Soften the cc condition with libcall if it is fp128.
+  if (LHSVT == MVT::f128) {
+    assert(!Subtarget.hasP9Vector() &&
+           "BR_CC for f128 is already legal under Power9!");
+    softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain);
+    // softenSetCCOperands can hand back an empty RHS, in which case LHS alone
+    // carries the result of the comparison. A BR_CC must still be produced
+    // (returning the bare value would replace a chain-typed node with a
+    // non-chain value), so branch on "LHS != 0" instead.
+    if (!RHS.getNode()) {
+      RHS = DAG.getConstant(0, dl, LHS.getValueType());
+      CC = ISD::SETNE;
+    }
+    return DAG.getNode(ISD::BR_CC, dl, Op.getValueType(), Chain,
+                       DAG.getCondCode(CC), LHS, RHS, Dest);
+  }
+
+  // Compare f16 operands as f32 and keep the original condition code.
+  if (LHSVT == MVT::f16) {
+    LHS = DAG.getFPExtendOrRound(LHS, dl, MVT::f32);
+    RHS = DAG.getFPExtendOrRound(RHS, dl, MVT::f32);
+    return DAG.getNode(ISD::BR_CC, dl, Op.getValueType(), Chain,
+                       DAG.getCondCode(CC), LHS, RHS, Dest);
+  }
+
+  // llvm_unreachable (rather than assert(false)) so that release builds do
+  // not fall off the end of a function that must return a value.
+  llvm_unreachable("Only f16 and f128 BR_CC lowering is handled here!");
+}
+
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
@@ -12122,6 +12153,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::STRICT_FP_TO_UINT:
case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_UINT:
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 514329bbe92d7f5..c0efdf1d5c56102 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1297,6 +1297,7 @@ namespace llvm {
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
const SDLoc &dl) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/PowerPC/f128-branch-cond.ll b/llvm/test/CodeGen/PowerPC/f128-branch-cond.ll
index 75c2bc7bc0fa05d..353b06e64fda56a 100644
--- a/llvm/test/CodeGen/PowerPC/f128-branch-cond.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-branch-cond.ll
@@ -291,6 +291,61 @@ final:
ret i32 %result
}
+; Branch on the negation of an ordered greater-than compare of an fp128
+; argument against zero: returns 1 when the compare is false, 0 when true.
+define i32 @test_choice5(fp128 %a) #0 {
+; LABEL: test_choice5
+; P8-LABEL: test_choice5:
+; P8: # %bb.0:
+; P8-NEXT: mflr 0
+; P8-NEXT: stdu 1, -32(1)
+; P8-NEXT: std 0, 48(1)
+; P8-NEXT: addis 3, 2, .LCPI4_0@toc@ha
+; P8-NEXT: addi 3, 3, .LCPI4_0@toc@l
+; P8-NEXT: lxvd2x 0, 0, 3
+; P8-NEXT: xxswapd 35, 0
+; P8-NEXT: bl __gtkf2
+; P8-NEXT: nop
+; P8-NEXT: # kill: def $r3 killed $r3 killed $x3
+; P8-NEXT: cmpwi 3, 0
+; P8-NEXT: bgt 0, .LBB4_2
+; P8-NEXT: b .LBB4_1
+; P8-NEXT: .LBB4_1: # %if.true
+; P8-NEXT: li 3, 1
+; P8-NEXT: addi 1, 1, 32
+; P8-NEXT: ld 0, 16(1)
+; P8-NEXT: mtlr 0
+; P8-NEXT: blr
+; P8-NEXT: .LBB4_2: # %if.false
+; P8-NEXT: li 3, 0
+; P8-NEXT: addi 1, 1, 32
+; P8-NEXT: ld 0, 16(1)
+; P8-NEXT: mtlr 0
+; P8-NEXT: blr
+;
+; P9-LABEL: test_choice5:
+; P9: # %bb.0:
+; P9-NEXT: addis 3, 2, .LCPI4_0@toc@ha
+; P9-NEXT: addi 3, 3, .LCPI4_0@toc@l
+; P9-NEXT: lxv 35, 0(3)
+; P9-NEXT: xscmpuqp 0, 2, 3
+; P9-NEXT: bgt 0, .LBB4_2
+; P9-NEXT: b .LBB4_1
+; P9-NEXT: .LBB4_1: # %if.true
+; P9-NEXT: li 3, 1
+; P9-NEXT: blr
+; P9-NEXT: .LBB4_2: # %if.false
+; P9-NEXT: li 3, 0
+; P9-NEXT: blr
+  %cmp = fcmp ogt fp128 %a, 0xL00000000000000000000000000000000
+  %not = icmp eq i1 %cmp, false
+  br i1 %not, label %if.true, label %if.false
+
+if.true:
+  ret i32 1
+
+if.false:
+  ret i32 0
+}
+
attributes #0 = { nounwind }
declare i32 @foo()
diff --git a/llvm/test/CodeGen/PowerPC/f16-branch-cond.ll b/llvm/test/CodeGen/PowerPC/f16-branch-cond.ll
new file mode 100644
index 000000000000000..190033f3e03765f
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/f16-branch-cond.ll
@@ -0,0 +1,139 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -O0 < %s | \
+; RUN: FileCheck %s -check-prefix=P8
+; RUN: llc -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 -O0 < %s | \
+; RUN: FileCheck %s -check-prefix=P9
+
+; Branch on the negation of an ordered greater-than compare of a half
+; argument against 0.0: returns 1 when the compare is false, 0 when true.
+define i32 @test_choice1(half %a) #0 {
+; P8-LABEL: test_choice1:
+; P8: # %bb.0:
+; P8-NEXT: mflr 0
+; P8-NEXT: stdu 1, -32(1)
+; P8-NEXT: std 0, 48(1)
+; P8-NEXT: bl __gnu_f2h_ieee
+; P8-NEXT: nop
+; P8-NEXT: clrldi 3, 3, 48
+; P8-NEXT: bl __gnu_h2f_ieee
+; P8-NEXT: nop
+; P8-NEXT: xxlxor 0, 0, 0
+; P8-NEXT: fcmpu 0, 1, 0
+; P8-NEXT: bgt 0, .LBB0_2
+; P8-NEXT: b .LBB0_1
+; P8-NEXT: .LBB0_1: # %if.true
+; P8-NEXT: li 3, 1
+; P8-NEXT: addi 1, 1, 32
+; P8-NEXT: ld 0, 16(1)
+; P8-NEXT: mtlr 0
+; P8-NEXT: blr
+; P8-NEXT: .LBB0_2: # %if.false
+; P8-NEXT: li 3, 0
+; P8-NEXT: addi 1, 1, 32
+; P8-NEXT: ld 0, 16(1)
+; P8-NEXT: mtlr 0
+; P8-NEXT: blr
+;
+; P9-LABEL: test_choice1:
+; P9: # %bb.0:
+; P9-NEXT: xscvdphp 0, 1
+; P9-NEXT: mffprwz 3, 0
+; P9-NEXT: clrlwi 3, 3, 16
+; P9-NEXT: mtfprwz 0, 3
+; P9-NEXT: xscvhpdp 0, 0
+; P9-NEXT: xxlxor 1, 1, 1
+; P9-NEXT: fcmpu 0, 0, 1
+; P9-NEXT: bgt 0, .LBB0_2
+; P9-NEXT: b .LBB0_1
+; P9-NEXT: .LBB0_1: # %if.true
+; P9-NEXT: li 3, 1
+; P9-NEXT: blr
+; P9-NEXT: .LBB0_2: # %if.false
+; P9-NEXT: li 3, 0
+; P9-NEXT: blr
+ %cmp = fcmp ogt half %a, 0.0
+ %not = icmp eq i1 %cmp, false
+ br i1 %not, label %if.true, label %if.false
+
+if.true:
+ ret i32 1
+
+if.false:
+ ret i32 0
+}
+
+; Branch on the negation (via xor with true) of an unordered-or-not-equal
+; compare of two half arguments: returns 1 when the compare is false, 0 when
+; it is true.
+define i32 @test_choice2(half %a, half %b) #0 {
+; P8-LABEL: test_choice2:
+; P8: # %bb.0:
+; P8-NEXT: mflr 0
+; P8-NEXT: stdu 1, -64(1)
+; P8-NEXT: std 0, 80(1)
+; P8-NEXT: li 3, 52
+; P8-NEXT: stxsspx 2, 1, 3 # 4-byte Folded Spill
+; P8-NEXT: fmr 0, 1
+; P8-NEXT: lxsspx 1, 1, 3 # 4-byte Folded Reload
+; P8-NEXT: li 3, 56
+; P8-NEXT: stxsspx 0, 1, 3 # 4-byte Folded Spill
+; P8-NEXT: bl __gnu_f2h_ieee
+; P8-NEXT: nop
+; P8-NEXT: clrldi 3, 3, 48
+; P8-NEXT: bl __gnu_h2f_ieee
+; P8-NEXT: nop
+; P8-NEXT: fmr 0, 1
+; P8-NEXT: li 3, 56
+; P8-NEXT: lxsspx 1, 1, 3 # 4-byte Folded Reload
+; P8-NEXT: stfs 0, 60(1) # 4-byte Folded Spill
+; P8-NEXT: bl __gnu_f2h_ieee
+; P8-NEXT: nop
+; P8-NEXT: clrldi 3, 3, 48
+; P8-NEXT: bl __gnu_h2f_ieee
+; P8-NEXT: nop
+; P8-NEXT: lfs 0, 60(1) # 4-byte Folded Reload
+; P8-NEXT: fcmpu 0, 1, 0
+; P8-NEXT: bne 0, .LBB1_2
+; P8-NEXT: b .LBB1_1
+; P8-NEXT: .LBB1_1: # %if.true
+; P8-NEXT: li 3, 1
+; P8-NEXT: addi 1, 1, 64
+; P8-NEXT: ld 0, 16(1)
+; P8-NEXT: mtlr 0
+; P8-NEXT: blr
+; P8-NEXT: .LBB1_2: # %if.false
+; P8-NEXT: li 3, 0
+; P8-NEXT: addi 1, 1, 64
+; P8-NEXT: ld 0, 16(1)
+; P8-NEXT: mtlr 0
+; P8-NEXT: blr
+;
+; P9-LABEL: test_choice2:
+; P9: # %bb.0:
+; P9-NEXT: fmr 0, 1
+; P9-NEXT: xscvdphp 1, 2
+; P9-NEXT: mffprwz 3, 1
+; P9-NEXT: clrlwi 3, 3, 16
+; P9-NEXT: mtfprwz 1, 3
+; P9-NEXT: xscvhpdp 1, 1
+; P9-NEXT: xscvdphp 0, 0
+; P9-NEXT: mffprwz 3, 0
+; P9-NEXT: clrlwi 3, 3, 16
+; P9-NEXT: mtfprwz 0, 3
+; P9-NEXT: xscvhpdp 0, 0
+; P9-NEXT: fcmpu 0, 0, 1
+; P9-NEXT: bne 0, .LBB1_2
+; P9-NEXT: b .LBB1_1
+; P9-NEXT: .LBB1_1: # %if.true
+; P9-NEXT: li 3, 1
+; P9-NEXT: blr
+; P9-NEXT: .LBB1_2: # %if.false
+; P9-NEXT: li 3, 0
+; P9-NEXT: blr
+ %cmp = fcmp une half %a, %b
+ %not = xor i1 %cmp, true
+ br i1 %not, label %if.true, label %if.false
+
+if.true:
+ ret i32 1
+
+if.false:
+ ret i32 0
+}
+
+attributes #0 = { nounwind }
More information about the llvm-commits
mailing list