[llvm] 06c3311 - [PowerPC] Implement llvm.set.rounding intrinsic (#67302)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 9 23:30:36 PDT 2024
Author: Qiu Chaofan
Date: 2024-09-10T14:30:31+08:00
New Revision: 06c331163e779875ad9e74dd2da99d6b90f6e5bd
URL: https://github.com/llvm/llvm-project/commit/06c331163e779875ad9e74dd2da99d6b90f6e5bd
DIFF: https://github.com/llvm/llvm-project/commit/06c331163e779875ad9e74dd2da99d6b90f6e5bd.diff
LOG: [PowerPC] Implement llvm.set.rounding intrinsic (#67302)
Added:
Modified:
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.h
llvm/test/CodeGen/PowerPC/frounds.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 8b794656d5c219..fd03eeba911490 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -435,13 +435,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
} else {
setOperationAction(ISD::FMA , MVT::f64, Legal);
setOperationAction(ISD::FMA , MVT::f32, Legal);
+ setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
+ setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
}
if (Subtarget.hasSPE())
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
- setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
-
// If we're enabling GP optimizations, use hardware square root
if (!Subtarget.hasFSQRT() &&
!(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
@@ -9060,6 +9060,103 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
return FP;
}
+SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc Dl(Op);
+ MachineFunction &MF = DAG.getMachineFunction();
+ EVT PtrVT = getPointerTy(MF.getDataLayout());
+ SDValue Chain = Op.getOperand(0);
+
+ // If the requested mode is a constant, use the simpler mtfsb0/mtfsb1 pair, or a single mffscrni on ISA 3.0.
+ if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ uint64_t Mode = CVal->getZExtValue();
+ assert(Mode < 4 && "Unsupported rounding mode!");
+ unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1); // Maps 0 -> 1, 1 -> 0; 2 and 3 are unchanged.
+ if (Subtarget.isISA3_0())
+ return SDValue(
+ DAG.getMachineNode(
+ PPC::MFFSCRNI, Dl, {MVT::f64, MVT::Other},
+ {DAG.getConstant(InternalRnd, Dl, MVT::i32, true), Chain}),
+ 1);
+ SDNode *SetHi = DAG.getMachineNode(
+ (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
+ {DAG.getConstant(30, Dl, MVT::i32, true), Chain});
+ SDNode *SetLo = DAG.getMachineNode(
+ (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
+ {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
+ return SDValue(SetLo, 0);
+ }
+
+ // Use x ^ (~(x >> 1) & 1) to transform the LLVM rounding mode into the Power format; only the low two bits of the operand are used.
+ SDValue One = DAG.getConstant(1, Dl, MVT::i32);
+ SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
+ DAG.getConstant(3, Dl, MVT::i32));
+ SDValue DstFlag = DAG.getNode(
+ ISD::XOR, Dl, MVT::i32, SrcFlag,
+ DAG.getNode(ISD::AND, Dl, MVT::i32,
+ DAG.getNOT(Dl,
+ DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
+ MVT::i32),
+ One));
+ // On ISA 3.0 (Power9) there is the faster mffscrn, so the FPSCR does not need to be read first.
+ SDValue MFFS;
+ if (!Subtarget.isISA3_0()) {
+ MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
+ Chain = MFFS.getValue(1);
+ }
+ SDValue NewFPSCR;
+ if (Subtarget.isPPC64()) {
+ if (Subtarget.isISA3_0()) {
+ NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64);
+ } else {
+ // Insert DstFlag into the last two bits (rounding mode) of the FPSCR value bitcast to i64.
+ SDNode *InsertRN = DAG.getMachineNode(
+ PPC::RLDIMI, Dl, MVT::i64,
+ {DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
+ DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag),
+ DAG.getTargetConstant(0, Dl, MVT::i32),
+ DAG.getTargetConstant(62, Dl, MVT::i32)});
+ NewFPSCR = SDValue(InsertRN, 0);
+ }
+ NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
+ } else {
+ // In 32-bit mode, go through an 8-byte stack slot: store the f64, then load and update its lower half (at offset 4 on big-endian).
+ int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
+ SDValue Addr = Subtarget.isLittleEndian()
+ ? StackSlot
+ : DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
+ DAG.getConstant(4, Dl, PtrVT));
+ if (Subtarget.isISA3_0()) {
+ Chain = DAG.getStore(Chain, Dl, DstFlag, Addr, MachinePointerInfo());
+ } else {
+ Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
+ SDValue Tmp =
+ DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
+ Chain = Tmp.getValue(1);
+ Tmp = SDValue(DAG.getMachineNode(
+ PPC::RLWIMI, Dl, MVT::i32,
+ {Tmp, DstFlag, DAG.getTargetConstant(0, Dl, MVT::i32),
+ DAG.getTargetConstant(30, Dl, MVT::i32),
+ DAG.getTargetConstant(31, Dl, MVT::i32)}),
+ 0);
+ Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
+ }
+ NewFPSCR =
+ DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
+ Chain = NewFPSCR.getValue(1);
+ }
+ if (Subtarget.isISA3_0())
+ return SDValue(DAG.getMachineNode(PPC::MFFSCRN, Dl, {MVT::f64, MVT::Other},
+ {NewFPSCR, Chain}),
+ 1);
+ SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
+ SDNode *MTFSF = DAG.getMachineNode(
+ PPC::MTFSF, Dl, MVT::Other,
+ {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
+ return SDValue(MTFSF, 0);
+}
+
SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -11921,6 +12018,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
+ case ISD::SET_ROUNDING:
+ return LowerSET_ROUNDING(Op, DAG);
// Lower 64-bit shifts.
case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 0bdfdcd15441f4..8907c3c5a81c3c 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1296,6 +1296,7 @@ namespace llvm {
const SDLoc &dl) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/PowerPC/frounds.ll b/llvm/test/CodeGen/PowerPC/frounds.ll
index c1f7181b30f3f6..cd2d7813af3962 100644
--- a/llvm/test/CodeGen/PowerPC/frounds.ll
+++ b/llvm/test/CodeGen/PowerPC/frounds.ll
@@ -5,14 +5,17 @@
; RUN: -check-prefix=PPC64
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le -mattr=-direct-move \
; RUN: | FileCheck %s -check-prefix=PPC64LE
+; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=pwr9 \
+; RUN: | FileCheck %s -check-prefix=P9_32
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le -mcpu=pwr9 \
+; RUN: | FileCheck %s -check-prefix=P9
; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le | FileCheck %s \
; RUN: -check-prefix=DM
-define i32 @foo() {
+define i32 @foo() #0 {
; PPC32-LABEL: foo:
; PPC32: # %bb.0: # %entry
; PPC32-NEXT: stwu 1, -32(1)
-; PPC32-NEXT: .cfi_def_cfa_offset 32
; PPC32-NEXT: mffs 0
; PPC32-NEXT: stfd 0, 16(1)
; PPC32-NEXT: lwz 3, 20(1)
@@ -51,6 +54,33 @@ define i32 @foo() {
; PPC64LE-NEXT: stw 3, -4(1)
; PPC64LE-NEXT: blr
;
+; P9_32-LABEL: foo:
+; P9_32: # %bb.0: # %entry
+; P9_32-NEXT: stwu 1, -32(1)
+; P9_32-NEXT: mffs 0
+; P9_32-NEXT: stfd 0, 16(1)
+; P9_32-NEXT: lwz 3, 20(1)
+; P9_32-NEXT: clrlwi 4, 3, 30
+; P9_32-NEXT: not 3, 3
+; P9_32-NEXT: rlwinm 3, 3, 31, 31, 31
+; P9_32-NEXT: xor 3, 4, 3
+; P9_32-NEXT: stw 3, 24(1)
+; P9_32-NEXT: stw 3, 28(1)
+; P9_32-NEXT: addi 1, 1, 32
+; P9_32-NEXT: blr
+;
+; P9-LABEL: foo:
+; P9: # %bb.0: # %entry
+; P9-NEXT: mffs 0
+; P9-NEXT: mffprd 3, 0
+; P9-NEXT: clrlwi 4, 3, 30
+; P9-NEXT: not 3, 3
+; P9-NEXT: rlwinm 3, 3, 31, 31, 31
+; P9-NEXT: xor 3, 4, 3
+; P9-NEXT: stw 3, -8(1)
+; P9-NEXT: stw 3, -4(1)
+; P9-NEXT: blr
+;
; DM-LABEL: foo:
; DM: # %bb.0: # %entry
; DM-NEXT: mffs 0
@@ -77,4 +107,254 @@ return: ; preds = %entry
ret i32 %retval3
}
-declare i32 @llvm.get.rounding() nounwind
+define void @setrnd_tozero() #0 {
+; PPC32-LABEL: setrnd_tozero:
+; PPC32: # %bb.0: # %entry
+; PPC32-NEXT: mtfsb0 30
+; PPC32-NEXT: mtfsb1 31
+; PPC32-NEXT: blr
+;
+; PPC64-LABEL: setrnd_tozero:
+; PPC64: # %bb.0: # %entry
+; PPC64-NEXT: mtfsb0 30
+; PPC64-NEXT: mtfsb1 31
+; PPC64-NEXT: blr
+;
+; PPC64LE-LABEL: setrnd_tozero:
+; PPC64LE: # %bb.0: # %entry
+; PPC64LE-NEXT: mtfsb0 30
+; PPC64LE-NEXT: mtfsb1 31
+; PPC64LE-NEXT: blr
+;
+; P9_32-LABEL: setrnd_tozero:
+; P9_32: # %bb.0: # %entry
+; P9_32-NEXT: mffscrni 0, 1
+; P9_32-NEXT: blr
+;
+; P9-LABEL: setrnd_tozero:
+; P9: # %bb.0: # %entry
+; P9-NEXT: mffscrni 0, 1
+; P9-NEXT: blr
+;
+; DM-LABEL: setrnd_tozero:
+; DM: # %bb.0: # %entry
+; DM-NEXT: mtfsb0 30
+; DM-NEXT: mtfsb1 31
+; DM-NEXT: blr
+entry:
+ call void @llvm.set.rounding(i32 0)
+ ret void
+}
+
+define void @setrnd_tonearest_tieeven() #0 {
+; PPC32-LABEL: setrnd_tonearest_tieeven:
+; PPC32: # %bb.0: # %entry
+; PPC32-NEXT: mtfsb0 30
+; PPC32-NEXT: mtfsb0 31
+; PPC32-NEXT: blr
+;
+; PPC64-LABEL: setrnd_tonearest_tieeven:
+; PPC64: # %bb.0: # %entry
+; PPC64-NEXT: mtfsb0 30
+; PPC64-NEXT: mtfsb0 31
+; PPC64-NEXT: blr
+;
+; PPC64LE-LABEL: setrnd_tonearest_tieeven:
+; PPC64LE: # %bb.0: # %entry
+; PPC64LE-NEXT: mtfsb0 30
+; PPC64LE-NEXT: mtfsb0 31
+; PPC64LE-NEXT: blr
+;
+; P9_32-LABEL: setrnd_tonearest_tieeven:
+; P9_32: # %bb.0: # %entry
+; P9_32-NEXT: mffscrni 0, 0
+; P9_32-NEXT: blr
+;
+; P9-LABEL: setrnd_tonearest_tieeven:
+; P9: # %bb.0: # %entry
+; P9-NEXT: mffscrni 0, 0
+; P9-NEXT: blr
+;
+; DM-LABEL: setrnd_tonearest_tieeven:
+; DM: # %bb.0: # %entry
+; DM-NEXT: mtfsb0 30
+; DM-NEXT: mtfsb0 31
+; DM-NEXT: blr
+entry:
+ call void @llvm.set.rounding(i32 1)
+ ret void
+}
+
+define void @setrnd_toposinf() #0 {
+; PPC32-LABEL: setrnd_toposinf:
+; PPC32: # %bb.0: # %entry
+; PPC32-NEXT: mtfsb1 30
+; PPC32-NEXT: mtfsb0 31
+; PPC32-NEXT: blr
+;
+; PPC64-LABEL: setrnd_toposinf:
+; PPC64: # %bb.0: # %entry
+; PPC64-NEXT: mtfsb1 30
+; PPC64-NEXT: mtfsb0 31
+; PPC64-NEXT: blr
+;
+; PPC64LE-LABEL: setrnd_toposinf:
+; PPC64LE: # %bb.0: # %entry
+; PPC64LE-NEXT: mtfsb1 30
+; PPC64LE-NEXT: mtfsb0 31
+; PPC64LE-NEXT: blr
+;
+; P9_32-LABEL: setrnd_toposinf:
+; P9_32: # %bb.0: # %entry
+; P9_32-NEXT: mffscrni 0, 2
+; P9_32-NEXT: blr
+;
+; P9-LABEL: setrnd_toposinf:
+; P9: # %bb.0: # %entry
+; P9-NEXT: mffscrni 0, 2
+; P9-NEXT: blr
+;
+; DM-LABEL: setrnd_toposinf:
+; DM: # %bb.0: # %entry
+; DM-NEXT: mtfsb1 30
+; DM-NEXT: mtfsb0 31
+; DM-NEXT: blr
+entry:
+ call void @llvm.set.rounding(i32 2)
+ ret void
+}
+
+define void @setrnd_toneginf() #0 {
+; PPC32-LABEL: setrnd_toneginf:
+; PPC32: # %bb.0: # %entry
+; PPC32-NEXT: mtfsb1 30
+; PPC32-NEXT: mtfsb1 31
+; PPC32-NEXT: blr
+;
+; PPC64-LABEL: setrnd_toneginf:
+; PPC64: # %bb.0: # %entry
+; PPC64-NEXT: mtfsb1 30
+; PPC64-NEXT: mtfsb1 31
+; PPC64-NEXT: blr
+;
+; PPC64LE-LABEL: setrnd_toneginf:
+; PPC64LE: # %bb.0: # %entry
+; PPC64LE-NEXT: mtfsb1 30
+; PPC64LE-NEXT: mtfsb1 31
+; PPC64LE-NEXT: blr
+;
+; P9_32-LABEL: setrnd_toneginf:
+; P9_32: # %bb.0: # %entry
+; P9_32-NEXT: mffscrni 0, 3
+; P9_32-NEXT: blr
+;
+; P9-LABEL: setrnd_toneginf:
+; P9: # %bb.0: # %entry
+; P9-NEXT: mffscrni 0, 3
+; P9-NEXT: blr
+;
+; DM-LABEL: setrnd_toneginf:
+; DM: # %bb.0: # %entry
+; DM-NEXT: mtfsb1 30
+; DM-NEXT: mtfsb1 31
+; DM-NEXT: blr
+entry:
+ call void @llvm.set.rounding(i32 3)
+ ret void
+}
+
+define void @setrnd_var(i32 %x) #0 {
+; PPC32-LABEL: setrnd_var:
+; PPC32: # %bb.0: # %entry
+; PPC32-NEXT: stwu 1, -16(1)
+; PPC32-NEXT: mffs 0
+; PPC32-NEXT: stfd 0, 8(1)
+; PPC32-NEXT: clrlwi 4, 3, 30
+; PPC32-NEXT: lwz 5, 12(1)
+; PPC32-NEXT: rlwinm 3, 3, 31, 31, 31
+; PPC32-NEXT: xor 3, 3, 4
+; PPC32-NEXT: xori 3, 3, 1
+; PPC32-NEXT: rlwimi 5, 3, 0, 30, 31
+; PPC32-NEXT: stw 5, 12(1)
+; PPC32-NEXT: lfd 0, 8(1)
+; PPC32-NEXT: mtfsf 255, 0
+; PPC32-NEXT: addi 1, 1, 16
+; PPC32-NEXT: blr
+;
+; PPC64-LABEL: setrnd_var:
+; PPC64: # %bb.0: # %entry
+; PPC64-NEXT: mffs 0
+; PPC64-NEXT: stfd 0, -16(1)
+; PPC64-NEXT: clrlwi 4, 3, 30
+; PPC64-NEXT: rlwinm 3, 3, 31, 31, 31
+; PPC64-NEXT: ld 5, -16(1)
+; PPC64-NEXT: xor 3, 3, 4
+; PPC64-NEXT: xori 3, 3, 1
+; PPC64-NEXT: clrldi 3, 3, 32
+; PPC64-NEXT: rldimi 5, 3, 0, 62
+; PPC64-NEXT: std 5, -8(1)
+; PPC64-NEXT: lfd 0, -8(1)
+; PPC64-NEXT: mtfsf 255, 0
+; PPC64-NEXT: blr
+;
+; PPC64LE-LABEL: setrnd_var:
+; PPC64LE: # %bb.0: # %entry
+; PPC64LE-NEXT: mffs 0
+; PPC64LE-NEXT: clrlwi 4, 3, 30
+; PPC64LE-NEXT: rlwinm 3, 3, 31, 31, 31
+; PPC64LE-NEXT: stfd 0, -16(1)
+; PPC64LE-NEXT: xor 3, 3, 4
+; PPC64LE-NEXT: ld 4, -16(1)
+; PPC64LE-NEXT: xori 3, 3, 1
+; PPC64LE-NEXT: clrldi 3, 3, 32
+; PPC64LE-NEXT: rldimi 4, 3, 0, 62
+; PPC64LE-NEXT: std 4, -8(1)
+; PPC64LE-NEXT: lfd 0, -8(1)
+; PPC64LE-NEXT: mtfsf 255, 0
+; PPC64LE-NEXT: blr
+;
+; P9_32-LABEL: setrnd_var:
+; P9_32: # %bb.0: # %entry
+; P9_32-NEXT: stwu 1, -16(1)
+; P9_32-NEXT: clrlwi 4, 3, 30
+; P9_32-NEXT: rlwinm 3, 3, 31, 31, 31
+; P9_32-NEXT: xor 3, 3, 4
+; P9_32-NEXT: xori 3, 3, 1
+; P9_32-NEXT: stw 3, 12(1)
+; P9_32-NEXT: lfd 0, 8(1)
+; P9_32-NEXT: mffscrn 0, 0
+; P9_32-NEXT: addi 1, 1, 16
+; P9_32-NEXT: blr
+;
+; P9-LABEL: setrnd_var:
+; P9: # %bb.0: # %entry
+; P9-NEXT: clrlwi 4, 3, 30
+; P9-NEXT: rlwinm 3, 3, 31, 31, 31
+; P9-NEXT: xor 3, 3, 4
+; P9-NEXT: xori 3, 3, 1
+; P9-NEXT: mtfprd 0, 3
+; P9-NEXT: mffscrn 0, 0
+; P9-NEXT: blr
+;
+; DM-LABEL: setrnd_var:
+; DM: # %bb.0: # %entry
+; DM-NEXT: clrlwi 4, 3, 30
+; DM-NEXT: rlwinm 3, 3, 31, 31, 31
+; DM-NEXT: xor 3, 3, 4
+; DM-NEXT: xori 3, 3, 1
+; DM-NEXT: clrldi 3, 3, 32
+; DM-NEXT: mffs 0
+; DM-NEXT: mffprd 4, 0
+; DM-NEXT: rldimi 4, 3, 0, 62
+; DM-NEXT: mtfprd 0, 4
+; DM-NEXT: mtfsf 255, 0
+; DM-NEXT: blr
+entry:
+ call void @llvm.set.rounding(i32 %x)
+ ret void
+}
+
+declare i32 @llvm.get.rounding() #0
+declare void @llvm.set.rounding(i32) #0
+
+attributes #0 = { nounwind }
More information about the llvm-commits
mailing list