[llvm] [X86][GlobalISel] Added support for llvm.get.rounding (PR #147716)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 9 08:32:07 PDT 2025
https://github.com/JaydeepChauhan14 updated https://github.com/llvm/llvm-project/pull/147716
>From 821e55473a58b42421f745c1614a1fed65bf5e10 Mon Sep 17 00:00:00 2001
From: Chauhan Jaydeep Ashwinbhai <chauhan.jaydeep.ashwinbhai at intel.com>
Date: Wed, 9 Jul 2025 05:56:39 -0700
Subject: [PATCH 1/4] [X86][GlobalISel] Added support for llvm.get.rounding
---
.../CodeGen/GlobalISel/MachineIRBuilder.h | 5 +
llvm/include/llvm/Support/TargetOpcodes.def | 2 +
llvm/include/llvm/Target/GenericOpcodes.td | 6 +
llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 3 +
.../lib/Target/X86/GISel/X86LegalizerInfo.cpp | 81 ++++
llvm/lib/Target/X86/GISel/X86LegalizerInfo.h | 3 +
llvm/lib/Target/X86/X86InstrGISel.td | 8 +
llvm/test/CodeGen/X86/flt-rounds.ll | 442 ++++++++++++------
llvm/test/MC/ELF/mc-dump.s | 2 +-
.../GlobalISelEmitter/GlobalISelEmitter.td | 2 +-
10 files changed, 417 insertions(+), 137 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 25fef99699fdf..01b94ac7218a5 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -2382,6 +2382,11 @@ class LLVM_ABI MachineIRBuilder {
return buildInstr(TargetOpcode::G_RESET_FPMODE, {}, {});
}
+ /// Build and insert \p Dst = G_GET_ROUNDING (takes no source operands).
+ MachineInstrBuilder buildGetRounding(const DstOp &Dst) {
+ return buildInstr(TargetOpcode::G_GET_ROUNDING, {Dst}, {});
+ }
+
virtual MachineInstrBuilder
buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps, ArrayRef<SrcOp> SrcOps,
std::optional<unsigned> Flags = std::nullopt);
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index 92fd60e03112a..5dfa1d5095b86 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -735,6 +735,8 @@ HANDLE_TARGET_OPCODE(G_GET_FPMODE)
HANDLE_TARGET_OPCODE(G_SET_FPMODE)
HANDLE_TARGET_OPCODE(G_RESET_FPMODE)
+HANDLE_TARGET_OPCODE(G_GET_ROUNDING)
+
/// Generic pointer offset
HANDLE_TARGET_OPCODE(G_PTR_ADD)
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index a462b07461b41..a4a23cde42335 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -1246,6 +1246,12 @@ def G_READSTEADYCOUNTER : GenericInstruction {
let hasSideEffects = true;
}
+def G_GET_ROUNDING : GenericInstruction { // Generic opcode for llvm.get.rounding.
+ let OutOperandList = (outs type0:$dst); // Single result operand.
+ let InOperandList = (ins); // No source operands.
+ let hasSideEffects = true;
+}
+
//------------------------------------------------------------------------------
// Memory ops
//------------------------------------------------------------------------------
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index ef39fc74554c9..d7280eaba2440 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2593,6 +2593,9 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
case Intrinsic::reset_fpmode:
MIRBuilder.buildResetFPMode();
return true;
+ case Intrinsic::get_rounding:
+ MIRBuilder.buildGetRounding(getOrCreateVReg(CI));
+ return true;
case Intrinsic::vscale: {
MIRBuilder.buildVScale(getOrCreateVReg(CI), 1);
return true;
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 8e304c07ed5cb..7fe58539cd4ec 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
@@ -108,6 +109,8 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
.legalFor(HasSSE2 || UseX87, {s64})
.legalFor(UseX87, {s80});
+ getActionDefinitionsBuilder(G_GET_ROUNDING).customFor({s32});
+
// merge/unmerge
for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
@@ -611,6 +614,8 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
return legalizeSITOFP(MI, MRI, Helper);
case TargetOpcode::G_FPTOSI:
return legalizeFPTOSI(MI, MRI, Helper);
+ case TargetOpcode::G_GET_ROUNDING:
+ return legalizeGETROUNDING(MI, MRI, Helper);
}
llvm_unreachable("expected switch to return");
}
@@ -777,6 +782,82 @@ bool X86LegalizerInfo::legalizeNarrowingStore(MachineInstr &MI,
return true;
}
+bool X86LegalizerInfo::legalizeGETROUNDING(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const {
+ /* Custom-legalizes G_GET_ROUNDING (MI); erases MI and always returns true.
+ The rounding mode is in bits 11:10 of FPSR (the x87 FP control word that is
+ read below), and has the following settings:
+ 00 Round to nearest
+ 01 Round to -inf
+ 10 Round to +inf
+ 11 Round to 0
+
+ GET_ROUNDING, on the other hand, expects the following:
+ -1 Undefined
+ 0 Round to 0
+ 1 Round to nearest
+ 2 Round to +inf
+ 3 Round to -inf
+
+ To perform the conversion, we use a packed lookup table of the four 2-bit
+ values that we can index by FPSR[11:10]:
+ 0x2d --> (0b00,10,11,01) --> (0,2,3,1) >> FPSR[11:10]
+ The shift amount (FPSR >> 9) & 6 equals 2 * FPSR[11:10] (one 2-bit entry):
+ (0x2d >> ((FPSR >> 9) & 6)) & 3
+ */
+
+ MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+ MachineFunction &MF = MIRBuilder.getMF();
+ Register Dst = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ const LLT s8 = LLT::scalar(8);
+ const LLT s16 = LLT::scalar(16);
+ const LLT s32 = LLT::scalar(32);
+
+ // Create a 2-byte, 2-byte-aligned stack temporary for the FP Control Word
+ int MemSize = 2;
+ Align Alignment = Align(2);
+ MachinePointerInfo PtrInfo;
+ auto StackTemp = Helper.createStackTemporary(TypeSize::getFixed(MemSize),
+ Alignment, PtrInfo);
+ Register StackPtr = StackTemp.getReg(0);
+
+ auto StoreMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+ MemSize, Alignment);
+
+ // Store FP Control Word to stack slot using G_FNSTCW16
+ MIRBuilder.buildInstr(X86::G_FNSTCW16)
+ .addUse(StackPtr)
+ .addMemOperand(StoreMMO);
+
+ // Load FP Control Word back from the same stack slot (zero-extended to s32)
+ auto LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
+ MemSize, Alignment);
+
+ auto CWD32 =
+ MIRBuilder.buildZExt(s32, MIRBuilder.buildLoad(s16, StackPtr, *LoadMMO));
+ auto Shifted8 = MIRBuilder.buildTrunc(
+ s8, MIRBuilder.buildLShr(s32, CWD32, MIRBuilder.buildConstant(s8, 9)));
+ auto Masked32 = MIRBuilder.buildZExt(
+ s32, MIRBuilder.buildAnd(s8, Shifted8, MIRBuilder.buildConstant(s8, 6)));
+
+ // LUT is a packed lookup table (0x2d) used to map the 2-bit x87 FPU rounding
+ // mode (from bits 11:10 of the control word) to the values expected by
+ // GET_ROUNDING. The mapping is performed by shifting LUT right by twice the
+ // extracted rounding mode (Masked32) and masking the result with 3.
+ auto LUT = MIRBuilder.buildConstant(s32, 0x2d);
+ auto LUTShifted = MIRBuilder.buildLShr(s32, LUT, Masked32);
+ auto RetVal =
+ MIRBuilder.buildAnd(s32, LUTShifted, MIRBuilder.buildConstant(s32, 3));
+ auto RetValTrunc = MIRBuilder.buildZExtOrTrunc(DstTy, RetVal);
+
+ MIRBuilder.buildCopy(Dst, RetValTrunc);
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const {
return true;
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
index 1ba82674ed4c6..0003552d70ee0 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
@@ -54,6 +54,9 @@ class X86LegalizerInfo : public LegalizerInfo {
bool legalizeFPTOSI(MachineInstr &MI, MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const;
+
+ bool legalizeGETROUNDING(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LegalizerHelper &Helper) const;
};
} // namespace llvm
#endif
diff --git a/llvm/lib/Target/X86/X86InstrGISel.td b/llvm/lib/Target/X86/X86InstrGISel.td
index f4fa33807cd9a..39198214037a3 100644
--- a/llvm/lib/Target/X86/X86InstrGISel.td
+++ b/llvm/lib/Target/X86/X86InstrGISel.td
@@ -27,5 +27,13 @@ def G_FIST : X86GenericInstruction {
let mayStore = true;
}
+def G_FNSTCW16 : X86GenericInstruction { // Stores the FP control word to memory.
+ let OutOperandList = (outs); // No register results.
+ let InOperandList = (ins ptype0:$dst); // Pointer to the destination slot.
+ let hasSideEffects = true;
+ let mayStore = true;
+}
+
def : GINodeEquiv<G_FILD, X86fild>;
def : GINodeEquiv<G_FIST, X86fp_to_mem>;
+def : GINodeEquiv<G_FNSTCW16, X86fp_cwd_get16>;
diff --git a/llvm/test/CodeGen/X86/flt-rounds.ll b/llvm/test/CodeGen/X86/flt-rounds.ll
index a5908978a5438..1d7a8d8456c27 100644
--- a/llvm/test/CodeGen/X86/flt-rounds.ll
+++ b/llvm/test/CodeGen/X86/flt-rounds.ll
@@ -1,7 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-sse -verify-machineinstrs < %s | FileCheck %s --check-prefix=X86
-; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-sse2 -verify-machineinstrs < %s | FileCheck %s --check-prefix=X86
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=X64
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-sse -verify-machineinstrs < %s | FileCheck %s --check-prefixes=X86,SDAG-X86
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-sse2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=X86,SDAG-X86
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefixes=X64,SDAG-X64
+; RUN: llc -mtriple=i686-unknown-linux-gnu -global-isel=1 -global-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=X86,GISEL-X86
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -global-isel=1 -global-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=X64,GISEL-X64
declare i32 @llvm.get.rounding()
@@ -37,139 +39,309 @@ define i32 @test_flt_rounds() nounwind {
; Make sure we preserve order with fesetround.
define i32 @multiple_flt_rounds() nounwind {
-; X86-LABEL: multiple_flt_rounds:
-; X86: # %bb.0: # %entry
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %esi
-; X86-NEXT: subl $20, %esp
-; X86-NEXT: movl $1024, (%esp) # imm = 0x400
-; X86-NEXT: calll fesetround
-; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: shrl $9, %ecx
-; X86-NEXT: andb $6, %cl
-; X86-NEXT: movl $45, %esi
-; X86-NEXT: movl $45, %eax
-; X86-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-NEXT: shrl %cl, %eax
-; X86-NEXT: andl $3, %eax
-; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: cmpl $3, %eax
-; X86-NEXT: setne %bl
-; X86-NEXT: movl $0, (%esp)
-; X86-NEXT: calll fesetround
-; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: shrl $9, %ecx
-; X86-NEXT: andb $6, %cl
-; X86-NEXT: movl $45, %eax
-; X86-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-NEXT: shrl %cl, %eax
-; X86-NEXT: andl $3, %eax
-; X86-NEXT: cmpl $1, %eax
-; X86-NEXT: je .LBB1_2
-; X86-NEXT: # %bb.1: # %entry
-; X86-NEXT: incl %ebx
-; X86-NEXT: .LBB1_2: # %entry
-; X86-NEXT: movl $3072, (%esp) # imm = 0xC00
-; X86-NEXT: calll fesetround
-; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: shrl $9, %ecx
-; X86-NEXT: andb $6, %cl
-; X86-NEXT: movl $45, %eax
-; X86-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-NEXT: shrl %cl, %eax
-; X86-NEXT: andl $3, %eax
-; X86-NEXT: cmpl $1, %eax
-; X86-NEXT: sbbl $-1, %ebx
-; X86-NEXT: movl $2048, (%esp) # imm = 0x800
-; X86-NEXT: calll fesetround
-; X86-NEXT: fnstcw {{[0-9]+}}(%esp)
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: shrl $9, %ecx
-; X86-NEXT: andb $6, %cl
-; X86-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-NEXT: shrl %cl, %esi
-; X86-NEXT: andl $3, %esi
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: cmpl $2, %esi
-; X86-NEXT: setne %cl
-; X86-NEXT: negl %ecx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl %ecx, %ebx
-; X86-NEXT: setne %al
-; X86-NEXT: addl $20, %esp
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %ebx
-; X86-NEXT: retl
+; SDAG-X86-LABEL: multiple_flt_rounds:
+; SDAG-X86: # %bb.0: # %entry
+; SDAG-X86-NEXT: pushl %ebx
+; SDAG-X86-NEXT: pushl %esi
+; SDAG-X86-NEXT: subl $20, %esp
+; SDAG-X86-NEXT: movl $1024, (%esp) # imm = 0x400
+; SDAG-X86-NEXT: calll fesetround
+; SDAG-X86-NEXT: fnstcw {{[0-9]+}}(%esp)
+; SDAG-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; SDAG-X86-NEXT: shrl $9, %ecx
+; SDAG-X86-NEXT: andb $6, %cl
+; SDAG-X86-NEXT: movl $45, %esi
+; SDAG-X86-NEXT: movl $45, %eax
+; SDAG-X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; SDAG-X86-NEXT: shrl %cl, %eax
+; SDAG-X86-NEXT: andl $3, %eax
+; SDAG-X86-NEXT: xorl %ebx, %ebx
+; SDAG-X86-NEXT: cmpl $3, %eax
+; SDAG-X86-NEXT: setne %bl
+; SDAG-X86-NEXT: movl $0, (%esp)
+; SDAG-X86-NEXT: calll fesetround
+; SDAG-X86-NEXT: fnstcw {{[0-9]+}}(%esp)
+; SDAG-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; SDAG-X86-NEXT: shrl $9, %ecx
+; SDAG-X86-NEXT: andb $6, %cl
+; SDAG-X86-NEXT: movl $45, %eax
+; SDAG-X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; SDAG-X86-NEXT: shrl %cl, %eax
+; SDAG-X86-NEXT: andl $3, %eax
+; SDAG-X86-NEXT: cmpl $1, %eax
+; SDAG-X86-NEXT: je .LBB1_2
+; SDAG-X86-NEXT: # %bb.1: # %entry
+; SDAG-X86-NEXT: incl %ebx
+; SDAG-X86-NEXT: .LBB1_2: # %entry
+; SDAG-X86-NEXT: movl $3072, (%esp) # imm = 0xC00
+; SDAG-X86-NEXT: calll fesetround
+; SDAG-X86-NEXT: fnstcw {{[0-9]+}}(%esp)
+; SDAG-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; SDAG-X86-NEXT: shrl $9, %ecx
+; SDAG-X86-NEXT: andb $6, %cl
+; SDAG-X86-NEXT: movl $45, %eax
+; SDAG-X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; SDAG-X86-NEXT: shrl %cl, %eax
+; SDAG-X86-NEXT: andl $3, %eax
+; SDAG-X86-NEXT: cmpl $1, %eax
+; SDAG-X86-NEXT: sbbl $-1, %ebx
+; SDAG-X86-NEXT: movl $2048, (%esp) # imm = 0x800
+; SDAG-X86-NEXT: calll fesetround
+; SDAG-X86-NEXT: fnstcw {{[0-9]+}}(%esp)
+; SDAG-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; SDAG-X86-NEXT: shrl $9, %ecx
+; SDAG-X86-NEXT: andb $6, %cl
+; SDAG-X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; SDAG-X86-NEXT: shrl %cl, %esi
+; SDAG-X86-NEXT: andl $3, %esi
+; SDAG-X86-NEXT: xorl %ecx, %ecx
+; SDAG-X86-NEXT: cmpl $2, %esi
+; SDAG-X86-NEXT: setne %cl
+; SDAG-X86-NEXT: negl %ecx
+; SDAG-X86-NEXT: xorl %eax, %eax
+; SDAG-X86-NEXT: cmpl %ecx, %ebx
+; SDAG-X86-NEXT: setne %al
+; SDAG-X86-NEXT: addl $20, %esp
+; SDAG-X86-NEXT: popl %esi
+; SDAG-X86-NEXT: popl %ebx
+; SDAG-X86-NEXT: retl
;
-; X64-LABEL: multiple_flt_rounds:
-; X64: # %bb.0: # %entry
-; X64-NEXT: pushq %rbp
-; X64-NEXT: pushq %r14
-; X64-NEXT: pushq %rbx
-; X64-NEXT: subq $16, %rsp
-; X64-NEXT: movl $1024, %edi # imm = 0x400
-; X64-NEXT: callq fesetround
-; X64-NEXT: fnstcw {{[0-9]+}}(%rsp)
-; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx
-; X64-NEXT: shrl $9, %ecx
-; X64-NEXT: andb $6, %cl
-; X64-NEXT: movl $45, %ebx
-; X64-NEXT: movl $45, %eax
-; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shrl %cl, %eax
-; X64-NEXT: andl $3, %eax
-; X64-NEXT: xorl %r14d, %r14d
-; X64-NEXT: cmpl $3, %eax
-; X64-NEXT: setne %r14b
-; X64-NEXT: xorl %edi, %edi
-; X64-NEXT: callq fesetround
-; X64-NEXT: fnstcw {{[0-9]+}}(%rsp)
-; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx
-; X64-NEXT: shrl $9, %ecx
-; X64-NEXT: andb $6, %cl
-; X64-NEXT: movl $45, %eax
-; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shrl %cl, %eax
-; X64-NEXT: andl $3, %eax
-; X64-NEXT: leal 1(%r14), %ebp
-; X64-NEXT: cmpl $1, %eax
-; X64-NEXT: cmovel %r14d, %ebp
-; X64-NEXT: movl $3072, %edi # imm = 0xC00
-; X64-NEXT: callq fesetround
-; X64-NEXT: fnstcw {{[0-9]+}}(%rsp)
-; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx
-; X64-NEXT: shrl $9, %ecx
-; X64-NEXT: andb $6, %cl
-; X64-NEXT: movl $45, %eax
-; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shrl %cl, %eax
-; X64-NEXT: andl $3, %eax
-; X64-NEXT: cmpl $1, %eax
-; X64-NEXT: sbbl $-1, %ebp
-; X64-NEXT: movl $2048, %edi # imm = 0x800
-; X64-NEXT: callq fesetround
-; X64-NEXT: fnstcw {{[0-9]+}}(%rsp)
-; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx
-; X64-NEXT: shrl $9, %ecx
-; X64-NEXT: andb $6, %cl
-; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shrl %cl, %ebx
-; X64-NEXT: andl $3, %ebx
-; X64-NEXT: xorl %ecx, %ecx
-; X64-NEXT: cmpl $2, %ebx
-; X64-NEXT: setne %cl
-; X64-NEXT: negl %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl %ecx, %ebp
-; X64-NEXT: setne %al
-; X64-NEXT: addq $16, %rsp
-; X64-NEXT: popq %rbx
-; X64-NEXT: popq %r14
-; X64-NEXT: popq %rbp
-; X64-NEXT: retq
+; SDAG-X64-LABEL: multiple_flt_rounds:
+; SDAG-X64: # %bb.0: # %entry
+; SDAG-X64-NEXT: pushq %rbp
+; SDAG-X64-NEXT: pushq %r14
+; SDAG-X64-NEXT: pushq %rbx
+; SDAG-X64-NEXT: subq $16, %rsp
+; SDAG-X64-NEXT: movl $1024, %edi # imm = 0x400
+; SDAG-X64-NEXT: callq fesetround
+; SDAG-X64-NEXT: fnstcw {{[0-9]+}}(%rsp)
+; SDAG-X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx
+; SDAG-X64-NEXT: shrl $9, %ecx
+; SDAG-X64-NEXT: andb $6, %cl
+; SDAG-X64-NEXT: movl $45, %ebx
+; SDAG-X64-NEXT: movl $45, %eax
+; SDAG-X64-NEXT: # kill: def $cl killed $cl killed $ecx
+; SDAG-X64-NEXT: shrl %cl, %eax
+; SDAG-X64-NEXT: andl $3, %eax
+; SDAG-X64-NEXT: xorl %r14d, %r14d
+; SDAG-X64-NEXT: cmpl $3, %eax
+; SDAG-X64-NEXT: setne %r14b
+; SDAG-X64-NEXT: xorl %edi, %edi
+; SDAG-X64-NEXT: callq fesetround
+; SDAG-X64-NEXT: fnstcw {{[0-9]+}}(%rsp)
+; SDAG-X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx
+; SDAG-X64-NEXT: shrl $9, %ecx
+; SDAG-X64-NEXT: andb $6, %cl
+; SDAG-X64-NEXT: movl $45, %eax
+; SDAG-X64-NEXT: # kill: def $cl killed $cl killed $ecx
+; SDAG-X64-NEXT: shrl %cl, %eax
+; SDAG-X64-NEXT: andl $3, %eax
+; SDAG-X64-NEXT: leal 1(%r14), %ebp
+; SDAG-X64-NEXT: cmpl $1, %eax
+; SDAG-X64-NEXT: cmovel %r14d, %ebp
+; SDAG-X64-NEXT: movl $3072, %edi # imm = 0xC00
+; SDAG-X64-NEXT: callq fesetround
+; SDAG-X64-NEXT: fnstcw {{[0-9]+}}(%rsp)
+; SDAG-X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx
+; SDAG-X64-NEXT: shrl $9, %ecx
+; SDAG-X64-NEXT: andb $6, %cl
+; SDAG-X64-NEXT: movl $45, %eax
+; SDAG-X64-NEXT: # kill: def $cl killed $cl killed $ecx
+; SDAG-X64-NEXT: shrl %cl, %eax
+; SDAG-X64-NEXT: andl $3, %eax
+; SDAG-X64-NEXT: cmpl $1, %eax
+; SDAG-X64-NEXT: sbbl $-1, %ebp
+; SDAG-X64-NEXT: movl $2048, %edi # imm = 0x800
+; SDAG-X64-NEXT: callq fesetround
+; SDAG-X64-NEXT: fnstcw {{[0-9]+}}(%rsp)
+; SDAG-X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx
+; SDAG-X64-NEXT: shrl $9, %ecx
+; SDAG-X64-NEXT: andb $6, %cl
+; SDAG-X64-NEXT: # kill: def $cl killed $cl killed $ecx
+; SDAG-X64-NEXT: shrl %cl, %ebx
+; SDAG-X64-NEXT: andl $3, %ebx
+; SDAG-X64-NEXT: xorl %ecx, %ecx
+; SDAG-X64-NEXT: cmpl $2, %ebx
+; SDAG-X64-NEXT: setne %cl
+; SDAG-X64-NEXT: negl %ecx
+; SDAG-X64-NEXT: xorl %eax, %eax
+; SDAG-X64-NEXT: cmpl %ecx, %ebp
+; SDAG-X64-NEXT: setne %al
+; SDAG-X64-NEXT: addq $16, %rsp
+; SDAG-X64-NEXT: popq %rbx
+; SDAG-X64-NEXT: popq %r14
+; SDAG-X64-NEXT: popq %rbp
+; SDAG-X64-NEXT: retq
+;
+; GISEL-X86-LABEL: multiple_flt_rounds:
+; GISEL-X86: # %bb.0: # %entry
+; GISEL-X86-NEXT: pushl %ebp
+; GISEL-X86-NEXT: pushl %ebx
+; GISEL-X86-NEXT: pushl %edi
+; GISEL-X86-NEXT: pushl %esi
+; GISEL-X86-NEXT: subl $12, %esp
+; GISEL-X86-NEXT: movl $1, %ebp
+; GISEL-X86-NEXT: movl $1024, (%esp) # imm = 0x400
+; GISEL-X86-NEXT: calll fesetround
+; GISEL-X86-NEXT: fnstcw {{[0-9]+}}(%esp)
+; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: shrl $9, %ecx
+; GISEL-X86-NEXT: andb $6, %cl
+; GISEL-X86-NEXT: movl $45, %edi
+; GISEL-X86-NEXT: movl $45, %eax
+; GISEL-X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; GISEL-X86-NEXT: shrl %cl, %eax
+; GISEL-X86-NEXT: andl $3, %eax
+; GISEL-X86-NEXT: xorl %ebx, %ebx
+; GISEL-X86-NEXT: cmpl $3, %eax
+; GISEL-X86-NEXT: setne %bl
+; GISEL-X86-NEXT: andl $1, %ebx
+; GISEL-X86-NEXT: movl $0, (%esp)
+; GISEL-X86-NEXT: calll fesetround
+; GISEL-X86-NEXT: fnstcw {{[0-9]+}}(%esp)
+; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: shrl $9, %ecx
+; GISEL-X86-NEXT: andb $6, %cl
+; GISEL-X86-NEXT: movl $45, %edx
+; GISEL-X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; GISEL-X86-NEXT: shrl %cl, %edx
+; GISEL-X86-NEXT: andl $3, %edx
+; GISEL-X86-NEXT: xorl %eax, %eax
+; GISEL-X86-NEXT: cmpl $1, %edx
+; GISEL-X86-NEXT: sete %cl
+; GISEL-X86-NEXT: testl %ebx, %ebx
+; GISEL-X86-NEXT: je .LBB1_2
+; GISEL-X86-NEXT: # %bb.1: # %entry
+; GISEL-X86-NEXT: movl $2, %ebp
+; GISEL-X86-NEXT: .LBB1_2: # %entry
+; GISEL-X86-NEXT: xorl %esi, %esi
+; GISEL-X86-NEXT: movb %cl, %al
+; GISEL-X86-NEXT: andl $1, %eax
+; GISEL-X86-NEXT: je .LBB1_4
+; GISEL-X86-NEXT: # %bb.3: # %entry
+; GISEL-X86-NEXT: movl %ebx, %ebp
+; GISEL-X86-NEXT: .LBB1_4: # %entry
+; GISEL-X86-NEXT: movl $3072, (%esp) # imm = 0xC00
+; GISEL-X86-NEXT: calll fesetround
+; GISEL-X86-NEXT: fnstcw {{[0-9]+}}(%esp)
+; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: shrl $9, %ecx
+; GISEL-X86-NEXT: andb $6, %cl
+; GISEL-X86-NEXT: movl $45, %eax
+; GISEL-X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; GISEL-X86-NEXT: shrl %cl, %eax
+; GISEL-X86-NEXT: andl $3, %eax
+; GISEL-X86-NEXT: xorl %ebx, %ebx
+; GISEL-X86-NEXT: cmpl %esi, %eax
+; GISEL-X86-NEXT: setne %bl
+; GISEL-X86-NEXT: andl $1, %ebx
+; GISEL-X86-NEXT: addl %ebp, %ebx
+; GISEL-X86-NEXT: movl $2048, (%esp) # imm = 0x800
+; GISEL-X86-NEXT: calll fesetround
+; GISEL-X86-NEXT: fnstcw {{[0-9]+}}(%esp)
+; GISEL-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT: shrl $9, %ecx
+; GISEL-X86-NEXT: andb $6, %cl
+; GISEL-X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; GISEL-X86-NEXT: shrl %cl, %edi
+; GISEL-X86-NEXT: andl $3, %edi
+; GISEL-X86-NEXT: xorl %ecx, %ecx
+; GISEL-X86-NEXT: movl $2, %eax
+; GISEL-X86-NEXT: cmpl %eax, %edi
+; GISEL-X86-NEXT: setne %cl
+; GISEL-X86-NEXT: shll $31, %ecx
+; GISEL-X86-NEXT: sarl $31, %ecx
+; GISEL-X86-NEXT: xorl %eax, %eax
+; GISEL-X86-NEXT: cmpl %ecx, %ebx
+; GISEL-X86-NEXT: setne %al
+; GISEL-X86-NEXT: andl $1, %eax
+; GISEL-X86-NEXT: addl $12, %esp
+; GISEL-X86-NEXT: popl %esi
+; GISEL-X86-NEXT: popl %edi
+; GISEL-X86-NEXT: popl %ebx
+; GISEL-X86-NEXT: popl %ebp
+; GISEL-X86-NEXT: retl
+;
+; GISEL-X64-LABEL: multiple_flt_rounds:
+; GISEL-X64: # %bb.0: # %entry
+; GISEL-X64-NEXT: pushq %rbp
+; GISEL-X64-NEXT: pushq %r15
+; GISEL-X64-NEXT: pushq %r14
+; GISEL-X64-NEXT: pushq %rbx
+; GISEL-X64-NEXT: pushq %rax
+; GISEL-X64-NEXT: movl $1, %r14d
+; GISEL-X64-NEXT: movl $2, %ebp
+; GISEL-X64-NEXT: movl $1024, %edi # imm = 0x400
+; GISEL-X64-NEXT: callq fesetround
+; GISEL-X64-NEXT: fnstcw (%rsp)
+; GISEL-X64-NEXT: movzwl (%rsp), %ecx
+; GISEL-X64-NEXT: shrl $9, %ecx
+; GISEL-X64-NEXT: andb $6, %cl
+; GISEL-X64-NEXT: movl $45, %ebx
+; GISEL-X64-NEXT: movl $45, %eax
+; GISEL-X64-NEXT: # kill: def $cl killed $cl killed $ecx
+; GISEL-X64-NEXT: shrl %cl, %eax
+; GISEL-X64-NEXT: andl $3, %eax
+; GISEL-X64-NEXT: xorl %r15d, %r15d
+; GISEL-X64-NEXT: cmpl $3, %eax
+; GISEL-X64-NEXT: setne %r15b
+; GISEL-X64-NEXT: andl $1, %r15d
+; GISEL-X64-NEXT: xorl %edi, %edi
+; GISEL-X64-NEXT: callq fesetround
+; GISEL-X64-NEXT: fnstcw {{[0-9]+}}(%rsp)
+; GISEL-X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx
+; GISEL-X64-NEXT: shrl $9, %ecx
+; GISEL-X64-NEXT: andb $6, %cl
+; GISEL-X64-NEXT: movl $45, %eax
+; GISEL-X64-NEXT: # kill: def $cl killed $cl killed $ecx
+; GISEL-X64-NEXT: shrl %cl, %eax
+; GISEL-X64-NEXT: andl $3, %eax
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpl $1, %eax
+; GISEL-X64-NEXT: sete %cl
+; GISEL-X64-NEXT: testl %r15d, %r15d
+; GISEL-X64-NEXT: cmovel %r14d, %ebp
+; GISEL-X64-NEXT: andl $1, %ecx
+; GISEL-X64-NEXT: cmovnel %r15d, %ebp
+; GISEL-X64-NEXT: movl $3072, %edi # imm = 0xC00
+; GISEL-X64-NEXT: callq fesetround
+; GISEL-X64-NEXT: fnstcw {{[0-9]+}}(%rsp)
+; GISEL-X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx
+; GISEL-X64-NEXT: shrl $9, %ecx
+; GISEL-X64-NEXT: andb $6, %cl
+; GISEL-X64-NEXT: movl $45, %eax
+; GISEL-X64-NEXT: # kill: def $cl killed $cl killed $ecx
+; GISEL-X64-NEXT: shrl %cl, %eax
+; GISEL-X64-NEXT: andl $3, %eax
+; GISEL-X64-NEXT: xorl %r14d, %r14d
+; GISEL-X64-NEXT: cmpl $0, %eax
+; GISEL-X64-NEXT: setne %r14b
+; GISEL-X64-NEXT: andl $1, %r14d
+; GISEL-X64-NEXT: addl %ebp, %r14d
+; GISEL-X64-NEXT: movl $2048, %edi # imm = 0x800
+; GISEL-X64-NEXT: callq fesetround
+; GISEL-X64-NEXT: fnstcw {{[0-9]+}}(%rsp)
+; GISEL-X64-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx
+; GISEL-X64-NEXT: shrl $9, %ecx
+; GISEL-X64-NEXT: andb $6, %cl
+; GISEL-X64-NEXT: # kill: def $cl killed $cl killed $ecx
+; GISEL-X64-NEXT: shrl %cl, %ebx
+; GISEL-X64-NEXT: andl $3, %ebx
+; GISEL-X64-NEXT: xorl %ecx, %ecx
+; GISEL-X64-NEXT: cmpl $2, %ebx
+; GISEL-X64-NEXT: setne %cl
+; GISEL-X64-NEXT: shll $31, %ecx
+; GISEL-X64-NEXT: sarl $31, %ecx
+; GISEL-X64-NEXT: xorl %eax, %eax
+; GISEL-X64-NEXT: cmpl %ecx, %r14d
+; GISEL-X64-NEXT: setne %al
+; GISEL-X64-NEXT: andl $1, %eax
+; GISEL-X64-NEXT: addq $8, %rsp
+; GISEL-X64-NEXT: popq %rbx
+; GISEL-X64-NEXT: popq %r14
+; GISEL-X64-NEXT: popq %r15
+; GISEL-X64-NEXT: popq %rbp
+; GISEL-X64-NEXT: retq
entry:
%call = tail call i32 @fesetround(i32 1024)
%0 = tail call i32 @llvm.get.rounding()
diff --git a/llvm/test/MC/ELF/mc-dump.s b/llvm/test/MC/ELF/mc-dump.s
index 3788eb093eef2..fb29fcd880866 100644
--- a/llvm/test/MC/ELF/mc-dump.s
+++ b/llvm/test/MC/ELF/mc-dump.s
@@ -12,7 +12,7 @@
# CHECK-NEXT:0 Data Size:0 []
# CHECK-NEXT: Symbol @0 _start
# CHECK-NEXT:0 Org Offset:3 Value:0
-# CHECK-NEXT:3 Relaxable Size:2 <MCInst #1996 <MCOperand Expr:.Ltmp0>>
+# CHECK-NEXT:3 Relaxable Size:2 <MCInst #1998 <MCOperand Expr:.Ltmp0>>
# CHECK-NEXT: Fixup @1 Value:.Ltmp0 Kind:4001
# CHECK-NEXT:5 Data Size:16 [48,8b,04,25,00,00,00,00,48,8b,04,25,00,00,00,00]
# CHECK-NEXT: Fixup @4 Value:f0@<variant 11> Kind:4017
diff --git a/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td b/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td
index 6e5d7de0732f0..0d6d4a3a29274 100644
--- a/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td
+++ b/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td
@@ -535,7 +535,7 @@ def : Pat<(frag GPR32:$src1, complex:$src2, complex:$src3),
// R00O-NEXT: GIM_Reject,
// R00O: // Label [[DEFAULT_NUM]]: @[[DEFAULT]]
// R00O-NEXT: GIM_Reject,
-// R00O-NEXT: }; // Size: 1878 bytes
+// R00O-NEXT: }; // Size: 1882 bytes
def INSNBOB : I<(outs GPR32:$dst), (ins GPR32:$src1, GPR32:$src2, GPR32:$src3, GPR32:$src4),
[(set GPR32:$dst,
>From 8f25ae1b79ea58456f8aae403697a7f5005a625f Mon Sep 17 00:00:00 2001
From: JaydeepChauhan14 <chauhan.jaydeep.ashwinbhai at intel.com>
Date: Wed, 9 Jul 2025 18:36:35 +0530
Subject: [PATCH 2/4] Update llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
Co-authored-by: Evgenii Kudriashov <evgenii.kudriashov at intel.com>
---
llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 7fe58539cd4ec..826883e0a16aa 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -819,9 +819,8 @@ bool X86LegalizerInfo::legalizeGETROUNDING(MachineInstr &MI,
int MemSize = 2;
Align Alignment = Align(2);
MachinePointerInfo PtrInfo;
- auto StackTemp = Helper.createStackTemporary(TypeSize::getFixed(MemSize),
- Alignment, PtrInfo);
- Register StackPtr = StackTemp.getReg(0);
+ Register StackPtr = Helper.createStackTemporary(TypeSize::getFixed(MemSize),
+ Alignment, PtrInfo).getReg(0);
auto StoreMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
MemSize, Alignment);
>From feafaf2d7398c45d85950b9b25e704418dbd3ae1 Mon Sep 17 00:00:00 2001
From: Chauhan Jaydeep Ashwinbhai <chauhan.jaydeep.ashwinbhai at intel.com>
Date: Wed, 9 Jul 2025 06:15:56 -0700
Subject: [PATCH 3/4] Fixed formatting issue
---
llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 826883e0a16aa..9cd74f4110b01 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -819,8 +819,10 @@ bool X86LegalizerInfo::legalizeGETROUNDING(MachineInstr &MI,
int MemSize = 2;
Align Alignment = Align(2);
MachinePointerInfo PtrInfo;
- Register StackPtr = Helper.createStackTemporary(TypeSize::getFixed(MemSize),
- Alignment, PtrInfo).getReg(0);
+ Register StackPtr =
+ Helper
+ .createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
+ .getReg(0);
auto StoreMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
MemSize, Alignment);
>From e5a970d5b9ba06c2caf1c587091787f82719bbab Mon Sep 17 00:00:00 2001
From: Chauhan Jaydeep Ashwinbhai <chauhan.jaydeep.ashwinbhai at intel.com>
Date: Wed, 9 Jul 2025 08:31:37 -0700
Subject: [PATCH 4/4] Added previous code
---
llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 9cd74f4110b01..7fe58539cd4ec 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -819,10 +819,9 @@ bool X86LegalizerInfo::legalizeGETROUNDING(MachineInstr &MI,
int MemSize = 2;
Align Alignment = Align(2);
MachinePointerInfo PtrInfo;
- Register StackPtr =
- Helper
- .createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
- .getReg(0);
+ auto StackTemp = Helper.createStackTemporary(TypeSize::getFixed(MemSize),
+ Alignment, PtrInfo);
+ Register StackPtr = StackTemp.getReg(0);
auto StoreMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
MemSize, Alignment);
More information about the llvm-commits
mailing list