[llvm] 0f0079c - [X86][GlobalISel] Added support for llvm.get.rounding (#147716)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 11 06:48:21 PDT 2025


Author: JaydeepChauhan14
Date: 2025-07-11T15:48:18+02:00
New Revision: 0f0079c29da4b4d5bbd43dced1db9ad6c6d11008

URL: https://github.com/llvm/llvm-project/commit/0f0079c29da4b4d5bbd43dced1db9ad6c6d11008
DIFF: https://github.com/llvm/llvm-project/commit/0f0079c29da4b4d5bbd43dced1db9ad6c6d11008.diff

LOG: [X86][GlobalISel] Added support for llvm.get.rounding (#147716)

- This implementation is adapted from SDAG
X86TargetLowering::LowerGET_ROUNDING.
- llvm.set.rounding will be added later because it involves MXCSR
updates currently unsupported.

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
    llvm/include/llvm/Support/TargetOpcodes.def
    llvm/include/llvm/Target/GenericOpcodes.td
    llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
    llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
    llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
    llvm/lib/Target/X86/X86InstrGISel.td
    llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
    llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
    llvm/test/CodeGen/X86/flt-rounds.ll
    llvm/test/MC/ELF/mc-dump.s
    llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 7a598bb77b356..756c0b24a6f8b 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -2424,6 +2424,11 @@ class LLVM_ABI MachineIRBuilder {
     return buildInstr(TargetOpcode::G_RESET_FPMODE, {}, {});
   }
 
+  /// Build and insert \p Dst = G_GET_ROUNDING
+  MachineInstrBuilder buildGetRounding(const DstOp &Dst) {
+    return buildInstr(TargetOpcode::G_GET_ROUNDING, {Dst}, {});
+  }
+
   virtual MachineInstrBuilder
   buildInstr(unsigned Opc, ArrayRef<DstOp> DstOps, ArrayRef<SrcOp> SrcOps,
              std::optional<unsigned> Flags = std::nullopt);

diff  --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index 6ba0290cc77a6..b905576b61791 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -744,6 +744,8 @@ HANDLE_TARGET_OPCODE(G_GET_FPMODE)
 HANDLE_TARGET_OPCODE(G_SET_FPMODE)
 HANDLE_TARGET_OPCODE(G_RESET_FPMODE)
 
+HANDLE_TARGET_OPCODE(G_GET_ROUNDING)
+
 /// Generic pointer offset
 HANDLE_TARGET_OPCODE(G_PTR_ADD)
 

diff  --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index bcf49b448e782..ce4750db88c9a 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -1267,6 +1267,12 @@ def G_READSTEADYCOUNTER : GenericInstruction {
   let hasSideEffects = true;
 }
 
+def G_GET_ROUNDING : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins);
+  let hasSideEffects = true;
+}
+
 //------------------------------------------------------------------------------
 // Memory ops
 //------------------------------------------------------------------------------

diff  --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index ef39fc74554c9..d7280eaba2440 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2593,6 +2593,9 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
   case Intrinsic::reset_fpmode:
     MIRBuilder.buildResetFPMode();
     return true;
+  case Intrinsic::get_rounding:
+    MIRBuilder.buildGetRounding(getOrCreateVReg(CI));
+    return true;
   case Intrinsic::vscale: {
     MIRBuilder.buildVScale(getOrCreateVReg(CI), 1);
     return true;

diff  --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 8e304c07ed5cb..7fe58539cd4ec 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -17,6 +17,7 @@
 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -108,6 +109,8 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
       .legalFor(HasSSE2 || UseX87, {s64})
       .legalFor(UseX87, {s80});
 
+  getActionDefinitionsBuilder(G_GET_ROUNDING).customFor({s32});
+
   // merge/unmerge
   for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
     unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
@@ -611,6 +614,8 @@ bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
     return legalizeSITOFP(MI, MRI, Helper);
   case TargetOpcode::G_FPTOSI:
     return legalizeFPTOSI(MI, MRI, Helper);
+  case TargetOpcode::G_GET_ROUNDING:
+    return legalizeGETROUNDING(MI, MRI, Helper);
   }
   llvm_unreachable("expected switch to return");
 }
@@ -777,6 +782,82 @@ bool X86LegalizerInfo::legalizeNarrowingStore(MachineInstr &MI,
   return true;
 }
 
+bool X86LegalizerInfo::legalizeGETROUNDING(MachineInstr &MI,
+                                           MachineRegisterInfo &MRI,
+                                           LegalizerHelper &Helper) const {
+  /*
+   The rounding mode is in bits 11:10 of the x87 FP control word (referred to
+   as FPSR below), and has the following settings:
+     00 Round to nearest
+     01 Round to -inf
+     10 Round to +inf
+     11 Round to 0
+
+  GET_ROUNDING, on the other hand, expects the following:
+    -1 Undefined
+     0 Round to 0
+     1 Round to nearest
+     2 Round to +inf
+     3 Round to -inf
+
+  To perform the conversion, we use a packed lookup table of the four 2-bit
+  values that we can index by FPSR[11:10]
+    0x2d --> (0b00,10,11,01) --> (0,2,3,1) >> FPSR[11:10]
+
+    (0x2d >> ((FPSR >> 9) & 6)) & 3
+  */
+
+  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+  MachineFunction &MF = MIRBuilder.getMF();
+  Register Dst = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(Dst);
+  const LLT s8 = LLT::scalar(8);
+  const LLT s16 = LLT::scalar(16);
+  const LLT s32 = LLT::scalar(32);
+
+  // Allocate a 2-byte stack slot to hold the FP Control Word
+  int MemSize = 2;
+  Align Alignment = Align(2);
+  MachinePointerInfo PtrInfo;
+  auto StackTemp = Helper.createStackTemporary(TypeSize::getFixed(MemSize),
+                                               Alignment, PtrInfo);
+  Register StackPtr = StackTemp.getReg(0);
+
+  auto StoreMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+                                          MemSize, Alignment);
+
+  // Store FP Control Word to stack slot using G_FNSTCW16
+  MIRBuilder.buildInstr(X86::G_FNSTCW16)
+      .addUse(StackPtr)
+      .addMemOperand(StoreMMO);
+
+  // Load FP Control Word from stack slot
+  auto LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
+                                         MemSize, Alignment);
+
+  auto CWD32 =
+      MIRBuilder.buildZExt(s32, MIRBuilder.buildLoad(s16, StackPtr, *LoadMMO));
+  auto Shifted8 = MIRBuilder.buildTrunc(
+      s8, MIRBuilder.buildLShr(s32, CWD32, MIRBuilder.buildConstant(s8, 9)));
+  auto Masked32 = MIRBuilder.buildZExt(
+      s32, MIRBuilder.buildAnd(s8, Shifted8, MIRBuilder.buildConstant(s8, 6)));
+
+  // LUT is a packed lookup table (0x2d) that maps the 2-bit x87 FPU rounding
+  // mode (bits 11:10 of the control word) to the value expected by
+  // GET_ROUNDING: shift LUT right by twice the extracted rounding mode, then
+  // mask the result with 3 to obtain the final value.
+  auto LUT = MIRBuilder.buildConstant(s32, 0x2d);
+  auto LUTShifted = MIRBuilder.buildLShr(s32, LUT, Masked32);
+  auto RetVal =
+      MIRBuilder.buildAnd(s32, LUTShifted, MIRBuilder.buildConstant(s32, 3));
+  auto RetValTrunc = MIRBuilder.buildZExtOrTrunc(DstTy, RetVal);
+
+  MIRBuilder.buildCopy(Dst, RetValTrunc);
+
+  MI.eraseFromParent();
+  return true;
+}
+
 bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                          MachineInstr &MI) const {
   return true;

diff  --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
index 1ba82674ed4c6..0003552d70ee0 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.h
@@ -54,6 +54,9 @@ class X86LegalizerInfo : public LegalizerInfo {
 
   bool legalizeFPTOSI(MachineInstr &MI, MachineRegisterInfo &MRI,
                       LegalizerHelper &Helper) const;
+
+  bool legalizeGETROUNDING(MachineInstr &MI, MachineRegisterInfo &MRI,
+                           LegalizerHelper &Helper) const;
 };
 } // namespace llvm
 #endif

diff  --git a/llvm/lib/Target/X86/X86InstrGISel.td b/llvm/lib/Target/X86/X86InstrGISel.td
index f4fa33807cd9a..39198214037a3 100644
--- a/llvm/lib/Target/X86/X86InstrGISel.td
+++ b/llvm/lib/Target/X86/X86InstrGISel.td
@@ -27,5 +27,13 @@ def G_FIST : X86GenericInstruction {
   let mayStore = true;
 }
 
+def G_FNSTCW16 : X86GenericInstruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins ptype0:$dst);
+  let hasSideEffects = true;
+  let mayStore = true;
+}
+
 def : GINodeEquiv<G_FILD, X86fild>;
 def : GINodeEquiv<G_FIST, X86fp_to_mem>;
+def : GINodeEquiv<G_FNSTCW16, X86fp_cwd_get16>;

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index d0424f2e400fc..bd2d8c095831b 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -636,6 +636,9 @@
 # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: G_GET_ROUNDING (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined
 # DEBUG-NEXT: G_PTR_ADD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
 # DEBUG-NEXT: .. the first uncovered type index: 2, OK
 # DEBUG-NEXT: .. the first uncovered imm index: 0, OK

diff  --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
index 30e455f57737b..82cc6829838a0 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir
@@ -624,6 +624,9 @@
 # DEBUG-NEXT: G_RESET_FPMODE (opcode {{[0-9]+}}): 0 type indices, 0 imm indices
 # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_GET_ROUNDING (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined
 # DEBUG-NEXT: G_PTR_ADD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
 # DEBUG-NEXT: .. the first uncovered type index: 2, OK
 # DEBUG-NEXT: .. the first uncovered imm index: 0, OK

diff  --git a/llvm/test/CodeGen/X86/flt-rounds.ll b/llvm/test/CodeGen/X86/flt-rounds.ll
index a5908978a5438..1d7a8d8456c27 100644
--- a/llvm/test/CodeGen/X86/flt-rounds.ll
+++ b/llvm/test/CodeGen/X86/flt-rounds.ll
@@ -1,7 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-sse -verify-machineinstrs < %s | FileCheck %s --check-prefix=X86
-; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-sse2 -verify-machineinstrs < %s | FileCheck %s --check-prefix=X86
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=X64
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-sse -verify-machineinstrs < %s | FileCheck %s --check-prefixes=X86,SDAG-X86
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-sse2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=X86,SDAG-X86
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefixes=X64,SDAG-X64
+; RUN: llc -mtriple=i686-unknown-linux-gnu   -global-isel=1 -global-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=X86,GISEL-X86
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -global-isel=1 -global-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=X64,GISEL-X64
 
 declare i32 @llvm.get.rounding()
 
@@ -37,139 +39,309 @@ define i32 @test_flt_rounds() nounwind {
 
 ; Make sure we preserve order with fesetround.
 define i32 @multiple_flt_rounds() nounwind {
-; X86-LABEL: multiple_flt_rounds:
-; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %ebx
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    subl $20, %esp
-; X86-NEXT:    movl $1024, (%esp) # imm = 0x400
-; X86-NEXT:    calll fesetround
-; X86-NEXT:    fnstcw {{[0-9]+}}(%esp)
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    shrl $9, %ecx
-; X86-NEXT:    andb $6, %cl
-; X86-NEXT:    movl $45, %esi
-; X86-NEXT:    movl $45, %eax
-; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-NEXT:    shrl %cl, %eax
-; X86-NEXT:    andl $3, %eax
-; X86-NEXT:    xorl %ebx, %ebx
-; X86-NEXT:    cmpl $3, %eax
-; X86-NEXT:    setne %bl
-; X86-NEXT:    movl $0, (%esp)
-; X86-NEXT:    calll fesetround
-; X86-NEXT:    fnstcw {{[0-9]+}}(%esp)
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    shrl $9, %ecx
-; X86-NEXT:    andb $6, %cl
-; X86-NEXT:    movl $45, %eax
-; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-NEXT:    shrl %cl, %eax
-; X86-NEXT:    andl $3, %eax
-; X86-NEXT:    cmpl $1, %eax
-; X86-NEXT:    je .LBB1_2
-; X86-NEXT:  # %bb.1: # %entry
-; X86-NEXT:    incl %ebx
-; X86-NEXT:  .LBB1_2: # %entry
-; X86-NEXT:    movl $3072, (%esp) # imm = 0xC00
-; X86-NEXT:    calll fesetround
-; X86-NEXT:    fnstcw {{[0-9]+}}(%esp)
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    shrl $9, %ecx
-; X86-NEXT:    andb $6, %cl
-; X86-NEXT:    movl $45, %eax
-; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-NEXT:    shrl %cl, %eax
-; X86-NEXT:    andl $3, %eax
-; X86-NEXT:    cmpl $1, %eax
-; X86-NEXT:    sbbl $-1, %ebx
-; X86-NEXT:    movl $2048, (%esp) # imm = 0x800
-; X86-NEXT:    calll fesetround
-; X86-NEXT:    fnstcw {{[0-9]+}}(%esp)
-; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    shrl $9, %ecx
-; X86-NEXT:    andb $6, %cl
-; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-NEXT:    shrl %cl, %esi
-; X86-NEXT:    andl $3, %esi
-; X86-NEXT:    xorl %ecx, %ecx
-; X86-NEXT:    cmpl $2, %esi
-; X86-NEXT:    setne %cl
-; X86-NEXT:    negl %ecx
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    cmpl %ecx, %ebx
-; X86-NEXT:    setne %al
-; X86-NEXT:    addl $20, %esp
-; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %ebx
-; X86-NEXT:    retl
+; SDAG-X86-LABEL: multiple_flt_rounds:
+; SDAG-X86:       # %bb.0: # %entry
+; SDAG-X86-NEXT:    pushl %ebx
+; SDAG-X86-NEXT:    pushl %esi
+; SDAG-X86-NEXT:    subl $20, %esp
+; SDAG-X86-NEXT:    movl $1024, (%esp) # imm = 0x400
+; SDAG-X86-NEXT:    calll fesetround
+; SDAG-X86-NEXT:    fnstcw {{[0-9]+}}(%esp)
+; SDAG-X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; SDAG-X86-NEXT:    shrl $9, %ecx
+; SDAG-X86-NEXT:    andb $6, %cl
+; SDAG-X86-NEXT:    movl $45, %esi
+; SDAG-X86-NEXT:    movl $45, %eax
+; SDAG-X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; SDAG-X86-NEXT:    shrl %cl, %eax
+; SDAG-X86-NEXT:    andl $3, %eax
+; SDAG-X86-NEXT:    xorl %ebx, %ebx
+; SDAG-X86-NEXT:    cmpl $3, %eax
+; SDAG-X86-NEXT:    setne %bl
+; SDAG-X86-NEXT:    movl $0, (%esp)
+; SDAG-X86-NEXT:    calll fesetround
+; SDAG-X86-NEXT:    fnstcw {{[0-9]+}}(%esp)
+; SDAG-X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; SDAG-X86-NEXT:    shrl $9, %ecx
+; SDAG-X86-NEXT:    andb $6, %cl
+; SDAG-X86-NEXT:    movl $45, %eax
+; SDAG-X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; SDAG-X86-NEXT:    shrl %cl, %eax
+; SDAG-X86-NEXT:    andl $3, %eax
+; SDAG-X86-NEXT:    cmpl $1, %eax
+; SDAG-X86-NEXT:    je .LBB1_2
+; SDAG-X86-NEXT:  # %bb.1: # %entry
+; SDAG-X86-NEXT:    incl %ebx
+; SDAG-X86-NEXT:  .LBB1_2: # %entry
+; SDAG-X86-NEXT:    movl $3072, (%esp) # imm = 0xC00
+; SDAG-X86-NEXT:    calll fesetround
+; SDAG-X86-NEXT:    fnstcw {{[0-9]+}}(%esp)
+; SDAG-X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; SDAG-X86-NEXT:    shrl $9, %ecx
+; SDAG-X86-NEXT:    andb $6, %cl
+; SDAG-X86-NEXT:    movl $45, %eax
+; SDAG-X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; SDAG-X86-NEXT:    shrl %cl, %eax
+; SDAG-X86-NEXT:    andl $3, %eax
+; SDAG-X86-NEXT:    cmpl $1, %eax
+; SDAG-X86-NEXT:    sbbl $-1, %ebx
+; SDAG-X86-NEXT:    movl $2048, (%esp) # imm = 0x800
+; SDAG-X86-NEXT:    calll fesetround
+; SDAG-X86-NEXT:    fnstcw {{[0-9]+}}(%esp)
+; SDAG-X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; SDAG-X86-NEXT:    shrl $9, %ecx
+; SDAG-X86-NEXT:    andb $6, %cl
+; SDAG-X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; SDAG-X86-NEXT:    shrl %cl, %esi
+; SDAG-X86-NEXT:    andl $3, %esi
+; SDAG-X86-NEXT:    xorl %ecx, %ecx
+; SDAG-X86-NEXT:    cmpl $2, %esi
+; SDAG-X86-NEXT:    setne %cl
+; SDAG-X86-NEXT:    negl %ecx
+; SDAG-X86-NEXT:    xorl %eax, %eax
+; SDAG-X86-NEXT:    cmpl %ecx, %ebx
+; SDAG-X86-NEXT:    setne %al
+; SDAG-X86-NEXT:    addl $20, %esp
+; SDAG-X86-NEXT:    popl %esi
+; SDAG-X86-NEXT:    popl %ebx
+; SDAG-X86-NEXT:    retl
 ;
-; X64-LABEL: multiple_flt_rounds:
-; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rbp
-; X64-NEXT:    pushq %r14
-; X64-NEXT:    pushq %rbx
-; X64-NEXT:    subq $16, %rsp
-; X64-NEXT:    movl $1024, %edi # imm = 0x400
-; X64-NEXT:    callq fesetround
-; X64-NEXT:    fnstcw {{[0-9]+}}(%rsp)
-; X64-NEXT:    movzwl {{[0-9]+}}(%rsp), %ecx
-; X64-NEXT:    shrl $9, %ecx
-; X64-NEXT:    andb $6, %cl
-; X64-NEXT:    movl $45, %ebx
-; X64-NEXT:    movl $45, %eax
-; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    shrl %cl, %eax
-; X64-NEXT:    andl $3, %eax
-; X64-NEXT:    xorl %r14d, %r14d
-; X64-NEXT:    cmpl $3, %eax
-; X64-NEXT:    setne %r14b
-; X64-NEXT:    xorl %edi, %edi
-; X64-NEXT:    callq fesetround
-; X64-NEXT:    fnstcw {{[0-9]+}}(%rsp)
-; X64-NEXT:    movzwl {{[0-9]+}}(%rsp), %ecx
-; X64-NEXT:    shrl $9, %ecx
-; X64-NEXT:    andb $6, %cl
-; X64-NEXT:    movl $45, %eax
-; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    shrl %cl, %eax
-; X64-NEXT:    andl $3, %eax
-; X64-NEXT:    leal 1(%r14), %ebp
-; X64-NEXT:    cmpl $1, %eax
-; X64-NEXT:    cmovel %r14d, %ebp
-; X64-NEXT:    movl $3072, %edi # imm = 0xC00
-; X64-NEXT:    callq fesetround
-; X64-NEXT:    fnstcw {{[0-9]+}}(%rsp)
-; X64-NEXT:    movzwl {{[0-9]+}}(%rsp), %ecx
-; X64-NEXT:    shrl $9, %ecx
-; X64-NEXT:    andb $6, %cl
-; X64-NEXT:    movl $45, %eax
-; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    shrl %cl, %eax
-; X64-NEXT:    andl $3, %eax
-; X64-NEXT:    cmpl $1, %eax
-; X64-NEXT:    sbbl $-1, %ebp
-; X64-NEXT:    movl $2048, %edi # imm = 0x800
-; X64-NEXT:    callq fesetround
-; X64-NEXT:    fnstcw {{[0-9]+}}(%rsp)
-; X64-NEXT:    movzwl {{[0-9]+}}(%rsp), %ecx
-; X64-NEXT:    shrl $9, %ecx
-; X64-NEXT:    andb $6, %cl
-; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    shrl %cl, %ebx
-; X64-NEXT:    andl $3, %ebx
-; X64-NEXT:    xorl %ecx, %ecx
-; X64-NEXT:    cmpl $2, %ebx
-; X64-NEXT:    setne %cl
-; X64-NEXT:    negl %ecx
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    cmpl %ecx, %ebp
-; X64-NEXT:    setne %al
-; X64-NEXT:    addq $16, %rsp
-; X64-NEXT:    popq %rbx
-; X64-NEXT:    popq %r14
-; X64-NEXT:    popq %rbp
-; X64-NEXT:    retq
+; SDAG-X64-LABEL: multiple_flt_rounds:
+; SDAG-X64:       # %bb.0: # %entry
+; SDAG-X64-NEXT:    pushq %rbp
+; SDAG-X64-NEXT:    pushq %r14
+; SDAG-X64-NEXT:    pushq %rbx
+; SDAG-X64-NEXT:    subq $16, %rsp
+; SDAG-X64-NEXT:    movl $1024, %edi # imm = 0x400
+; SDAG-X64-NEXT:    callq fesetround
+; SDAG-X64-NEXT:    fnstcw {{[0-9]+}}(%rsp)
+; SDAG-X64-NEXT:    movzwl {{[0-9]+}}(%rsp), %ecx
+; SDAG-X64-NEXT:    shrl $9, %ecx
+; SDAG-X64-NEXT:    andb $6, %cl
+; SDAG-X64-NEXT:    movl $45, %ebx
+; SDAG-X64-NEXT:    movl $45, %eax
+; SDAG-X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; SDAG-X64-NEXT:    shrl %cl, %eax
+; SDAG-X64-NEXT:    andl $3, %eax
+; SDAG-X64-NEXT:    xorl %r14d, %r14d
+; SDAG-X64-NEXT:    cmpl $3, %eax
+; SDAG-X64-NEXT:    setne %r14b
+; SDAG-X64-NEXT:    xorl %edi, %edi
+; SDAG-X64-NEXT:    callq fesetround
+; SDAG-X64-NEXT:    fnstcw {{[0-9]+}}(%rsp)
+; SDAG-X64-NEXT:    movzwl {{[0-9]+}}(%rsp), %ecx
+; SDAG-X64-NEXT:    shrl $9, %ecx
+; SDAG-X64-NEXT:    andb $6, %cl
+; SDAG-X64-NEXT:    movl $45, %eax
+; SDAG-X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; SDAG-X64-NEXT:    shrl %cl, %eax
+; SDAG-X64-NEXT:    andl $3, %eax
+; SDAG-X64-NEXT:    leal 1(%r14), %ebp
+; SDAG-X64-NEXT:    cmpl $1, %eax
+; SDAG-X64-NEXT:    cmovel %r14d, %ebp
+; SDAG-X64-NEXT:    movl $3072, %edi # imm = 0xC00
+; SDAG-X64-NEXT:    callq fesetround
+; SDAG-X64-NEXT:    fnstcw {{[0-9]+}}(%rsp)
+; SDAG-X64-NEXT:    movzwl {{[0-9]+}}(%rsp), %ecx
+; SDAG-X64-NEXT:    shrl $9, %ecx
+; SDAG-X64-NEXT:    andb $6, %cl
+; SDAG-X64-NEXT:    movl $45, %eax
+; SDAG-X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; SDAG-X64-NEXT:    shrl %cl, %eax
+; SDAG-X64-NEXT:    andl $3, %eax
+; SDAG-X64-NEXT:    cmpl $1, %eax
+; SDAG-X64-NEXT:    sbbl $-1, %ebp
+; SDAG-X64-NEXT:    movl $2048, %edi # imm = 0x800
+; SDAG-X64-NEXT:    callq fesetround
+; SDAG-X64-NEXT:    fnstcw {{[0-9]+}}(%rsp)
+; SDAG-X64-NEXT:    movzwl {{[0-9]+}}(%rsp), %ecx
+; SDAG-X64-NEXT:    shrl $9, %ecx
+; SDAG-X64-NEXT:    andb $6, %cl
+; SDAG-X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; SDAG-X64-NEXT:    shrl %cl, %ebx
+; SDAG-X64-NEXT:    andl $3, %ebx
+; SDAG-X64-NEXT:    xorl %ecx, %ecx
+; SDAG-X64-NEXT:    cmpl $2, %ebx
+; SDAG-X64-NEXT:    setne %cl
+; SDAG-X64-NEXT:    negl %ecx
+; SDAG-X64-NEXT:    xorl %eax, %eax
+; SDAG-X64-NEXT:    cmpl %ecx, %ebp
+; SDAG-X64-NEXT:    setne %al
+; SDAG-X64-NEXT:    addq $16, %rsp
+; SDAG-X64-NEXT:    popq %rbx
+; SDAG-X64-NEXT:    popq %r14
+; SDAG-X64-NEXT:    popq %rbp
+; SDAG-X64-NEXT:    retq
+;
+; GISEL-X86-LABEL: multiple_flt_rounds:
+; GISEL-X86:       # %bb.0: # %entry
+; GISEL-X86-NEXT:    pushl %ebp
+; GISEL-X86-NEXT:    pushl %ebx
+; GISEL-X86-NEXT:    pushl %edi
+; GISEL-X86-NEXT:    pushl %esi
+; GISEL-X86-NEXT:    subl $12, %esp
+; GISEL-X86-NEXT:    movl $1, %ebp
+; GISEL-X86-NEXT:    movl $1024, (%esp) # imm = 0x400
+; GISEL-X86-NEXT:    calll fesetround
+; GISEL-X86-NEXT:    fnstcw {{[0-9]+}}(%esp)
+; GISEL-X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT:    shrl $9, %ecx
+; GISEL-X86-NEXT:    andb $6, %cl
+; GISEL-X86-NEXT:    movl $45, %edi
+; GISEL-X86-NEXT:    movl $45, %eax
+; GISEL-X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; GISEL-X86-NEXT:    shrl %cl, %eax
+; GISEL-X86-NEXT:    andl $3, %eax
+; GISEL-X86-NEXT:    xorl %ebx, %ebx
+; GISEL-X86-NEXT:    cmpl $3, %eax
+; GISEL-X86-NEXT:    setne %bl
+; GISEL-X86-NEXT:    andl $1, %ebx
+; GISEL-X86-NEXT:    movl $0, (%esp)
+; GISEL-X86-NEXT:    calll fesetround
+; GISEL-X86-NEXT:    fnstcw {{[0-9]+}}(%esp)
+; GISEL-X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT:    shrl $9, %ecx
+; GISEL-X86-NEXT:    andb $6, %cl
+; GISEL-X86-NEXT:    movl $45, %edx
+; GISEL-X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; GISEL-X86-NEXT:    shrl %cl, %edx
+; GISEL-X86-NEXT:    andl $3, %edx
+; GISEL-X86-NEXT:    xorl %eax, %eax
+; GISEL-X86-NEXT:    cmpl $1, %edx
+; GISEL-X86-NEXT:    sete %cl
+; GISEL-X86-NEXT:    testl %ebx, %ebx
+; GISEL-X86-NEXT:    je .LBB1_2
+; GISEL-X86-NEXT:  # %bb.1: # %entry
+; GISEL-X86-NEXT:    movl $2, %ebp
+; GISEL-X86-NEXT:  .LBB1_2: # %entry
+; GISEL-X86-NEXT:    xorl %esi, %esi
+; GISEL-X86-NEXT:    movb %cl, %al
+; GISEL-X86-NEXT:    andl $1, %eax
+; GISEL-X86-NEXT:    je .LBB1_4
+; GISEL-X86-NEXT:  # %bb.3: # %entry
+; GISEL-X86-NEXT:    movl %ebx, %ebp
+; GISEL-X86-NEXT:  .LBB1_4: # %entry
+; GISEL-X86-NEXT:    movl $3072, (%esp) # imm = 0xC00
+; GISEL-X86-NEXT:    calll fesetround
+; GISEL-X86-NEXT:    fnstcw {{[0-9]+}}(%esp)
+; GISEL-X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT:    shrl $9, %ecx
+; GISEL-X86-NEXT:    andb $6, %cl
+; GISEL-X86-NEXT:    movl $45, %eax
+; GISEL-X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; GISEL-X86-NEXT:    shrl %cl, %eax
+; GISEL-X86-NEXT:    andl $3, %eax
+; GISEL-X86-NEXT:    xorl %ebx, %ebx
+; GISEL-X86-NEXT:    cmpl %esi, %eax
+; GISEL-X86-NEXT:    setne %bl
+; GISEL-X86-NEXT:    andl $1, %ebx
+; GISEL-X86-NEXT:    addl %ebp, %ebx
+; GISEL-X86-NEXT:    movl $2048, (%esp) # imm = 0x800
+; GISEL-X86-NEXT:    calll fesetround
+; GISEL-X86-NEXT:    fnstcw {{[0-9]+}}(%esp)
+; GISEL-X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; GISEL-X86-NEXT:    shrl $9, %ecx
+; GISEL-X86-NEXT:    andb $6, %cl
+; GISEL-X86-NEXT:    # kill: def $cl killed $cl killed $ecx
+; GISEL-X86-NEXT:    shrl %cl, %edi
+; GISEL-X86-NEXT:    andl $3, %edi
+; GISEL-X86-NEXT:    xorl %ecx, %ecx
+; GISEL-X86-NEXT:    movl $2, %eax
+; GISEL-X86-NEXT:    cmpl %eax, %edi
+; GISEL-X86-NEXT:    setne %cl
+; GISEL-X86-NEXT:    shll $31, %ecx
+; GISEL-X86-NEXT:    sarl $31, %ecx
+; GISEL-X86-NEXT:    xorl %eax, %eax
+; GISEL-X86-NEXT:    cmpl %ecx, %ebx
+; GISEL-X86-NEXT:    setne %al
+; GISEL-X86-NEXT:    andl $1, %eax
+; GISEL-X86-NEXT:    addl $12, %esp
+; GISEL-X86-NEXT:    popl %esi
+; GISEL-X86-NEXT:    popl %edi
+; GISEL-X86-NEXT:    popl %ebx
+; GISEL-X86-NEXT:    popl %ebp
+; GISEL-X86-NEXT:    retl
+;
+; GISEL-X64-LABEL: multiple_flt_rounds:
+; GISEL-X64:       # %bb.0: # %entry
+; GISEL-X64-NEXT:    pushq %rbp
+; GISEL-X64-NEXT:    pushq %r15
+; GISEL-X64-NEXT:    pushq %r14
+; GISEL-X64-NEXT:    pushq %rbx
+; GISEL-X64-NEXT:    pushq %rax
+; GISEL-X64-NEXT:    movl $1, %r14d
+; GISEL-X64-NEXT:    movl $2, %ebp
+; GISEL-X64-NEXT:    movl $1024, %edi # imm = 0x400
+; GISEL-X64-NEXT:    callq fesetround
+; GISEL-X64-NEXT:    fnstcw (%rsp)
+; GISEL-X64-NEXT:    movzwl (%rsp), %ecx
+; GISEL-X64-NEXT:    shrl $9, %ecx
+; GISEL-X64-NEXT:    andb $6, %cl
+; GISEL-X64-NEXT:    movl $45, %ebx
+; GISEL-X64-NEXT:    movl $45, %eax
+; GISEL-X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; GISEL-X64-NEXT:    shrl %cl, %eax
+; GISEL-X64-NEXT:    andl $3, %eax
+; GISEL-X64-NEXT:    xorl %r15d, %r15d
+; GISEL-X64-NEXT:    cmpl $3, %eax
+; GISEL-X64-NEXT:    setne %r15b
+; GISEL-X64-NEXT:    andl $1, %r15d
+; GISEL-X64-NEXT:    xorl %edi, %edi
+; GISEL-X64-NEXT:    callq fesetround
+; GISEL-X64-NEXT:    fnstcw {{[0-9]+}}(%rsp)
+; GISEL-X64-NEXT:    movzwl {{[0-9]+}}(%rsp), %ecx
+; GISEL-X64-NEXT:    shrl $9, %ecx
+; GISEL-X64-NEXT:    andb $6, %cl
+; GISEL-X64-NEXT:    movl $45, %eax
+; GISEL-X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; GISEL-X64-NEXT:    shrl %cl, %eax
+; GISEL-X64-NEXT:    andl $3, %eax
+; GISEL-X64-NEXT:    xorl %ecx, %ecx
+; GISEL-X64-NEXT:    cmpl $1, %eax
+; GISEL-X64-NEXT:    sete %cl
+; GISEL-X64-NEXT:    testl %r15d, %r15d
+; GISEL-X64-NEXT:    cmovel %r14d, %ebp
+; GISEL-X64-NEXT:    andl $1, %ecx
+; GISEL-X64-NEXT:    cmovnel %r15d, %ebp
+; GISEL-X64-NEXT:    movl $3072, %edi # imm = 0xC00
+; GISEL-X64-NEXT:    callq fesetround
+; GISEL-X64-NEXT:    fnstcw {{[0-9]+}}(%rsp)
+; GISEL-X64-NEXT:    movzwl {{[0-9]+}}(%rsp), %ecx
+; GISEL-X64-NEXT:    shrl $9, %ecx
+; GISEL-X64-NEXT:    andb $6, %cl
+; GISEL-X64-NEXT:    movl $45, %eax
+; GISEL-X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; GISEL-X64-NEXT:    shrl %cl, %eax
+; GISEL-X64-NEXT:    andl $3, %eax
+; GISEL-X64-NEXT:    xorl %r14d, %r14d
+; GISEL-X64-NEXT:    cmpl $0, %eax
+; GISEL-X64-NEXT:    setne %r14b
+; GISEL-X64-NEXT:    andl $1, %r14d
+; GISEL-X64-NEXT:    addl %ebp, %r14d
+; GISEL-X64-NEXT:    movl $2048, %edi # imm = 0x800
+; GISEL-X64-NEXT:    callq fesetround
+; GISEL-X64-NEXT:    fnstcw {{[0-9]+}}(%rsp)
+; GISEL-X64-NEXT:    movzwl {{[0-9]+}}(%rsp), %ecx
+; GISEL-X64-NEXT:    shrl $9, %ecx
+; GISEL-X64-NEXT:    andb $6, %cl
+; GISEL-X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; GISEL-X64-NEXT:    shrl %cl, %ebx
+; GISEL-X64-NEXT:    andl $3, %ebx
+; GISEL-X64-NEXT:    xorl %ecx, %ecx
+; GISEL-X64-NEXT:    cmpl $2, %ebx
+; GISEL-X64-NEXT:    setne %cl
+; GISEL-X64-NEXT:    shll $31, %ecx
+; GISEL-X64-NEXT:    sarl $31, %ecx
+; GISEL-X64-NEXT:    xorl %eax, %eax
+; GISEL-X64-NEXT:    cmpl %ecx, %r14d
+; GISEL-X64-NEXT:    setne %al
+; GISEL-X64-NEXT:    andl $1, %eax
+; GISEL-X64-NEXT:    addq $8, %rsp
+; GISEL-X64-NEXT:    popq %rbx
+; GISEL-X64-NEXT:    popq %r14
+; GISEL-X64-NEXT:    popq %r15
+; GISEL-X64-NEXT:    popq %rbp
+; GISEL-X64-NEXT:    retq
 entry:
   %call = tail call i32 @fesetround(i32 1024)
   %0 = tail call i32 @llvm.get.rounding()

diff  --git a/llvm/test/MC/ELF/mc-dump.s b/llvm/test/MC/ELF/mc-dump.s
index 389941db23e3b..36d3a05768dc6 100644
--- a/llvm/test/MC/ELF/mc-dump.s
+++ b/llvm/test/MC/ELF/mc-dump.s
@@ -12,7 +12,7 @@
 # CHECK-NEXT:0 Data Size:0 []
 # CHECK-NEXT:  Symbol @0 _start
 # CHECK-NEXT:0 Org Offset:3 Value:0
-# CHECK-NEXT:3 Relaxable Size:2 <MCInst #1999 <MCOperand Expr:.Ltmp0>>
+# CHECK-NEXT:3 Relaxable Size:2 <MCInst #2001 <MCOperand Expr:.Ltmp0>>
 # CHECK-NEXT:  Fixup @1 Value:.Ltmp0 Kind:4001
 # CHECK-NEXT:5 Data Size:16 [48,8b,04,25,00,00,00,00,48,8b,04,25,00,00,00,00]
 # CHECK-NEXT:  Fixup @4 Value:f0@<variant 11> Kind:4017

diff  --git a/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td b/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td
index e9c2069fdbd98..c3895b524e85e 100644
--- a/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td
+++ b/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td
@@ -535,7 +535,7 @@ def : Pat<(frag GPR32:$src1, complex:$src2, complex:$src3),
 // R00O-NEXT:  GIM_Reject,
 // R00O:       // Label [[DEFAULT_NUM]]: @[[DEFAULT]]
 // R00O-NEXT:  GIM_Reject,
-// R00O-NEXT:  }; // Size: 1890 bytes
+// R00O-NEXT:  }; // Size: 1894 bytes
 
 def INSNBOB : I<(outs GPR32:$dst), (ins GPR32:$src1, GPR32:$src2, GPR32:$src3, GPR32:$src4),
                  [(set GPR32:$dst,


        


More information about the llvm-commits mailing list