[llvm] Add Instruction selection support for x87 ld/st (PR #97016)

Malay Sanghi via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 3 05:41:58 PDT 2024


https://github.com/MalaySanghi updated https://github.com/llvm/llvm-project/pull/97016

>From 08275b72c6740e37123914f2435f9c256c3343a7 Mon Sep 17 00:00:00 2001
From: Malay Sanghi <malay.sanghi at intel.com>
Date: Fri, 28 Jun 2024 00:09:20 -0700
Subject: [PATCH 1/3] Add Instruction selection support for x87 ld/st

The other loads/stores are already selected in C++, so x87 loads/stores are selected in C++ as well.
---
 .../X86/GISel/X86InstructionSelector.cpp      |  28 ++-
 .../CodeGen/X86/GlobalISel/x86_64-fallback.ll |   9 -
 llvm/test/CodeGen/X86/GlobalISel/x87.ll       | 221 ++++++++++++++++++
 3 files changed, 244 insertions(+), 14 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/GlobalISel/x87.ll

diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
index 303783ea3fd22..64f977e9c9d66 100644
--- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
@@ -195,6 +195,14 @@ X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const {
       return &X86::VR512RegClass;
   }
 
+  if (RB.getID() == X86::PSRRegBankID) {
+    if (Ty.getSizeInBits() == 80)
+      return &X86::RFP80RegClass;
+    if (Ty.getSizeInBits() == 64)
+      return &X86::RFP64RegClass;
+    return &X86::RFP32RegClass;
+  }
+
   llvm_unreachable("Unknown RegBank!");
 }
 
@@ -462,6 +470,8 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
                     : (HasAVX512 ? X86::VMOVSSZmr :
                        HasAVX    ? X86::VMOVSSmr :
                                    X86::MOVSSmr);
+    if (X86::PSRRegBankID == RB.getID())
+      return Isload ? X86::LD_Fp32m : X86::ST_Fp32m;
   } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
     if (X86::GPRRegBankID == RB.getID())
       return Isload ? X86::MOV64rm : X86::MOV64mr;
@@ -472,6 +482,10 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
                     : (HasAVX512 ? X86::VMOVSDZmr :
                        HasAVX    ? X86::VMOVSDmr :
                                    X86::MOVSDmr);
+    if (X86::PSRRegBankID == RB.getID())
+      return Isload ? X86::LD_Fp64m : X86::ST_Fp64m;
+  } else if (Ty == LLT::scalar(80) || Ty == LLT::pointer(0, 80)) {
+    return Isload ? X86::LD_Fp80m : X86::ST_FpP80m;
   } else if (Ty.isVector() && Ty.getSizeInBits() == 128) {
     if (Alignment >= Align(16))
       return Isload ? (HasVLX ? X86::VMOVAPSZ128rm
@@ -611,7 +625,10 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
     I.removeOperand(0);
     addFullAddress(MIB, AM).addUse(DefReg);
   }
-  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+  bool Constrained = constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+  if (Constrained)
+    I.addImplicitDefUseOperands(MF);
+  return Constrained;
 }
 
 static unsigned getLeaOP(LLT Ty, const X86Subtarget &STI) {
@@ -1503,14 +1520,15 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I,
   const Register DstReg = I.getOperand(0).getReg();
   const LLT DstTy = MRI.getType(DstReg);
   const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
-  Align Alignment = Align(DstTy.getSizeInBytes());
+  // Create the load from the constant pool.
+  const ConstantFP *CFP = I.getOperand(1).getFPImm();
+  const auto &DataLayout = MF.getDataLayout();
+  Align Alignment = DataLayout.getPrefTypeAlign(CFP->getType());
   const DebugLoc &DbgLoc = I.getDebugLoc();
 
   unsigned Opc =
       getLoadStoreOp(DstTy, RegBank, TargetOpcode::G_LOAD, Alignment);
 
-  // Create the load from the constant pool.
-  const ConstantFP *CFP = I.getOperand(1).getFPImm();
   unsigned CPI = MF.getConstantPool()->getConstantPoolIndex(CFP, Alignment);
   MachineInstr *LoadInst = nullptr;
   unsigned char OpFlag = STI.classifyLocalReference(nullptr);
@@ -1525,7 +1543,7 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I,
 
     MachineMemOperand *MMO = MF.getMachineMemOperand(
         MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
-        LLT::pointer(0, MF.getDataLayout().getPointerSizeInBits()), Alignment);
+        LLT::pointer(0, DataLayout.getPointerSizeInBits()), Alignment);
 
     LoadInst =
         addDirectMem(BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg),
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll b/llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll
index 39302734dde78..bb0f0ae14f304 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll
@@ -7,15 +7,6 @@
 ; When we cannot produce a test case anymore, that means we can remove
 ; the fallback path.
 
-; Check that we fallback on invoke translation failures.
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: G_STORE %1:psr(s80), %0:gpr(p0) :: (store (s80) into %ir.ptr, align 16) (in function: test_x86_fp80_dump)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for test_x86_fp80_dump
-; FALLBACK-WITH-REPORT-OUT-LABEL: test_x86_fp80_dump:
-define void @test_x86_fp80_dump(ptr %ptr){
-  store x86_fp80 0xK4002A000000000000000, ptr %ptr, align 16
-  ret void
-}
-
 ; Check that we fallback on byVal argument
 ; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to translate instruction: call: '  call void @ScaleObjectOverwrite_3(ptr %index, ptr byval(%struct.PointListStruct) %index)' (in function: ScaleObjectOverwrite_2)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for ScaleObjectOverwrite_2
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x87.ll b/llvm/test/CodeGen/X86/GlobalISel/x87.ll
new file mode 100644
index 0000000000000..ebec84b03ba20
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/x87.ll
@@ -0,0 +1,221 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-32,GISEL_X86
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-64,GISEL_X64
+; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 | FileCheck %s --check-prefixes=CHECK-32,SDAG_X86
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 | FileCheck %s --check-prefixes=CHECK-64,SDAG_X64
+
+define x86_fp80 @f0(x86_fp80 noundef %a) nounwind {
+; GISEL_X86-LABEL: f0:
+; GISEL_X86:       # %bb.0:
+; GISEL_X86-NEXT:    pushl %ebp
+; GISEL_X86-NEXT:    movl %esp, %ebp
+; GISEL_X86-NEXT:    andl $-16, %esp
+; GISEL_X86-NEXT:    subl $48, %esp
+; GISEL_X86-NEXT:    fldt 8(%ebp)
+; GISEL_X86-NEXT:    fldt {{\.?LCPI[0-9]+_[0-9]+}}
+; GISEL_X86-NEXT:    fxch %st(1)
+; GISEL_X86-NEXT:    fstpt {{[0-9]+}}(%esp)
+; GISEL_X86-NEXT:    fstpt (%esp)
+; GISEL_X86-NEXT:    fldt {{[0-9]+}}(%esp)
+; GISEL_X86-NEXT:    fldt (%esp)
+; GISEL_X86-NEXT:    faddp %st, %st(1)
+; GISEL_X86-NEXT:    movl %ebp, %esp
+; GISEL_X86-NEXT:    popl %ebp
+; GISEL_X86-NEXT:    retl
+;
+; GISEL_X64-LABEL: f0:
+; GISEL_X64:       # %bb.0:
+; GISEL_X64-NEXT:    fldt {{[0-9]+}}(%rsp)
+; GISEL_X64-NEXT:    fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; GISEL_X64-NEXT:    fxch %st(1)
+; GISEL_X64-NEXT:    fstpt -{{[0-9]+}}(%rsp)
+; GISEL_X64-NEXT:    fstpt -{{[0-9]+}}(%rsp)
+; GISEL_X64-NEXT:    fldt -{{[0-9]+}}(%rsp)
+; GISEL_X64-NEXT:    fldt -{{[0-9]+}}(%rsp)
+; GISEL_X64-NEXT:    faddp %st, %st(1)
+; GISEL_X64-NEXT:    retq
+;
+; SDAG_X86-LABEL: f0:
+; SDAG_X86:       # %bb.0:
+; SDAG_X86-NEXT:    pushl %ebp
+; SDAG_X86-NEXT:    movl %esp, %ebp
+; SDAG_X86-NEXT:    andl $-16, %esp
+; SDAG_X86-NEXT:    subl $48, %esp
+; SDAG_X86-NEXT:    fldt 8(%ebp)
+; SDAG_X86-NEXT:    fld %st(0)
+; SDAG_X86-NEXT:    fstpt {{[0-9]+}}(%esp)
+; SDAG_X86-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
+; SDAG_X86-NEXT:    fld %st(0)
+; SDAG_X86-NEXT:    fstpt (%esp)
+; SDAG_X86-NEXT:    faddp %st, %st(1)
+; SDAG_X86-NEXT:    movl %ebp, %esp
+; SDAG_X86-NEXT:    popl %ebp
+; SDAG_X86-NEXT:    retl
+;
+; SDAG_X64-LABEL: f0:
+; SDAG_X64:       # %bb.0:
+; SDAG_X64-NEXT:    fldt {{[0-9]+}}(%rsp)
+; SDAG_X64-NEXT:    fld %st(0)
+; SDAG_X64-NEXT:    fstpt -{{[0-9]+}}(%rsp)
+; SDAG_X64-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; SDAG_X64-NEXT:    fld %st(0)
+; SDAG_X64-NEXT:    fstpt -{{[0-9]+}}(%rsp)
+; SDAG_X64-NEXT:    faddp %st, %st(1)
+; SDAG_X64-NEXT:    retq
+  %a.addr = alloca x86_fp80, align 16
+  %x = alloca x86_fp80, align 16
+  store x86_fp80 %a, ptr %a.addr, align 16
+  store x86_fp80 0xK400A8000000000000000, ptr %x, align 16
+  %load1 = load x86_fp80, ptr %a.addr, align 16
+  %load2 = load x86_fp80, ptr %x, align 16
+  %add = fadd x86_fp80 %load1, %load2
+  ret x86_fp80 %add
+}
+
+
+define void @f1(ptr %a, ptr %b) nounwind {
+; GISEL_X86-LABEL: f1:
+; GISEL_X86:       # %bb.0:
+; GISEL_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; GISEL_X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; GISEL_X86-NEXT:    fldt (%eax)
+; GISEL_X86-NEXT:    fldt (%ecx)
+; GISEL_X86-NEXT:    fsubrp %st, %st(1)
+; GISEL_X86-NEXT:    fstpt (%eax)
+; GISEL_X86-NEXT:    retl
+;
+; CHECK-64-LABEL: f1:
+; CHECK-64:       # %bb.0:
+; CHECK-64-NEXT:    fldt (%rdi)
+; CHECK-64-NEXT:    fldt (%rsi)
+; CHECK-64-NEXT:    fsubrp %st, %st(1)
+; CHECK-64-NEXT:    fstpt (%rdi)
+; CHECK-64-NEXT:    retq
+;
+; SDAG_X86-LABEL: f1:
+; SDAG_X86:       # %bb.0:
+; SDAG_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SDAG_X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; SDAG_X86-NEXT:    fldt (%ecx)
+; SDAG_X86-NEXT:    fldt (%eax)
+; SDAG_X86-NEXT:    fsubrp %st, %st(1)
+; SDAG_X86-NEXT:    fstpt (%ecx)
+; SDAG_X86-NEXT:    retl
+  %load1 = load x86_fp80, ptr %a, align 4
+  %load2 = load x86_fp80, ptr %b, align 4
+  %sub = fsub x86_fp80 %load1, %load2
+  store x86_fp80 %sub, ptr %a, align 4
+  ret void
+}
+
+define void @f2(ptr %a, ptr %b) nounwind {
+; GISEL_X86-LABEL: f2:
+; GISEL_X86:       # %bb.0:
+; GISEL_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; GISEL_X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; GISEL_X86-NEXT:    fldt (%eax)
+; GISEL_X86-NEXT:    fldt (%ecx)
+; GISEL_X86-NEXT:    fmulp %st, %st(1)
+; GISEL_X86-NEXT:    fstpt (%eax)
+; GISEL_X86-NEXT:    retl
+;
+; CHECK-64-LABEL: f2:
+; CHECK-64:       # %bb.0:
+; CHECK-64-NEXT:    fldt (%rdi)
+; CHECK-64-NEXT:    fldt (%rsi)
+; CHECK-64-NEXT:    fmulp %st, %st(1)
+; CHECK-64-NEXT:    fstpt (%rdi)
+; CHECK-64-NEXT:    retq
+;
+; SDAG_X86-LABEL: f2:
+; SDAG_X86:       # %bb.0:
+; SDAG_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SDAG_X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; SDAG_X86-NEXT:    fldt (%ecx)
+; SDAG_X86-NEXT:    fldt (%eax)
+; SDAG_X86-NEXT:    fmulp %st, %st(1)
+; SDAG_X86-NEXT:    fstpt (%ecx)
+; SDAG_X86-NEXT:    retl
+  %load1 = load x86_fp80, ptr %a, align 16
+  %load2 = load x86_fp80, ptr %b, align 16
+  %mul = fmul x86_fp80 %load1, %load2
+  store x86_fp80 %mul, ptr %a, align 16
+  ret void
+}
+
+define void @f3(ptr %a, ptr %b) nounwind {
+; GISEL_X86-LABEL: f3:
+; GISEL_X86:       # %bb.0:
+; GISEL_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; GISEL_X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; GISEL_X86-NEXT:    fldt (%eax)
+; GISEL_X86-NEXT:    fldt (%ecx)
+; GISEL_X86-NEXT:    fdivrp %st, %st(1)
+; GISEL_X86-NEXT:    fstpt (%eax)
+; GISEL_X86-NEXT:    retl
+;
+; CHECK-64-LABEL: f3:
+; CHECK-64:       # %bb.0:
+; CHECK-64-NEXT:    fldt (%rdi)
+; CHECK-64-NEXT:    fldt (%rsi)
+; CHECK-64-NEXT:    fdivrp %st, %st(1)
+; CHECK-64-NEXT:    fstpt (%rdi)
+; CHECK-64-NEXT:    retq
+;
+; SDAG_X86-LABEL: f3:
+; SDAG_X86:       # %bb.0:
+; SDAG_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SDAG_X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; SDAG_X86-NEXT:    fldt (%ecx)
+; SDAG_X86-NEXT:    fldt (%eax)
+; SDAG_X86-NEXT:    fdivrp %st, %st(1)
+; SDAG_X86-NEXT:    fstpt (%ecx)
+; SDAG_X86-NEXT:    retl
+  %load1 = load x86_fp80, ptr %a, align 4
+  %load2 = load x86_fp80, ptr %b, align 4
+  %div = fdiv x86_fp80 %load1, %load2
+  store x86_fp80 %div, ptr %a, align 4
+  ret void
+}
+
+define void @f6(ptr %0, ptr %1) nounwind {
+; GISEL_X86-LABEL: f6:
+; GISEL_X86:       # %bb.0:
+; GISEL_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; GISEL_X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; GISEL_X86-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}
+; GISEL_X86-NEXT:    flds (%eax)
+; GISEL_X86-NEXT:    faddp %st, %st(1)
+; GISEL_X86-NEXT:    fstps (%ecx)
+; GISEL_X86-NEXT:    retl
+;
+; GISEL_X64-LABEL: f6:
+; GISEL_X64:       # %bb.0:
+; GISEL_X64-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; GISEL_X64-NEXT:    flds (%rdi)
+; GISEL_X64-NEXT:    faddp %st, %st(1)
+; GISEL_X64-NEXT:    fstps (%rsi)
+; GISEL_X64-NEXT:    retq
+;
+; SDAG_X86-LABEL: f6:
+; SDAG_X86:       # %bb.0:
+; SDAG_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SDAG_X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; SDAG_X86-NEXT:    flds (%ecx)
+; SDAG_X86-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}
+; SDAG_X86-NEXT:    fstps (%eax)
+; SDAG_X86-NEXT:    retl
+;
+; SDAG_X64-LABEL: f6:
+; SDAG_X64:       # %bb.0:
+; SDAG_X64-NEXT:    flds (%rdi)
+; SDAG_X64-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; SDAG_X64-NEXT:    fstps (%rsi)
+; SDAG_X64-NEXT:    retq
+  %load1 = load float, ptr %0
+  %add = fadd float %load1, 20.0
+  store float %add, ptr %1
+  ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-32: {{.*}}

>From 90889419e17e54670531f84a01b9026d9aae20fc Mon Sep 17 00:00:00 2001
From: Malay Sanghi <malay.sanghi at intel.com>
Date: Wed, 3 Jul 2024 04:10:35 -0700
Subject: [PATCH 2/3] review

---
 .../X86/GISel/X86InstructionSelector.cpp      |  5 +-
 .../X86/{GlobalISel/x87.ll => isel-x87.ll}    | 94 ++++++++++---------
 2 files changed, 52 insertions(+), 47 deletions(-)
 rename llvm/test/CodeGen/X86/{GlobalISel/x87.ll => isel-x87.ll} (96%)

diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
index 64f977e9c9d66..9b869b1d0fb7c 100644
--- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
@@ -200,7 +200,8 @@ X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const {
       return &X86::RFP80RegClass;
     if (Ty.getSizeInBits() == 64)
       return &X86::RFP64RegClass;
-    return &X86::RFP32RegClass;
+    if (Ty.getSizeInBits() == 32)
+      return &X86::RFP32RegClass;
   }
 
   llvm_unreachable("Unknown RegBank!");
@@ -484,7 +485,7 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
                                    X86::MOVSDmr);
     if (X86::PSRRegBankID == RB.getID())
       return Isload ? X86::LD_Fp64m : X86::ST_Fp64m;
-  } else if (Ty == LLT::scalar(80) || Ty == LLT::pointer(0, 80)) {
+  } else if (Ty == LLT::scalar(80)) {
     return Isload ? X86::LD_Fp80m : X86::ST_FpP80m;
   } else if (Ty.isVector() && Ty.getSizeInBits() == 128) {
     if (Alignment >= Align(16))
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x87.ll b/llvm/test/CodeGen/X86/isel-x87.ll
similarity index 96%
rename from llvm/test/CodeGen/X86/GlobalISel/x87.ll
rename to llvm/test/CodeGen/X86/isel-x87.ll
index ebec84b03ba20..f1adf279c7353 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/x87.ll
+++ b/llvm/test/CodeGen/X86/isel-x87.ll
@@ -1,8 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-32,GISEL_X86
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-64,GISEL_X64
 ; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 | FileCheck %s --check-prefixes=CHECK-32,SDAG_X86
+; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 -fast-isel=true | FileCheck %s --check-prefixes=CHECK-32,SDAG_X86,FAST_X86
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-64,GISEL_X64
 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 | FileCheck %s --check-prefixes=CHECK-64,SDAG_X64
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 -fast-isel=true | FileCheck %s --check-prefixes=CHECK-64,SDAG_X64,FAST_X64
 
 define x86_fp80 @f0(x86_fp80 noundef %a) nounwind {
 ; GISEL_X86-LABEL: f0:
@@ -23,18 +25,6 @@ define x86_fp80 @f0(x86_fp80 noundef %a) nounwind {
 ; GISEL_X86-NEXT:    popl %ebp
 ; GISEL_X86-NEXT:    retl
 ;
-; GISEL_X64-LABEL: f0:
-; GISEL_X64:       # %bb.0:
-; GISEL_X64-NEXT:    fldt {{[0-9]+}}(%rsp)
-; GISEL_X64-NEXT:    fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; GISEL_X64-NEXT:    fxch %st(1)
-; GISEL_X64-NEXT:    fstpt -{{[0-9]+}}(%rsp)
-; GISEL_X64-NEXT:    fstpt -{{[0-9]+}}(%rsp)
-; GISEL_X64-NEXT:    fldt -{{[0-9]+}}(%rsp)
-; GISEL_X64-NEXT:    fldt -{{[0-9]+}}(%rsp)
-; GISEL_X64-NEXT:    faddp %st, %st(1)
-; GISEL_X64-NEXT:    retq
-;
 ; SDAG_X86-LABEL: f0:
 ; SDAG_X86:       # %bb.0:
 ; SDAG_X86-NEXT:    pushl %ebp
@@ -52,6 +42,18 @@ define x86_fp80 @f0(x86_fp80 noundef %a) nounwind {
 ; SDAG_X86-NEXT:    popl %ebp
 ; SDAG_X86-NEXT:    retl
 ;
+; GISEL_X64-LABEL: f0:
+; GISEL_X64:       # %bb.0:
+; GISEL_X64-NEXT:    fldt {{[0-9]+}}(%rsp)
+; GISEL_X64-NEXT:    fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; GISEL_X64-NEXT:    fxch %st(1)
+; GISEL_X64-NEXT:    fstpt -{{[0-9]+}}(%rsp)
+; GISEL_X64-NEXT:    fstpt -{{[0-9]+}}(%rsp)
+; GISEL_X64-NEXT:    fldt -{{[0-9]+}}(%rsp)
+; GISEL_X64-NEXT:    fldt -{{[0-9]+}}(%rsp)
+; GISEL_X64-NEXT:    faddp %st, %st(1)
+; GISEL_X64-NEXT:    retq
+;
 ; SDAG_X64-LABEL: f0:
 ; SDAG_X64:       # %bb.0:
 ; SDAG_X64-NEXT:    fldt {{[0-9]+}}(%rsp)
@@ -84,14 +86,6 @@ define void @f1(ptr %a, ptr %b) nounwind {
 ; GISEL_X86-NEXT:    fstpt (%eax)
 ; GISEL_X86-NEXT:    retl
 ;
-; CHECK-64-LABEL: f1:
-; CHECK-64:       # %bb.0:
-; CHECK-64-NEXT:    fldt (%rdi)
-; CHECK-64-NEXT:    fldt (%rsi)
-; CHECK-64-NEXT:    fsubrp %st, %st(1)
-; CHECK-64-NEXT:    fstpt (%rdi)
-; CHECK-64-NEXT:    retq
-;
 ; SDAG_X86-LABEL: f1:
 ; SDAG_X86:       # %bb.0:
 ; SDAG_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -101,6 +95,14 @@ define void @f1(ptr %a, ptr %b) nounwind {
 ; SDAG_X86-NEXT:    fsubrp %st, %st(1)
 ; SDAG_X86-NEXT:    fstpt (%ecx)
 ; SDAG_X86-NEXT:    retl
+;
+; CHECK-64-LABEL: f1:
+; CHECK-64:       # %bb.0:
+; CHECK-64-NEXT:    fldt (%rdi)
+; CHECK-64-NEXT:    fldt (%rsi)
+; CHECK-64-NEXT:    fsubrp %st, %st(1)
+; CHECK-64-NEXT:    fstpt (%rdi)
+; CHECK-64-NEXT:    retq
   %load1 = load x86_fp80, ptr %a, align 4
   %load2 = load x86_fp80, ptr %b, align 4
   %sub = fsub x86_fp80 %load1, %load2
@@ -119,14 +121,6 @@ define void @f2(ptr %a, ptr %b) nounwind {
 ; GISEL_X86-NEXT:    fstpt (%eax)
 ; GISEL_X86-NEXT:    retl
 ;
-; CHECK-64-LABEL: f2:
-; CHECK-64:       # %bb.0:
-; CHECK-64-NEXT:    fldt (%rdi)
-; CHECK-64-NEXT:    fldt (%rsi)
-; CHECK-64-NEXT:    fmulp %st, %st(1)
-; CHECK-64-NEXT:    fstpt (%rdi)
-; CHECK-64-NEXT:    retq
-;
 ; SDAG_X86-LABEL: f2:
 ; SDAG_X86:       # %bb.0:
 ; SDAG_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -136,6 +130,14 @@ define void @f2(ptr %a, ptr %b) nounwind {
 ; SDAG_X86-NEXT:    fmulp %st, %st(1)
 ; SDAG_X86-NEXT:    fstpt (%ecx)
 ; SDAG_X86-NEXT:    retl
+;
+; CHECK-64-LABEL: f2:
+; CHECK-64:       # %bb.0:
+; CHECK-64-NEXT:    fldt (%rdi)
+; CHECK-64-NEXT:    fldt (%rsi)
+; CHECK-64-NEXT:    fmulp %st, %st(1)
+; CHECK-64-NEXT:    fstpt (%rdi)
+; CHECK-64-NEXT:    retq
   %load1 = load x86_fp80, ptr %a, align 16
   %load2 = load x86_fp80, ptr %b, align 16
   %mul = fmul x86_fp80 %load1, %load2
@@ -154,14 +156,6 @@ define void @f3(ptr %a, ptr %b) nounwind {
 ; GISEL_X86-NEXT:    fstpt (%eax)
 ; GISEL_X86-NEXT:    retl
 ;
-; CHECK-64-LABEL: f3:
-; CHECK-64:       # %bb.0:
-; CHECK-64-NEXT:    fldt (%rdi)
-; CHECK-64-NEXT:    fldt (%rsi)
-; CHECK-64-NEXT:    fdivrp %st, %st(1)
-; CHECK-64-NEXT:    fstpt (%rdi)
-; CHECK-64-NEXT:    retq
-;
 ; SDAG_X86-LABEL: f3:
 ; SDAG_X86:       # %bb.0:
 ; SDAG_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -171,6 +165,14 @@ define void @f3(ptr %a, ptr %b) nounwind {
 ; SDAG_X86-NEXT:    fdivrp %st, %st(1)
 ; SDAG_X86-NEXT:    fstpt (%ecx)
 ; SDAG_X86-NEXT:    retl
+;
+; CHECK-64-LABEL: f3:
+; CHECK-64:       # %bb.0:
+; CHECK-64-NEXT:    fldt (%rdi)
+; CHECK-64-NEXT:    fldt (%rsi)
+; CHECK-64-NEXT:    fdivrp %st, %st(1)
+; CHECK-64-NEXT:    fstpt (%rdi)
+; CHECK-64-NEXT:    retq
   %load1 = load x86_fp80, ptr %a, align 4
   %load2 = load x86_fp80, ptr %b, align 4
   %div = fdiv x86_fp80 %load1, %load2
@@ -189,14 +191,6 @@ define void @f6(ptr %0, ptr %1) nounwind {
 ; GISEL_X86-NEXT:    fstps (%ecx)
 ; GISEL_X86-NEXT:    retl
 ;
-; GISEL_X64-LABEL: f6:
-; GISEL_X64:       # %bb.0:
-; GISEL_X64-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
-; GISEL_X64-NEXT:    flds (%rdi)
-; GISEL_X64-NEXT:    faddp %st, %st(1)
-; GISEL_X64-NEXT:    fstps (%rsi)
-; GISEL_X64-NEXT:    retq
-;
 ; SDAG_X86-LABEL: f6:
 ; SDAG_X86:       # %bb.0:
 ; SDAG_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -206,6 +200,14 @@ define void @f6(ptr %0, ptr %1) nounwind {
 ; SDAG_X86-NEXT:    fstps (%eax)
 ; SDAG_X86-NEXT:    retl
 ;
+; GISEL_X64-LABEL: f6:
+; GISEL_X64:       # %bb.0:
+; GISEL_X64-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; GISEL_X64-NEXT:    flds (%rdi)
+; GISEL_X64-NEXT:    faddp %st, %st(1)
+; GISEL_X64-NEXT:    fstps (%rsi)
+; GISEL_X64-NEXT:    retq
+;
 ; SDAG_X64-LABEL: f6:
 ; SDAG_X64:       # %bb.0:
 ; SDAG_X64-NEXT:    flds (%rdi)
@@ -219,3 +221,5 @@ define void @f6(ptr %0, ptr %1) nounwind {
 }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; CHECK-32: {{.*}}
+; FAST_X64: {{.*}}
+; FAST_X86: {{.*}}

>From c57992c3f588f27871adcaeb96b262954300fad0 Mon Sep 17 00:00:00 2001
From: Malay Sanghi <malay.sanghi at intel.com>
Date: Wed, 3 Jul 2024 05:41:09 -0700
Subject: [PATCH 3/3] rename vars

---
 llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp | 9 ++++-----
 llvm/test/CodeGen/X86/isel-x87.ll                    | 6 +++---
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
index 9b869b1d0fb7c..d73873812eeb6 100644
--- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
@@ -627,8 +627,7 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
     addFullAddress(MIB, AM).addUse(DefReg);
   }
   bool Constrained = constrainSelectedInstRegOperands(I, TII, TRI, RBI);
-  if (Constrained)
-    I.addImplicitDefUseOperands(MF);
+  I.addImplicitDefUseOperands(MF);
   return Constrained;
 }
 
@@ -1523,8 +1522,8 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I,
   const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
   // Create the load from the constant pool.
   const ConstantFP *CFP = I.getOperand(1).getFPImm();
-  const auto &DataLayout = MF.getDataLayout();
-  Align Alignment = DataLayout.getPrefTypeAlign(CFP->getType());
+  const auto &DL = MF.getDataLayout();
+  Align Alignment = DL.getPrefTypeAlign(CFP->getType());
   const DebugLoc &DbgLoc = I.getDebugLoc();
 
   unsigned Opc =
@@ -1544,7 +1543,7 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I,
 
     MachineMemOperand *MMO = MF.getMachineMemOperand(
         MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
-        LLT::pointer(0, DataLayout.getPointerSizeInBits()), Alignment);
+        LLT::pointer(0, DL.getPointerSizeInBits()), Alignment);
 
     LoadInst =
         addDirectMem(BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg),
diff --git a/llvm/test/CodeGen/X86/isel-x87.ll b/llvm/test/CodeGen/X86/isel-x87.ll
index f1adf279c7353..690c1f6ea968c 100644
--- a/llvm/test/CodeGen/X86/isel-x87.ll
+++ b/llvm/test/CodeGen/X86/isel-x87.ll
@@ -180,7 +180,7 @@ define void @f3(ptr %a, ptr %b) nounwind {
   ret void
 }
 
-define void @f6(ptr %0, ptr %1) nounwind {
+define void @f6(ptr %a, ptr %b) nounwind {
 ; GISEL_X86-LABEL: f6:
 ; GISEL_X86:       # %bb.0:
 ; GISEL_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -214,9 +214,9 @@ define void @f6(ptr %0, ptr %1) nounwind {
 ; SDAG_X64-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
 ; SDAG_X64-NEXT:    fstps (%rsi)
 ; SDAG_X64-NEXT:    retq
-  %load1 = load float, ptr %0
+  %load1 = load float, ptr %a
   %add = fadd float %load1, 20.0
-  store float %add, ptr %1
+  store float %add, ptr %b
   ret void
 }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:



More information about the llvm-commits mailing list