[llvm] a77d3ea - [X86][GlobalISel] Add instruction selection support for x87 ld/st (#97016)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 9 01:54:28 PDT 2024
Author: Malay Sanghi
Date: 2024-07-09T10:54:25+02:00
New Revision: a77d3ea310c61cf59c1146895b2d51fe014eb0a9
URL: https://github.com/llvm/llvm-project/commit/a77d3ea310c61cf59c1146895b2d51fe014eb0a9
DIFF: https://github.com/llvm/llvm-project/commit/a77d3ea310c61cf59c1146895b2d51fe014eb0a9.diff
LOG: [X86][GlobalISel] Add instruction selection support for x87 ld/st (#97016)
Add x87 G_LOAD/G_STORE selection support to existing C++ lowering.
Added:
llvm/test/CodeGen/X86/isel-x87.ll
Modified:
llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
index 303783ea3fd22..d73873812eeb6 100644
--- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
@@ -195,6 +195,15 @@ X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const {
return &X86::VR512RegClass;
}
+ if (RB.getID() == X86::PSRRegBankID) {
+ if (Ty.getSizeInBits() == 80)
+ return &X86::RFP80RegClass;
+ if (Ty.getSizeInBits() == 64)
+ return &X86::RFP64RegClass;
+ if (Ty.getSizeInBits() == 32)
+ return &X86::RFP32RegClass;
+ }
+
llvm_unreachable("Unknown RegBank!");
}
@@ -462,6 +471,8 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
: (HasAVX512 ? X86::VMOVSSZmr :
HasAVX ? X86::VMOVSSmr :
X86::MOVSSmr);
+ if (X86::PSRRegBankID == RB.getID())
+ return Isload ? X86::LD_Fp32m : X86::ST_Fp32m;
} else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
if (X86::GPRRegBankID == RB.getID())
return Isload ? X86::MOV64rm : X86::MOV64mr;
@@ -472,6 +483,10 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
: (HasAVX512 ? X86::VMOVSDZmr :
HasAVX ? X86::VMOVSDmr :
X86::MOVSDmr);
+ if (X86::PSRRegBankID == RB.getID())
+ return Isload ? X86::LD_Fp64m : X86::ST_Fp64m;
+ } else if (Ty == LLT::scalar(80)) {
+ return Isload ? X86::LD_Fp80m : X86::ST_FpP80m;
} else if (Ty.isVector() && Ty.getSizeInBits() == 128) {
if (Alignment >= Align(16))
return Isload ? (HasVLX ? X86::VMOVAPSZ128rm
@@ -611,7 +626,9 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
I.removeOperand(0);
addFullAddress(MIB, AM).addUse(DefReg);
}
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ bool Constrained = constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ I.addImplicitDefUseOperands(MF);
+ return Constrained;
}
static unsigned getLeaOP(LLT Ty, const X86Subtarget &STI) {
@@ -1503,14 +1520,15 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I,
const Register DstReg = I.getOperand(0).getReg();
const LLT DstTy = MRI.getType(DstReg);
const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
- Align Alignment = Align(DstTy.getSizeInBytes());
+ // Create the load from the constant pool.
+ const ConstantFP *CFP = I.getOperand(1).getFPImm();
+ const auto &DL = MF.getDataLayout();
+ Align Alignment = DL.getPrefTypeAlign(CFP->getType());
const DebugLoc &DbgLoc = I.getDebugLoc();
unsigned Opc =
getLoadStoreOp(DstTy, RegBank, TargetOpcode::G_LOAD, Alignment);
- // Create the load from the constant pool.
- const ConstantFP *CFP = I.getOperand(1).getFPImm();
unsigned CPI = MF.getConstantPool()->getConstantPoolIndex(CFP, Alignment);
MachineInstr *LoadInst = nullptr;
unsigned char OpFlag = STI.classifyLocalReference(nullptr);
@@ -1525,7 +1543,7 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I,
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
- LLT::pointer(0, MF.getDataLayout().getPointerSizeInBits()), Alignment);
+ LLT::pointer(0, DL.getPointerSizeInBits()), Alignment);
LoadInst =
addDirectMem(BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg),
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll b/llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll
index 39302734dde78..bb0f0ae14f304 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll
@@ -7,15 +7,6 @@
; When we cannot produce a test case anymore, that means we can remove
; the fallback path.
-; Check that we fallback on invoke translation failures.
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: G_STORE %1:psr(s80), %0:gpr(p0) :: (store (s80) into %ir.ptr, align 16) (in function: test_x86_fp80_dump)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for test_x86_fp80_dump
-; FALLBACK-WITH-REPORT-OUT-LABEL: test_x86_fp80_dump:
-define void @test_x86_fp80_dump(ptr %ptr){
- store x86_fp80 0xK4002A000000000000000, ptr %ptr, align 16
- ret void
-}
-
; Check that we fallback on byVal argument
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' call void @ScaleObjectOverwrite_3(ptr %index, ptr byval(%struct.PointListStruct) %index)' (in function: ScaleObjectOverwrite_2)
; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for ScaleObjectOverwrite_2
diff --git a/llvm/test/CodeGen/X86/isel-x87.ll b/llvm/test/CodeGen/X86/isel-x87.ll
new file mode 100644
index 0000000000000..690c1f6ea968c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-x87.ll
@@ -0,0 +1,225 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-32,GISEL_X86
+; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 | FileCheck %s --check-prefixes=CHECK-32,SDAG_X86
+; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 -fast-isel=true | FileCheck %s --check-prefixes=CHECK-32,SDAG_X86,FAST_X86
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-64,GISEL_X64
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 | FileCheck %s --check-prefixes=CHECK-64,SDAG_X64
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 -fast-isel=true | FileCheck %s --check-prefixes=CHECK-64,SDAG_X64,FAST_X64
+
+define x86_fp80 @f0(x86_fp80 noundef %a) nounwind {
+; GISEL_X86-LABEL: f0:
+; GISEL_X86: # %bb.0:
+; GISEL_X86-NEXT: pushl %ebp
+; GISEL_X86-NEXT: movl %esp, %ebp
+; GISEL_X86-NEXT: andl $-16, %esp
+; GISEL_X86-NEXT: subl $48, %esp
+; GISEL_X86-NEXT: fldt 8(%ebp)
+; GISEL_X86-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}
+; GISEL_X86-NEXT: fxch %st(1)
+; GISEL_X86-NEXT: fstpt {{[0-9]+}}(%esp)
+; GISEL_X86-NEXT: fstpt (%esp)
+; GISEL_X86-NEXT: fldt {{[0-9]+}}(%esp)
+; GISEL_X86-NEXT: fldt (%esp)
+; GISEL_X86-NEXT: faddp %st, %st(1)
+; GISEL_X86-NEXT: movl %ebp, %esp
+; GISEL_X86-NEXT: popl %ebp
+; GISEL_X86-NEXT: retl
+;
+; SDAG_X86-LABEL: f0:
+; SDAG_X86: # %bb.0:
+; SDAG_X86-NEXT: pushl %ebp
+; SDAG_X86-NEXT: movl %esp, %ebp
+; SDAG_X86-NEXT: andl $-16, %esp
+; SDAG_X86-NEXT: subl $48, %esp
+; SDAG_X86-NEXT: fldt 8(%ebp)
+; SDAG_X86-NEXT: fld %st(0)
+; SDAG_X86-NEXT: fstpt {{[0-9]+}}(%esp)
+; SDAG_X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; SDAG_X86-NEXT: fld %st(0)
+; SDAG_X86-NEXT: fstpt (%esp)
+; SDAG_X86-NEXT: faddp %st, %st(1)
+; SDAG_X86-NEXT: movl %ebp, %esp
+; SDAG_X86-NEXT: popl %ebp
+; SDAG_X86-NEXT: retl
+;
+; GISEL_X64-LABEL: f0:
+; GISEL_X64: # %bb.0:
+; GISEL_X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; GISEL_X64-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; GISEL_X64-NEXT: fxch %st(1)
+; GISEL_X64-NEXT: fstpt -{{[0-9]+}}(%rsp)
+; GISEL_X64-NEXT: fstpt -{{[0-9]+}}(%rsp)
+; GISEL_X64-NEXT: fldt -{{[0-9]+}}(%rsp)
+; GISEL_X64-NEXT: fldt -{{[0-9]+}}(%rsp)
+; GISEL_X64-NEXT: faddp %st, %st(1)
+; GISEL_X64-NEXT: retq
+;
+; SDAG_X64-LABEL: f0:
+; SDAG_X64: # %bb.0:
+; SDAG_X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; SDAG_X64-NEXT: fld %st(0)
+; SDAG_X64-NEXT: fstpt -{{[0-9]+}}(%rsp)
+; SDAG_X64-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; SDAG_X64-NEXT: fld %st(0)
+; SDAG_X64-NEXT: fstpt -{{[0-9]+}}(%rsp)
+; SDAG_X64-NEXT: faddp %st, %st(1)
+; SDAG_X64-NEXT: retq
+ %a.addr = alloca x86_fp80, align 16
+ %x = alloca x86_fp80, align 16
+ store x86_fp80 %a, ptr %a.addr, align 16
+ store x86_fp80 0xK400A8000000000000000, ptr %x, align 16
+ %load1 = load x86_fp80, ptr %a.addr, align 16
+ %load2 = load x86_fp80, ptr %x, align 16
+ %add = fadd x86_fp80 %load1, %load2
+ ret x86_fp80 %add
+}
+
+
+define void @f1(ptr %a, ptr %b) nounwind {
+; GISEL_X86-LABEL: f1:
+; GISEL_X86: # %bb.0:
+; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; GISEL_X86-NEXT: fldt (%eax)
+; GISEL_X86-NEXT: fldt (%ecx)
+; GISEL_X86-NEXT: fsubrp %st, %st(1)
+; GISEL_X86-NEXT: fstpt (%eax)
+; GISEL_X86-NEXT: retl
+;
+; SDAG_X86-LABEL: f1:
+; SDAG_X86: # %bb.0:
+; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; SDAG_X86-NEXT: fldt (%ecx)
+; SDAG_X86-NEXT: fldt (%eax)
+; SDAG_X86-NEXT: fsubrp %st, %st(1)
+; SDAG_X86-NEXT: fstpt (%ecx)
+; SDAG_X86-NEXT: retl
+;
+; CHECK-64-LABEL: f1:
+; CHECK-64: # %bb.0:
+; CHECK-64-NEXT: fldt (%rdi)
+; CHECK-64-NEXT: fldt (%rsi)
+; CHECK-64-NEXT: fsubrp %st, %st(1)
+; CHECK-64-NEXT: fstpt (%rdi)
+; CHECK-64-NEXT: retq
+ %load1 = load x86_fp80, ptr %a, align 4
+ %load2 = load x86_fp80, ptr %b, align 4
+ %sub = fsub x86_fp80 %load1, %load2
+ store x86_fp80 %sub, ptr %a, align 4
+ ret void
+}
+
+define void @f2(ptr %a, ptr %b) nounwind {
+; GISEL_X86-LABEL: f2:
+; GISEL_X86: # %bb.0:
+; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; GISEL_X86-NEXT: fldt (%eax)
+; GISEL_X86-NEXT: fldt (%ecx)
+; GISEL_X86-NEXT: fmulp %st, %st(1)
+; GISEL_X86-NEXT: fstpt (%eax)
+; GISEL_X86-NEXT: retl
+;
+; SDAG_X86-LABEL: f2:
+; SDAG_X86: # %bb.0:
+; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; SDAG_X86-NEXT: fldt (%ecx)
+; SDAG_X86-NEXT: fldt (%eax)
+; SDAG_X86-NEXT: fmulp %st, %st(1)
+; SDAG_X86-NEXT: fstpt (%ecx)
+; SDAG_X86-NEXT: retl
+;
+; CHECK-64-LABEL: f2:
+; CHECK-64: # %bb.0:
+; CHECK-64-NEXT: fldt (%rdi)
+; CHECK-64-NEXT: fldt (%rsi)
+; CHECK-64-NEXT: fmulp %st, %st(1)
+; CHECK-64-NEXT: fstpt (%rdi)
+; CHECK-64-NEXT: retq
+ %load1 = load x86_fp80, ptr %a, align 16
+ %load2 = load x86_fp80, ptr %b, align 16
+ %mul = fmul x86_fp80 %load1, %load2
+ store x86_fp80 %mul, ptr %a, align 16
+ ret void
+}
+
+define void @f3(ptr %a, ptr %b) nounwind {
+; GISEL_X86-LABEL: f3:
+; GISEL_X86: # %bb.0:
+; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; GISEL_X86-NEXT: fldt (%eax)
+; GISEL_X86-NEXT: fldt (%ecx)
+; GISEL_X86-NEXT: fdivrp %st, %st(1)
+; GISEL_X86-NEXT: fstpt (%eax)
+; GISEL_X86-NEXT: retl
+;
+; SDAG_X86-LABEL: f3:
+; SDAG_X86: # %bb.0:
+; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; SDAG_X86-NEXT: fldt (%ecx)
+; SDAG_X86-NEXT: fldt (%eax)
+; SDAG_X86-NEXT: fdivrp %st, %st(1)
+; SDAG_X86-NEXT: fstpt (%ecx)
+; SDAG_X86-NEXT: retl
+;
+; CHECK-64-LABEL: f3:
+; CHECK-64: # %bb.0:
+; CHECK-64-NEXT: fldt (%rdi)
+; CHECK-64-NEXT: fldt (%rsi)
+; CHECK-64-NEXT: fdivrp %st, %st(1)
+; CHECK-64-NEXT: fstpt (%rdi)
+; CHECK-64-NEXT: retq
+ %load1 = load x86_fp80, ptr %a, align 4
+ %load2 = load x86_fp80, ptr %b, align 4
+ %div = fdiv x86_fp80 %load1, %load2
+ store x86_fp80 %div, ptr %a, align 4
+ ret void
+}
+
+define void @f6(ptr %a, ptr %b) nounwind {
+; GISEL_X86-LABEL: f6:
+; GISEL_X86: # %bb.0:
+; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; GISEL_X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; GISEL_X86-NEXT: flds (%eax)
+; GISEL_X86-NEXT: faddp %st, %st(1)
+; GISEL_X86-NEXT: fstps (%ecx)
+; GISEL_X86-NEXT: retl
+;
+; SDAG_X86-LABEL: f6:
+; SDAG_X86: # %bb.0:
+; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; SDAG_X86-NEXT: flds (%ecx)
+; SDAG_X86-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}
+; SDAG_X86-NEXT: fstps (%eax)
+; SDAG_X86-NEXT: retl
+;
+; GISEL_X64-LABEL: f6:
+; GISEL_X64: # %bb.0:
+; GISEL_X64-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; GISEL_X64-NEXT: flds (%rdi)
+; GISEL_X64-NEXT: faddp %st, %st(1)
+; GISEL_X64-NEXT: fstps (%rsi)
+; GISEL_X64-NEXT: retq
+;
+; SDAG_X64-LABEL: f6:
+; SDAG_X64: # %bb.0:
+; SDAG_X64-NEXT: flds (%rdi)
+; SDAG_X64-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; SDAG_X64-NEXT: fstps (%rsi)
+; SDAG_X64-NEXT: retq
+ %load1 = load float, ptr %a
+ %add = fadd float %load1, 20.0
+ store float %add, ptr %b
+ ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-32: {{.*}}
+; FAST_X64: {{.*}}
+; FAST_X86: {{.*}}
More information about the llvm-commits mailing list