[llvm] 02adfb5 - AMDGPU/GlobalISel: Manually select scalar f64 G_FNEG
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 29 06:52:44 PST 2020
Author: Matt Arsenault
Date: 2020-01-29T06:49:16-08:00
New Revision: 02adfb5155e5ee1bb94b9b196db57b9902ae8278
URL: https://github.com/llvm/llvm-project/commit/02adfb5155e5ee1bb94b9b196db57b9902ae8278
DIFF: https://github.com/llvm/llvm-project/commit/02adfb5155e5ee1bb94b9b196db57b9902ae8278.diff
LOG: AMDGPU/GlobalISel: Manually select scalar f64 G_FNEG
This should be no problem to support with a pattern, but it turns out
there are just too many yaks to shave. The main problem is in the DAG
emitter, which I have no desire to sink effort into fixing.
If we had a bit to disable patterns in the DAG importer, fixing the
GlobalISelEmitter is more manageable.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 009815f5f92e..324fdfa6c8ba 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1440,6 +1440,61 @@ bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI);
}
+bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
+ // Only manually handle the f64 SGPR case.
+ //
+ // FIXME: This is a workaround for 2.5 different tablegen problems. Because
+ // the bit ops theoretically have a second result due to the implicit def of
+ // SCC, the GlobalISelEmitter is overly conservative and rejects it. Fixing
+ // that is easy by disabling the check. The result works, but uses a
+ // nonsensical sreg32orlds_and_sreg_1 regclass.
+ //
+ // The DAG emitter is more problematic, and incorrectly adds both S_XOR_B32 to
+ // the variadic REG_SEQUENCE operands.
+
+ Register Dst = MI.getOperand(0).getReg();
+ const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
+ if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
+ MRI->getType(Dst) != LLT::scalar(64))
+ return false;
+
+ Register Src = MI.getOperand(1).getReg();
+ MachineInstr *Fabs = getOpcodeDef(TargetOpcode::G_FABS, Src, *MRI);
+ if (Fabs)
+ Src = Fabs->getOperand(1).getReg();
+
+ if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
+ !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))
+ return false;
+
+ MachineBasicBlock *BB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+ Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+
+ BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
+ .addReg(Src, 0, AMDGPU::sub0);
+ BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
+ .addReg(Src, 0, AMDGPU::sub1);
+ BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
+ .addImm(0x80000000);
+
+ // Set or toggle sign bit.
+ unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
+ BuildMI(*BB, &MI, DL, TII.get(Opc), OpReg)
+ .addReg(HiReg)
+ .addReg(ConstReg);
+ BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
+ .addReg(LoReg)
+ .addImm(AMDGPU::sub0)
+ .addReg(OpReg)
+ .addImm(AMDGPU::sub1);
+ MI.eraseFromParent();
+ return true;
+}
+
static bool isConstant(const MachineInstr &MI) {
return MI.getOpcode() == TargetOpcode::G_CONSTANT;
}
@@ -1851,6 +1906,10 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_CONSTANT:
case TargetOpcode::G_FCONSTANT:
return selectG_CONSTANT(I);
+ case TargetOpcode::G_FNEG:
+ if (selectImpl(I, *CoverageInfo))
+ return true;
+ return selectG_FNEG(I);
case TargetOpcode::G_EXTRACT:
return selectG_EXTRACT(I);
case TargetOpcode::G_MERGE_VALUES:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index cc4d987979f8..19bd0315b8c0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -87,6 +87,7 @@ class AMDGPUInstructionSelector : public InstructionSelector {
bool selectG_TRUNC(MachineInstr &I) const;
bool selectG_SZA_EXT(MachineInstr &I) const;
bool selectG_CONSTANT(MachineInstr &I) const;
+ bool selectG_FNEG(MachineInstr &I) const;
bool selectG_AND_OR_XOR(MachineInstr &I) const;
bool selectG_ADD_SUB(MachineInstr &I) const;
bool selectG_UADDO_USUBO_UADDE_USUBE(MachineInstr &I) const;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir
index ddea2d9f08e7..ded09e28687e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir
@@ -200,9 +200,13 @@ body: |
liveins: $sgpr0_sgpr1
; GCN-LABEL: name: fneg_s64_ss
; GCN: liveins: $sgpr0_sgpr1
- ; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
- ; GCN: [[FNEG:%[0-9]+]]:sreg_64(s64) = G_FNEG [[COPY]]
- ; GCN: $sgpr0_sgpr1 = COPY [[FNEG]](s64)
+ ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+ ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+ ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1
+ ; GCN: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]]
%0:sgpr(s64) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_FNEG %0
$sgpr0_sgpr1 = COPY %1
@@ -462,10 +466,13 @@ body: |
liveins: $sgpr0_sgpr1
; GCN-LABEL: name: fneg_fabs_s64_ss
; GCN: liveins: $sgpr0_sgpr1
- ; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
- ; GCN: [[FABS:%[0-9]+]]:sgpr(s64) = G_FABS [[COPY]]
- ; GCN: [[FNEG:%[0-9]+]]:sreg_64(s64) = G_FNEG [[FABS]]
- ; GCN: $sgpr0_sgpr1 = COPY [[FNEG]](s64)
+ ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+ ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+ ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+ ; GCN: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+ ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1
+ ; GCN: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]]
%0:sgpr(s64) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_FABS %0
%2:sgpr(s64) = G_FNEG %1
More information about the llvm-commits
mailing list