[llvm] 064cea9 - AMDGPU/GlobalISel: Try to use s_and_b64 in ptrmask selection
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 20 09:57:01 PST 2022
Author: Matt Arsenault
Date: 2022-01-20T12:56:53-05:00
New Revision: 064cea9c9a02e8fee77f9e28b3e06cb4908f7a0e
URL: https://github.com/llvm/llvm-project/commit/064cea9c9a02e8fee77f9e28b3e06cb4908f7a0e
DIFF: https://github.com/llvm/llvm-project/commit/064cea9c9a02e8fee77f9e28b3e06cb4908f7a0e.diff
LOG: AMDGPU/GlobalISel: Try to use s_and_b64 in ptrmask selection
Avoids a test diff with SDAG.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index d40d5873eacf..6ab2cb6df3bd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2552,6 +2552,8 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
Register MaskReg = I.getOperand(2).getReg();
LLT Ty = MRI->getType(DstReg);
LLT MaskTy = MRI->getType(MaskReg);
+ MachineBasicBlock *BB = I.getParent();
+ const DebugLoc &DL = I.getDebugLoc();
const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
@@ -2560,6 +2562,24 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
if (DstRB != SrcRB) // Should only happen for hand written MIR.
return false;
+ // Try to avoid emitting a bit operation when we only need to touch half of
+ // the 64-bit pointer.
+ APInt MaskOnes = KnownBits->getKnownOnes(MaskReg).zextOrSelf(64);
+ const APInt MaskHi32 = APInt::getHighBitsSet(64, 32);
+ const APInt MaskLo32 = APInt::getLowBitsSet(64, 32);
+
+ const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
+ const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
+
+ if (!IsVGPR && Ty.getSizeInBits() == 64 &&
+ !CanCopyLow32 && !CanCopyHi32) {
+ auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
+ .addReg(SrcReg)
+ .addReg(MaskReg);
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+ }
+
unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
const TargetRegisterClass &RegRC
= IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
@@ -2576,8 +2596,6 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
!RBI.constrainGenericRegister(MaskReg, *MaskRC, *MRI))
return false;
- MachineBasicBlock *BB = I.getParent();
- const DebugLoc &DL = I.getDebugLoc();
if (Ty.getSizeInBits() == 32) {
assert(MaskTy.getSizeInBits() == 32 &&
"ptrmask should have been narrowed during legalize");
@@ -2600,13 +2618,7 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
Register MaskedLo, MaskedHi;
- // Try to avoid emitting a bit operation when we only need to touch half of
- // the 64-bit pointer.
- APInt MaskOnes = KnownBits->getKnownOnes(MaskReg).zextOrSelf(64);
-
- const APInt MaskHi32 = APInt::getHighBitsSet(64, 32);
- const APInt MaskLo32 = APInt::getLowBitsSet(64, 32);
- if ((MaskOnes & MaskLo32) == MaskLo32) {
+ if (CanCopyLow32) {
// If all the bits in the low half are 1, we only need a copy for it.
MaskedLo = LoReg;
} else {
@@ -2621,7 +2633,7 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
.addReg(MaskLo);
}
- if ((MaskOnes & MaskHi32) == MaskHi32) {
+ if (CanCopyHi32) {
// If all the bits in the high half are 1, we only need a copy for it.
MaskedHi = HiReg;
} else {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir
index d0ee43b20b2d..4861a891e059 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir
@@ -244,14 +244,8 @@ body: |
; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_sgpr
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0
- ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1
- ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY3]], [[COPY5]], implicit-def $scc
- ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], implicit-def $scc
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
%0:sgpr(p0) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = COPY $sgpr2_sgpr3
%2:sgpr(p0) = G_PTRMASK %0, %1
@@ -293,14 +287,8 @@ body: |
; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_sgpr_0x0000000000000000
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0
- ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1
- ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc
- ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[S_MOV_B64_]], implicit-def $scc
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
%0:sgpr(p0) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 0
%2:sgpr(p0) = G_PTRMASK %0, %1
@@ -322,14 +310,8 @@ body: |
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4042322160
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -252645136
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc
- ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE1]]
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[REG_SEQUENCE]], implicit-def $scc
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
%0:sgpr(p0) = COPY $sgpr0_sgpr1
%1:sgpr(s64) = G_CONSTANT i64 -1085102592571150096
%2:sgpr(p0) = G_PTRMASK %0, %1
@@ -351,14 +333,8 @@ body: |
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
- ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY %const.sub1
- ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc
- ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], %const, implicit-def $scc
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
%0:sgpr(p0) = COPY $sgpr0_sgpr1
%const:sgpr(s64) = G_CONSTANT i64 -9223372036854775808
%1:sgpr(p0) = G_PTRMASK %0, %const
@@ -407,14 +383,8 @@ body: |
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
- ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY %const.sub1
- ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc
- ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], %const, implicit-def $scc
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
%0:sgpr(p0) = COPY $sgpr0_sgpr1
%const:sgpr(s64) = G_CONSTANT i64 4294967296
%1:sgpr(p0) = G_PTRMASK %0, %const
More information about the llvm-commits
mailing list