[llvm] bb10fa3 - AMDGPU: Fix wrong null value for private address space
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue May 26 13:35:21 PDT 2020
Author: Matt Arsenault
Date: 2020-05-26T16:35:13-04:00
New Revision: bb10fa3a53f928e2e24ad3eaf8e57508fe9d4320
URL: https://github.com/llvm/llvm-project/commit/bb10fa3a53f928e2e24ad3eaf8e57508fe9d4320
DIFF: https://github.com/llvm/llvm-project/commit/bb10fa3a53f928e2e24ad3eaf8e57508fe9d4320.diff
LOG: AMDGPU: Fix wrong null value for private address space
I'm guessing this was a holdover from when 0 was an invalid stack
pointer, but surprised nobody has discovered this before.
Also don't allow offset folding for -1 pointers, since it looks weird
to partially fold this.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
llvm/test/CodeGen/AMDGPU/addrspacecast.ll
llvm/test/CodeGen/AMDGPU/nullptr.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 52823c16d72d..edd8ea39e0f4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1482,22 +1482,26 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
- unsigned Imm = CAddr->getZExtValue();
-
- SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
- MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
- DL, MVT::i32, HighBits);
- VAddr = SDValue(MovHighBits, 0);
-
- // In a call sequence, stores to the argument stack area are relative to the
- // stack pointer.
- const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
-
- SOffset = isStackPtrRelative(PtrInfo)
- ? CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32)
- : CurDAG->getTargetConstant(0, DL, MVT::i32);
- ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
- return true;
+ int64_t Imm = CAddr->getSExtValue();
+ const int64_t NullPtr =
+ AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
+ // Don't fold null pointer.
+ if (Imm != NullPtr) {
+ SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
+ MachineSDNode *MovHighBits = CurDAG->getMachineNode(
+ AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
+ VAddr = SDValue(MovHighBits, 0);
+
+ // In a call sequence, stores to the argument stack area are relative to the
+ // stack pointer.
+ const MachinePointerInfo &PtrInfo
+ = cast<MemSDNode>(Parent)->getPointerInfo();
+ SOffset = isStackPtrRelative(PtrInfo)
+ ? CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32)
+ : CurDAG->getTargetConstant(0, DL, MVT::i32);
+ ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
+ return true;
+ }
}
if (CurDAG->isBaseWithConstantOffset(Addr)) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index aee6c0dd8a8e..5afec2188d66 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3062,7 +3062,8 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
int64_t Offset = 0;
- if (mi_match(Root.getReg(), *MRI, m_ICst(Offset))) {
+ if (mi_match(Root.getReg(), *MRI, m_ICst(Offset)) &&
+ Offset != TM.getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS)) {
Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
// TODO: Should this be inside the render function? The iterator seems to
@@ -3091,7 +3092,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
}}};
}
- assert(Offset == 0);
+ assert(Offset == 0 || Offset == -1);
// Try to fold a frame index directly into the MUBUF vaddr field, and any
// offsets.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 2ef6cd5b3e33..e223fecc8819 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -57,8 +57,9 @@ class AMDGPUTargetMachine : public LLVMTargetMachine {
void adjustPassManager(PassManagerBuilder &) override;
/// Get the integer value of a null pointer in the given address space.
- uint64_t getNullPointerValue(unsigned AddrSpace) const {
+ static int64_t getNullPointerValue(unsigned AddrSpace) {
return (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
AddrSpace == AMDGPUAS::REGION_ADDRESS) ? -1 : 0;
}
};
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir
index 13e4035a4882..79284fdfd05f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir
@@ -843,3 +843,31 @@ body: |
$vgpr0 = COPY %3
...
+
+# Should not fold offset if this is a null dereference.
+---
+
+name: load_private_s32_from_neg1
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+ scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+ stackPtrOffsetReg: $sgpr32
+
+body: |
+ bb.0:
+
+ ; GFX6-LABEL: name: load_private_s32_from_neg1
+ ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
+ ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+ ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ ; GFX9-LABEL: name: load_private_s32_from_neg1
+ ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
+ ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+ ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+ %0:vgpr(p5) = G_CONSTANT i32 -1
+ %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
+ $vgpr0 = COPY %1
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
index 643bdd3b7d58..395d34a00081 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
@@ -173,12 +173,12 @@ body: |
; VI-LABEL: name: test_addrspacecast_p5_to_p0
; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; VI: [[COPY1:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; VI: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0
+ ; VI: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1
; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
; VI: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4)
; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
- ; VI: [[GEP:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C2]](s64)
- ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (dereferenceable invariant load 4, addrspace 4)
+ ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C2]](s64)
+ ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, addrspace 4)
; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](p5), [[C]]
; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p5)
; VI: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32)
@@ -186,7 +186,7 @@ body: |
; VI: $vgpr0_vgpr1 = COPY [[SELECT]](p0)
; GFX9-LABEL: name: test_addrspacecast_p5_to_p0
; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
- ; GFX9: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0
+ ; GFX9: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1
; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
; GFX9: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 30735
; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -216,7 +216,7 @@ body: |
; VI-LABEL: name: test_addrspacecast_p0_to_p5
; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; VI: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0
+ ; VI: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1
; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
; VI: [[EXTRACT:%[0-9]+]]:_(p5) = G_EXTRACT [[COPY]](p0), 0
; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]]
@@ -224,7 +224,7 @@ body: |
; VI: $vgpr0 = COPY [[SELECT]](p5)
; GFX9-LABEL: name: test_addrspacecast_p0_to_p5
; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; GFX9: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0
+ ; GFX9: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1
; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
; GFX9: [[EXTRACT:%[0-9]+]]:_(p5) = G_EXTRACT [[COPY]](p0), 0
; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]]
@@ -232,7 +232,7 @@ body: |
; GFX9: $vgpr0 = COPY [[SELECT]](p5)
; SI-LABEL: name: test_addrspacecast_p0_to_p5
; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
- ; SI: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0
+ ; SI: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1
; SI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
; SI: [[EXTRACT:%[0-9]+]]:_(p5) = G_EXTRACT [[COPY]](p0), 0
; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]]
@@ -260,8 +260,8 @@ body: |
; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
; VI: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4)
; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
- ; VI: [[GEP:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C2]](s64)
- ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (dereferenceable invariant load 4, align 64, addrspace 4)
+ ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C2]](s64)
+ ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, align 64, addrspace 4)
; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](p3), [[C]]
; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3)
; VI: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32)
@@ -466,15 +466,15 @@ body: |
; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
; VI: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4)
; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
- ; VI: [[GEP:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C2]](s64)
- ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (dereferenceable invariant load 4, align 64, addrspace 4)
+ ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C2]](s64)
+ ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, align 64, addrspace 4)
; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C]]
; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
; VI: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32)
; VI: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
; VI: [[COPY3:%[0-9]+]]:_(p4) = COPY [[COPY]](p4)
- ; VI: [[GEP1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY3]], [[C2]](s64)
- ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (dereferenceable invariant load 4, align 64, addrspace 4)
+ ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY3]], [[C2]](s64)
+ ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, align 64, addrspace 4)
; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C]]
; VI: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
; VI: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT1]](s32), [[LOAD1]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
index 764f93509764..d16edbac75fe 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
@@ -76,7 +76,7 @@ define void @use_group_to_flat_addrspacecast_func(i32 addrspace(3)* %ptr) #0 {
; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
; CI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
-; CI-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], 0
+; CI-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], -1
; CI-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc
; CI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; CI-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]
@@ -89,7 +89,7 @@ define void @use_group_to_flat_addrspacecast_func(i32 addrspace(3)* %ptr) #0 {
; GFX9-XXX: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_private_base
; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
-; GFX9: v_cmp_ne_u32_e64 vcc, [[PTR]], 0
+; GFX9: v_cmp_ne_u32_e64 vcc, [[PTR]], -1
; GFX9: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc
; GFX9: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GFX9-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]
@@ -167,7 +167,7 @@ define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #0 {
; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}
; HSA-DAG: v_cmp_ne_u64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
-; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], 0, v[[VPTR_LO]]
+; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
; HSA: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #0 {
@@ -252,12 +252,16 @@ define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
; FIXME: Shouldn't need to enable queue ptr
; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast:
-; CI: enable_sgpr_queue_ptr = 1
-; GFX9: enable_sgpr_queue_ptr = 0
+; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11
+; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
+; GFX9-DAG: s_getreg_b32 [[SSRC_SHARED:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 0, 16)
+; GFX9-DAG: s_lshl_b32 [[SSRC_SHARED_BASE:s[0-9]+]], [[SSRC_SHARED]], 16
+; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SSRC_SHARED_BASE]]
+
+; GFX9-XXX: v_mov_b32_e32 v[[HI:[0-9]+]], src_shared_base
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
-; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
%cast = addrspacecast i32 addrspace(5)* null to i32*
@@ -266,14 +270,41 @@ define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
}
; HSA-LABEL: {{^}}cast_0_flat_to_private_addrspacecast:
-; HSA: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
-; HSA: buffer_store_dword [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0
+; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
+; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
+; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, 0
define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 {
%cast = addrspacecast i32* null to i32 addrspace(5)*
store volatile i32 7, i32 addrspace(5)* %cast
ret void
}
+
+; HSA-LABEL: {{^}}cast_neg1_private_to_flat_addrspacecast:
+; CI: enable_sgpr_queue_ptr = 1
+; GFX9: enable_sgpr_queue_ptr = 0
+
+; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
+; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
+; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
+define amdgpu_kernel void @cast_neg1_private_to_flat_addrspacecast() #0 {
+ %cast = addrspacecast i32 addrspace(5)* inttoptr (i32 -1 to i32 addrspace(5)*) to i32*
+ store volatile i32 7, i32* %cast
+ ret void
+}
+
+; HSA-LABEL: {{^}}cast_neg1_flat_to_private_addrspacecast:
+; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
+; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
+; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, 0
+define amdgpu_kernel void @cast_neg1_flat_to_private_addrspacecast() #0 {
+ %cast = addrspacecast i32* inttoptr (i64 -1 to i32*) to i32 addrspace(5)*
+ store volatile i32 7, i32 addrspace(5)* %cast
+ ret void
+}
+
+
; Disable optimizations in case there are optimizations added that
; specialize away generic pointer accesses.
diff --git a/llvm/test/CodeGen/AMDGPU/nullptr.ll b/llvm/test/CodeGen/AMDGPU/nullptr.ll
index 4eaf9836bb9d..16292f0ebee0 100644
--- a/llvm/test/CodeGen/AMDGPU/nullptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/nullptr.ll
@@ -4,7 +4,7 @@
%struct.S = type { i32 addrspace(5)*, i32 addrspace(1)*, i32 addrspace(4)*, i32 addrspace(3)*, i32*, i32 addrspace(2)*}
; CHECK-LABEL: nullptr_priv:
-; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long -1
@nullptr_priv = global i32 addrspace(5)* addrspacecast (i32* null to i32 addrspace(5)*)
; CHECK-LABEL: nullptr_glob:
@@ -98,7 +98,7 @@
@nullptr23 = global i32 addrspace(23)* addrspacecast (i32* null to i32 addrspace(23)*)
; CHECK-LABEL: structWithPointers:
-; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long -1
; GCN-NEXT: .zero 4
; GCN-NEXT: .quad 0
; R600-NEXT: .long 0
More information about the llvm-commits
mailing list