[llvm] a5b9ad7 - AMDGPU/GlobalISel: Don't emit code for unused kernel arguments
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 6 06:11:39 PDT 2020
Author: Matt Arsenault
Date: 2020-07-06T09:04:06-04:00
New Revision: a5b9ad7e9aca1329ba310e638dafa58c47468a58
URL: https://github.com/llvm/llvm-project/commit/a5b9ad7e9aca1329ba310e638dafa58c47468a58
DIFF: https://github.com/llvm/llvm-project/commit/a5b9ad7e9aca1329ba310e638dafa58c47468a58.diff
LOG: AMDGPU/GlobalISel: Don't emit code for unused kernel arguments
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 7a3a502113df..83e5fcef7d7b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -523,6 +523,9 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;
+ if (Arg.use_empty())
+ continue;
+
ArrayRef<Register> OrigArgRegs = VRegs[i];
Register ArgReg =
OrigArgRegs.size() == 1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll
index d5e7003a1561..4c48f9cc49fe 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll
@@ -1092,9 +1092,9 @@ define amdgpu_kernel void @empty_struct_arg({} %in) nounwind {
; With the SelectionDAG argument lowering, the alignments for the
; struct members is not properly considered, making these wrong.
-define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8, {i32, i64} %arg1) {
+define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8 %pad, {i32, i64} %arg1) {
; HSA-VI-LABEL: name: struct_argument_alignment
- ; HSA-VI: bb.1 (%ir-block.1):
+ ; HSA-VI: bb.1 (%ir-block.0):
; HSA-VI: liveins: $sgpr4_sgpr5
; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
; HSA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
@@ -1112,13 +1112,15 @@ define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8, {i32,
; HSA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD2]](s128), 64
; HSA-VI: [[C3:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
; HSA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C3]](p1)
+ ; HSA-VI: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C3]](p1)
; HSA-VI: G_STORE [[EXTRACT]](s32), [[C3]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
+ ; HSA-VI: G_STORE [[LOAD1]](s8), [[COPY2]](p1) :: (volatile store 1 into `i8 addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE [[EXTRACT2]](s32), [[C3]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: struct_argument_alignment
- ; LEGACY-MESA-VI: bb.1 (%ir-block.1):
+ ; LEGACY-MESA-VI: bb.1 (%ir-block.0):
; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1
; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
; LEGACY-MESA-VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
@@ -1136,8 +1138,10 @@ define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8, {i32,
; LEGACY-MESA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD2]](s128), 64
; LEGACY-MESA-VI: [[C3:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
; LEGACY-MESA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C3]](p1)
+ ; LEGACY-MESA-VI: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C3]](p1)
; LEGACY-MESA-VI: G_STORE [[EXTRACT]](s32), [[C3]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
+ ; LEGACY-MESA-VI: G_STORE [[LOAD1]](s8), [[COPY2]](p1) :: (volatile store 1 into `i8 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE [[EXTRACT2]](s32), [[C3]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
@@ -1147,6 +1151,7 @@ define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8, {i32,
%val3 = extractvalue {i32, i64} %arg1, 1
store volatile i32 %val0, i32 addrspace(1)* null
store volatile i64 %val1, i64 addrspace(1)* null
+ store volatile i8 %pad, i8 addrspace(1)* null
store volatile i32 %val2, i32 addrspace(1)* null
store volatile i64 %val3, i64 addrspace(1)* null
ret void
@@ -1164,20 +1169,15 @@ define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0,
; HSA-VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 12, align 16, addrspace 4)
; HSA-VI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s96), 0
; HSA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s96), 32
- ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+ ; HSA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 13
; HSA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 4, addrspace 4)
- ; HSA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 13
- ; HSA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
- ; HSA-VI: [[LOAD2:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 12, align 1, addrspace 4)
- ; HSA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD2]](s96), 0
- ; HSA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD2]](s96), 32
- ; HSA-VI: [[C3:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
- ; HSA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C3]](p1)
- ; HSA-VI: G_STORE [[EXTRACT]](s32), [[C3]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
+ ; HSA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 1, addrspace 4)
+ ; HSA-VI: [[C2:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
+ ; HSA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C2]](p1)
+ ; HSA-VI: G_STORE [[EXTRACT]](s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
; HSA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
- ; HSA-VI: G_STORE [[EXTRACT2]](s32), [[C3]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
- ; HSA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
+ ; HSA-VI: G_STORE %3:_(s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
+ ; HSA-VI: G_STORE %4:_(s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
; HSA-VI: S_ENDPGM 0
; LEGACY-MESA-VI-LABEL: name: packed_struct_argument_alignment
; LEGACY-MESA-VI: bb.1 (%ir-block.1):
@@ -1188,20 +1188,15 @@ define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0,
; LEGACY-MESA-VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 12, align 4, addrspace 4)
; LEGACY-MESA-VI: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](s96), 0
; LEGACY-MESA-VI: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD]](s96), 32
- ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+ ; LEGACY-MESA-VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 49
; LEGACY-MESA-VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64)
- ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 1, align 16, addrspace 4)
- ; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 49
- ; LEGACY-MESA-VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64)
- ; LEGACY-MESA-VI: [[LOAD2:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load 12, align 1, addrspace 4)
- ; LEGACY-MESA-VI: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD2]](s96), 0
- ; LEGACY-MESA-VI: [[EXTRACT3:%[0-9]+]]:_(s64) = G_EXTRACT [[LOAD2]](s96), 32
- ; LEGACY-MESA-VI: [[C3:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
- ; LEGACY-MESA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C3]](p1)
- ; LEGACY-MESA-VI: G_STORE [[EXTRACT]](s32), [[C3]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
+ ; LEGACY-MESA-VI: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 12, align 1, addrspace 4)
+ ; LEGACY-MESA-VI: [[C2:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
+ ; LEGACY-MESA-VI: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C2]](p1)
+ ; LEGACY-MESA-VI: G_STORE [[EXTRACT]](s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: G_STORE [[EXTRACT1]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
- ; LEGACY-MESA-VI: G_STORE [[EXTRACT2]](s32), [[C3]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
- ; LEGACY-MESA-VI: G_STORE [[EXTRACT3]](s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
+ ; LEGACY-MESA-VI: G_STORE %3:_(s32), [[C2]](p1) :: (volatile store 4 into `i32 addrspace(1)* null`, addrspace 1)
+ ; LEGACY-MESA-VI: G_STORE %4:_(s64), [[COPY1]](p1) :: (volatile store 8 into `i64 addrspace(1)* null`, addrspace 1)
; LEGACY-MESA-VI: S_ENDPGM 0
%val0 = extractvalue <{i32, i64}> %arg0, 0
%val1 = extractvalue <{i32, i64}> %arg0, 1
@@ -1213,3 +1208,18 @@ define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0,
store volatile i64 %val3, i64 addrspace(1)* null
ret void
}
+
+define amdgpu_kernel void @unused_i32_arg(i32 addrspace(1)* nocapture %out, i32 %unused, i32 %in) nounwind {
+ ; HSA-VI-LABEL: name: unused_i32_arg
+ ; HSA-VI: bb.1.entry:
+ ; HSA-VI: liveins: $sgpr4_sgpr5
+ ; HSA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
+ ; HSA-VI: S_ENDPGM 0
+ ; LEGACY-MESA-VI-LABEL: name: unused_i32_arg
+ ; LEGACY-MESA-VI: bb.1.entry:
+ ; LEGACY-MESA-VI: liveins: $sgpr0_sgpr1
+ ; LEGACY-MESA-VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1
+ ; LEGACY-MESA-VI: S_ENDPGM 0
+entry:
+ ret void
+}
More information about the llvm-commits
mailing list