[llvm] [AMDGPU] Use table strategy for LowerModuleLDSPass at O0 (PR #160181)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 24 22:11:11 PDT 2025
================
@@ -0,0 +1,237 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa < %s
+
+%"struct.rocprim::ROCPRIM_400000_NS::detail::raw_storage" = type { [1056 x i8] }
+%"struct.rocprim::ROCPRIM_400000_NS::detail::raw_storage.17" = type { [4 x i8] }
+%"struct.rocprim::ROCPRIM_400000_NS::detail::raw_storage.43" = type { [16 x i8] }
+
+ at _ZZN7hipcomp30do_cascaded_compression_kernelIhmLi128ELi4096EEEviiiPKPKT_PKT0_PKPvPS6_28hipcompBatchedCascadedOpts_tE24shared_element_storage_0 = addrspace(3) global [1026 x i32] undef
+ at _ZZN7hipcomp30do_cascaded_compression_kernelIhmLi128ELi4096EEEviiiPKPKT_PKT0_PKPvPS6_28hipcompBatchedCascadedOpts_tE24shared_element_storage_1 = addrspace(3) global [1026 x i32] undef
+ at _ZZN7hipcomp30do_cascaded_compression_kernelIhmLi128ELi4096EEEviiiPKPKT_PKT0_PKPvPS6_28hipcompBatchedCascadedOpts_tE19shared_count_buffer = external addrspace(3) global [2048 x i32]
+ at _ZZN7hipcomp30do_cascaded_compression_kernelIhmLi128ELi4096EEEviiiPKPKT_PKT0_PKPvPS6_28hipcompBatchedCascadedOpts_tE17shared_tmp_buffer = addrspace(3) global [2050 x i32] undef
+ at _ZZN7hipcomp30do_cascaded_compression_kernelIhmLi128ELi4096EEEviiiPKPKT_PKT0_PKPvPS6_28hipcompBatchedCascadedOpts_tE14chunk_metadata = addrspace(3) global [16 x i32] undef
+ at _ZZN7hipcomp18block_rle_compressIhmtLi128EEEvPKT_T0_PS1_PT1_PS4_S7_E12temp_storage = external addrspace(3) global %"struct.rocprim::ROCPRIM_400000_NS::detail::raw_storage"
+ at _ZZN7hipcomp16get_for_bitwidthItmLi128EEEvPKT_T0_PS1_PjE12temp_storage = addrspace(3) global %"struct.rocprim::ROCPRIM_400000_NS::detail::raw_storage.17" undef
+ at _ZZN7hipcomp30do_cascaded_compression_kernelItmLi128ELi4096EEEviiiPKPKT_PKT0_PKPvPS6_28hipcompBatchedCascadedOpts_tE24shared_element_storage_0 = addrspace(3) global [1026 x i32] undef
+ at _ZZN7hipcomp30do_cascaded_compression_kernelItmLi128ELi4096EEEviiiPKPKT_PKT0_PKPvPS6_28hipcompBatchedCascadedOpts_tE24shared_element_storage_1 = addrspace(3) global [1026 x i32] undef
+ at _ZZN7hipcomp30do_cascaded_compression_kernelItmLi128ELi4096EEEviiiPKPKT_PKT0_PKPvPS6_28hipcompBatchedCascadedOpts_tE19shared_count_buffer = external addrspace(3) global [1024 x i32]
+ at _ZZN7hipcomp30do_cascaded_compression_kernelItmLi128ELi4096EEEviiiPKPKT_PKT0_PKPvPS6_28hipcompBatchedCascadedOpts_tE17shared_tmp_buffer = addrspace(3) global [1026 x i32] undef
+ at _ZZN7hipcomp30do_cascaded_compression_kernelItmLi128ELi4096EEEviiiPKPKT_PKT0_PKPvPS6_28hipcompBatchedCascadedOpts_tE14chunk_metadata = external addrspace(3) global [16 x i32]
+ at _ZZN7hipcomp18block_rle_compressItmtLi128EEEvPKT_T0_PS1_PT1_PS4_S7_E12temp_storage = external addrspace(3) global %"struct.rocprim::ROCPRIM_400000_NS::detail::raw_storage"
+ at _ZZ23HlifCompressBatchKernelILi32EN7hipcomp25cascaded_compress_wrapperIjmLi128ELi4096EEERK28hipcompBatchedCascadedOpts_tLi1EENSt9enable_ifIXsr3std10is_base_ofI21hlif_compress_wrapperT0_EE5valueEvE4typeE12CompressArgsT1_E13output_status = external addrspace(3) global [1 x i32]
+ at _ZZN7hipcomp30do_cascaded_compression_kernelIjmLi128ELi4096EEEviiiPKPKT_PKT0_PKPvPS6_28hipcompBatchedCascadedOpts_tE24shared_element_storage_0 = addrspace(3) global [1026 x i32] undef
+ at _ZZN7hipcomp30do_cascaded_compression_kernelIjmLi128ELi4096EEEviiiPKPKT_PKT0_PKPvPS6_28hipcompBatchedCascadedOpts_tE24shared_element_storage_1 = addrspace(3) global [1026 x i32] undef
+ at _ZZN7hipcomp30do_cascaded_compression_kernelIjmLi128ELi4096EEEviiiPKPKT_PKT0_PKPvPS6_28hipcompBatchedCascadedOpts_tE19shared_count_buffer = external addrspace(3) global [512 x i32]
+ at _ZZN7hipcomp30do_cascaded_compression_kernelIjmLi128ELi4096EEEviiiPKPKT_PKT0_PKPvPS6_28hipcompBatchedCascadedOpts_tE17shared_tmp_buffer = addrspace(3) global [514 x i32] undef
+ at _ZZN7hipcomp30do_cascaded_compression_kernelIjmLi128ELi4096EEEviiiPKPKT_PKT0_PKPvPS6_28hipcompBatchedCascadedOpts_tE14chunk_metadata = external addrspace(3) global [16 x i32]
+ at _ZZN7hipcomp18block_rle_compressIjmtLi128EEEvPKT_T0_PS1_PT1_PS4_S7_E12temp_storage = external addrspace(3) global %"struct.rocprim::ROCPRIM_400000_NS::detail::raw_storage"
+ at _ZZN7hipcomp30do_cascaded_compression_kernelImmLi128ELi4096EEEviiiPKPKT_PKT0_PKPvPS6_28hipcompBatchedCascadedOpts_tE24shared_element_storage_0 = external addrspace(3) global [514 x i64]
+ at _ZZN7hipcomp30do_cascaded_compression_kernelImmLi128ELi4096EEEviiiPKPKT_PKT0_PKPvPS6_28hipcompBatchedCascadedOpts_tE24shared_element_storage_1 = external addrspace(3) global [514 x i64]
+ at _ZZN7hipcomp30do_cascaded_compression_kernelImmLi128ELi4096EEEviiiPKPKT_PKT0_PKPvPS6_28hipcompBatchedCascadedOpts_tE19shared_count_buffer = external addrspace(3) global [256 x i32]
+ at _ZZN7hipcomp30do_cascaded_compression_kernelImmLi128ELi4096EEEviiiPKPKT_PKT0_PKPvPS6_28hipcompBatchedCascadedOpts_tE17shared_tmp_buffer = external addrspace(3) global [258 x i32]
+ at _ZZN7hipcomp30do_cascaded_compression_kernelImmLi128ELi4096EEEviiiPKPKT_PKT0_PKPvPS6_28hipcompBatchedCascadedOpts_tE14chunk_metadata = external addrspace(3) global [16 x i32]
+ at _ZZN7hipcomp18block_rle_compressImmtLi128EEEvPKT_T0_PS1_PT1_PS4_S7_E12temp_storage = external addrspace(3) global %"struct.rocprim::ROCPRIM_400000_NS::detail::raw_storage"
+ at _ZZN7hipcomp16get_for_bitwidthImmLi128EEEvPKT_T0_PS1_PjE12temp_storage = external addrspace(3) global %"struct.rocprim::ROCPRIM_400000_NS::detail::raw_storage.43"
+
+define amdgpu_kernel void @_Z23HlifCompressBatchKernelILi32EN7hipcomp25cascaded_compress_wrapperIhmLi128ELi4096EEERK28hipcompBatchedCascadedOpts_tLi1EENSt9enable_ifIXsr3std10is_base_ofI21hlif_compress_wrapperT0_EE5valueEvE4typeE12CompressArgsT1_() {
+entry:
+ %0 = call i32 @_Z17HlifCompressBatchILi1ERN7hipcomp25cascaded_compress_wrapperIhmLi128ELi4096EEERN18cooperative_groups12thread_blockEEvRK12CompressArgsOT0_OT1_()
+ ret void
+}
+
+define i32 @_Z17HlifCompressBatchILi1ERN7hipcomp25cascaded_compress_wrapperIhmLi128ELi4096EEERN18cooperative_groups12thread_blockEEvRK12CompressArgsOT0_OT1_() {
----------------
arsenm wrote:
Fix all of these variable names: replace the long mangled names with shorter, more meaningful testcase names.
https://github.com/llvm/llvm-project/pull/160181
More information about the llvm-commits mailing list