[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)
Snehasish Kumar via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Mar 25 13:48:41 PDT 2025
================
@@ -0,0 +1,141 @@
+; RUN: llc -mtriple=aarch64 -enable-split-machine-functions \
+; RUN: -partition-static-data-sections=true -function-sections=true \
+; RUN: -unique-section-names=false \
+; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always
+
+; Repeat the RUN command above for big-endian systems.
+; RUN: llc -mtriple=aarch64_be -enable-split-machine-functions \
+; RUN: -partition-static-data-sections=true -function-sections=true \
+; RUN: -unique-section-names=false \
+; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always
+
+; Tests that constant pool hotness is aggregated across the module. The
+; static-data-splitter processes data from cold_func first, unprofiled_func
+; second, and then hot_func. Specifically, tests that
+; - If a constant is accessed by hot functions, all constant pools for this
+; constant (e.g., from an unprofiled function, or cold function) should have
+; `.hot` suffix.
+; - Similarly, if a constant is accessed by both a cold function and an
+; unprofiled function, constant pools for this constant should not have the
+; `.unlikely` suffix.
+
+; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8
+; CHECK: .LCPI0_0:
+; CHECK: .xword 0x3fe5c28f5c28f5c3 // double 0.68000000000000005
+; CHECK: .section .rodata.cst8.unlikely,"aM",@progbits,8
+; CHECK: .LCPI0_1:
+; CHECK: .xword 0x3fe5eb851eb851ec // double 0.68500000000000005
+; CHECK: .section .rodata.cst8,"aM",@progbits,8
+; CHECK: .LCPI0_2:
+; CHECK: .byte 0 // 0x0
+; CHECK: .byte 4 // 0x4
+; CHECK: .byte 8 // 0x8
+; CHECK: .byte 12 // 0xc
+; CHECK: .byte 255 // 0xff
+; CHECK: .byte 255 // 0xff
+; CHECK: .byte 255 // 0xff
+; CHECK: .byte 255 // 0xff
+
+; CHECK: .section .rodata.cst8,"aM",@progbits,8
+; CHECK: .LCPI1_0:
+; CHECK: .byte 0 // 0x0
+; CHECK: .byte 4 // 0x4
+; CHECK: .byte 8 // 0x8
+; CHECK: .byte 12 // 0xc
+; CHECK: .byte 255 // 0xff
+; CHECK: .byte 255 // 0xff
+; CHECK: .byte 255 // 0xff
+; CHECK: .byte 255 // 0xff
+; CHECK: .section .rodata.cst16.hot,"aM",@progbits,16
+; CHECK: .LCPI1_1:
+; CHECK: .word 442 // 0x1ba
+; CHECK: .word 100 // 0x64
+; CHECK: .word 0 // 0x0
+; CHECK: .word 0 // 0x0
+
+; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8
+; CHECK: .LCPI2_0:
+; CHECK: .xword 0x3fe5c28f5c28f5c3 // double 0.68000000000000005
+; CHECK: .section .rodata.cst16.hot,"aM",@progbits,16
+; CHECK: .LCPI2_1:
+; CHECK: .word 442 // 0x1ba
+; CHECK: .word 100 // 0x64
+; CHECK: .word 0 // 0x0
+; CHECK: .word 0 // 0x0
+
+; CHECK: .section .rodata.cst32,"aM",@progbits,32
+; CHECK: .globl val
+
+; Carries !prof !16 (the metadata node itself is outside this excerpt); the
+; test description above treats this as the cold function.
+; Constants referenced here: double 0.68, double 0.685, and the <8 x i8>
+; index vector <0, 4, 8, 12, -1, -1, -1, -1> passed to tbl2.
+define i32 @cold_func(double %x, <16 x i8> %a, <16 x i8> %b) !prof !16 {
+ ; Two calls to the variadic external, each passing a different double constant.
+ %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
+ %num = tail call i32 (...) @func_taking_arbitrary_param(double 6.8500000e-01)
+ ; NEON tbl2 intrinsic over %a/%b with a constant third operand.
+ %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
+ ; Reinterpret the 8 bytes as two i32 lanes and take lane 1.
+ %t2 = bitcast <8 x i8> %t1 to <2 x i32>
+ %3 = extractelement <2 x i32> %t2, i32 1
+ ; Result = first call's return + extracted lane + second call's return.
+ %sum = add i32 %2, %3
+ %ret = add i32 %sum, %num
+ ret i32 %ret
+}
+
+; External declarations used by the test functions in this file: the NEON tbl2
+; intrinsic (two <16 x i8> operands plus an <8 x i8> operand) and a variadic
+; function returning i32 that the tests call with double constants.
+declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>)
+declare i32 @func_taking_arbitrary_param(...)
+
+; Has no !prof attachment, i.e. it is the unprofiled function of this test.
+; Constants referenced here: the same <8 x i8> index vector that cold_func
+; passes to tbl2, and the <4 x i32> vector <442, 100, 0, 0> that hot_func also
+; uses.
+define <4 x i1> @unprofiled_func(<16 x i8> %a, <16 x i8> %b) {
+ ; NEON tbl2 intrinsic over %a/%b with a constant third operand.
+ %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
+ ; Reinterpret as four i16 lanes, then zero-extend each lane to i32.
+ %t2 = bitcast <8 x i8> %t1 to <4 x i16>
+ %t3 = zext <4 x i16> %t2 to <4 x i32>
+ ; Lane-wise unsigned "constant <= %t3" comparison.
+ %cmp = icmp ule <4 x i32> <i32 442, i32 100, i32 0, i32 0>, %t3
+ ret <4 x i1> %cmp
+}
+
+; Carries !prof !17 (the metadata node itself is outside this excerpt); the
+; test description above treats this as the hot function.
+; Constants referenced here: double 0.68 (also used by cold_func) and the
+; <4 x i32> vector <442, 100, 0, 0> (also used by unprofiled_func).
+define <4 x i1> @hot_func(i32 %0, <4 x i32> %a) !prof !17 {
+ ; The call's result (%2) is not used afterwards; only its double constant
+ ; argument matters to this test.
+ %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
+ ; Lane-wise unsigned "%a <= constant" comparison.
+ %b = icmp ule <4 x i32> %a, <i32 442, i32 100, i32 0, i32 0>
+ ret <4 x i1> %b
+}
+
+; 256-bit (32-byte) constant global. The CHECK lines above expect a
+; .rodata.cst32 section followed by a `.globl val` directive.
+@val = unnamed_addr constant i256 1
----------------
snehasish wrote:
Should this be used somewhere so that we can check that we correctly assign the section suffix for 32-byte consts?
https://github.com/llvm/llvm-project/pull/129781
More information about the llvm-branch-commits
mailing list