[llvm] [AMDGPU] Extended vector promotion to aggregate types. (PR #143784)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 12 01:38:32 PDT 2025
================
@@ -0,0 +1,295 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca-to-vector -amdgpu-promote-alloca-to-vector-limit=512 %s | FileCheck %s
+
+declare void @clobber_i8(i8)
+
+define void @test_v4i8(i64 %idx) {
+; CHECK-LABEL: define void @test_v4i8(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[STACK:%.*]] = freeze <4 x i8> poison
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i8> [[STACK]], i64 [[IDX]]
+; CHECK-NEXT: call void @clobber_i8(i8 [[TMP1]])
+; CHECK-NEXT: ret void
+;
+ %stack = alloca <4 x i8>, align 4, addrspace(5)
+ %ptr = getelementptr inbounds i8, ptr addrspace(5) %stack, i64 %idx
+ %val = load i8, ptr addrspace(5) %ptr, align 1
+ call void @clobber_i8(i8 %val)
+ ret void
+}
+
+define void @test_a4i8(i64 %idx) {
+; CHECK-LABEL: define void @test_a4i8(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[STACK:%.*]] = freeze <4 x i8> poison
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i8> [[STACK]], i64 [[IDX]]
+; CHECK-NEXT: call void @clobber_i8(i8 [[TMP1]])
+; CHECK-NEXT: ret void
+;
+ %stack = alloca [4 x i8], align 4, addrspace(5)
+ %ptr = getelementptr inbounds i8, ptr addrspace(5) %stack, i64 %idx
+ %val = load i8, ptr addrspace(5) %ptr, align 1
+ call void @clobber_i8(i8 %val)
+ ret void
+}
+
+define void @test_a2v4i8(i64 %idx) {
+; CHECK-LABEL: define void @test_a2v4i8(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[STACK:%.*]] = freeze <8 x i8> poison
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <8 x i8> [[STACK]], i64 [[IDX]]
+; CHECK-NEXT: call void @clobber_i8(i8 [[TMP1]])
+; CHECK-NEXT: ret void
+;
+ %stack = alloca [2 x <4 x i8>], align 4, addrspace(5)
----------------
arsenm wrote:
I thought SROA already tried to flatten out aggregates into simple arrays. Why do we need to do this? We don't need to optimally handle all IR — just post-optimized IR.
https://github.com/llvm/llvm-project/pull/143784
More information about the llvm-commits
mailing list