[llvm] ceb587a - [AMDGPU] Fix crash in allowsMisalignedMemoryAccesses with i1 (#105794)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 23 11:51:40 PDT 2024
Author: Austin Kerbow
Date: 2024-08-23T11:51:37-07:00
New Revision: ceb587a16cc2f5d61dc3299d2e54d6c17be14e4a
URL: https://github.com/llvm/llvm-project/commit/ceb587a16cc2f5d61dc3299d2e54d6c17be14e4a
DIFF: https://github.com/llvm/llvm-project/commit/ceb587a16cc2f5d61dc3299d2e54d6c17be14e4a.diff
LOG: [AMDGPU] Fix crash in allowsMisalignedMemoryAccesses with i1 (#105794)
Added:
llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/load-i1-misaligned.ll
Modified:
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/load-local-i1.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index ecd4451c504727..1437f3d58b5e79 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1695,7 +1695,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
if (!Subtarget->hasUnalignedDSAccessEnabled() && Alignment < Align(4))
return false;
- Align RequiredAlignment(PowerOf2Ceil(Size/8)); // Natural alignment.
+ Align RequiredAlignment(
+ PowerOf2Ceil(divideCeil(Size, 8))); // Natural alignment.
if (Subtarget->hasLDSMisalignedBug() && Size > 32 &&
Alignment < RequiredAlignment)
return false;
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i1.ll b/llvm/test/CodeGen/AMDGPU/load-local-i1.ll
index 578170941efaaa..43d102e4655b23 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i1.ll
@@ -462,4 +462,17 @@ define amdgpu_kernel void @local_sextload_v64i1_to_v64i64(ptr addrspace(3) %out,
ret void
}
+; FUNC-LABEL: {{^}}local_load_i1_misaligned:
+; SICIVI: s_mov_b32 m0
+; GFX9-NOT: m0
+define amdgpu_kernel void @local_load_i1_misaligned(ptr addrspace(3) %in, ptr addrspace (3) %out) #0 {
+ %in.gep.1 = getelementptr i1, ptr addrspace(3) %in, i32 1
+ %load.1 = load <16 x i1>, ptr addrspace(3) %in.gep.1, align 4
+ %load.2 = load <8 x i1>, ptr addrspace(3) %in, align 1
+ %out.gep.1 = getelementptr i1, ptr addrspace(3) %out, i32 16
+ store <16 x i1> %load.1, ptr addrspace(3) %out
+ store <8 x i1> %load.2, ptr addrspace(3) %out.gep.1
+ ret void
+}
+
attributes #0 = { nounwind }
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/load-i1-misaligned.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/load-i1-misaligned.ll
new file mode 100644
index 00000000000000..6f3d2cb69090eb
--- /dev/null
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/load-i1-misaligned.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=amdgcn-amd-amdhsa --mcpu=gfx940 -passes=load-store-vectorizer -S -o - %s | FileCheck %s
+
+; Don't crash when checking for misaligned accesses with sub-byte size.
+
+define void @misaligned_access_i1(ptr addrspace(3) %in) #0 {
+; CHECK-LABEL: define void @misaligned_access_i1(
+; CHECK-SAME: ptr addrspace(3) [[IN:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[IN_GEP_1:%.*]] = getelementptr i1, ptr addrspace(3) [[IN]], i32 1
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i1>, ptr addrspace(3) [[IN_GEP_1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i1>, ptr addrspace(3) [[IN]], align 1
+; CHECK-NEXT: ret void
+;
+ %in.gep.1 = getelementptr i1, ptr addrspace(3) %in, i32 1
+
+ %1 = load <16 x i1>, ptr addrspace(3) %in.gep.1, align 4
+ %2 = load <8 x i1>, ptr addrspace(3) %in, align 1
+ ret void
+}
+
More information about the llvm-commits mailing list