[llvm] ceb587a - [AMDGPU] Fix crash in allowsMisalignedMemoryAccesses with i1 (#105794)

via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 23 11:51:40 PDT 2024


Author: Austin Kerbow
Date: 2024-08-23T11:51:37-07:00
New Revision: ceb587a16cc2f5d61dc3299d2e54d6c17be14e4a

URL: https://github.com/llvm/llvm-project/commit/ceb587a16cc2f5d61dc3299d2e54d6c17be14e4a
DIFF: https://github.com/llvm/llvm-project/commit/ceb587a16cc2f5d61dc3299d2e54d6c17be14e4a.diff

LOG: [AMDGPU] Fix crash in allowsMisalignedMemoryAccesses with i1 (#105794)

Added: 
    llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/load-i1-misaligned.ll

Modified: 
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/test/CodeGen/AMDGPU/load-local-i1.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index ecd4451c504727..1437f3d58b5e79 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1695,7 +1695,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
     if (!Subtarget->hasUnalignedDSAccessEnabled() && Alignment < Align(4))
       return false;
 
-    Align RequiredAlignment(PowerOf2Ceil(Size/8)); // Natural alignment.
+    Align RequiredAlignment(
+        PowerOf2Ceil(divideCeil(Size, 8))); // Natural alignment.
     if (Subtarget->hasLDSMisalignedBug() && Size > 32 &&
         Alignment < RequiredAlignment)
       return false;

diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i1.ll b/llvm/test/CodeGen/AMDGPU/load-local-i1.ll
index 578170941efaaa..43d102e4655b23 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i1.ll
@@ -462,4 +462,17 @@ define amdgpu_kernel void @local_sextload_v64i1_to_v64i64(ptr addrspace(3) %out,
   ret void
 }
 
+; FUNC-LABEL: {{^}}local_load_i1_misaligned:
+; SICIVI: s_mov_b32 m0
+; GFX9-NOT: m0
+define amdgpu_kernel void @local_load_i1_misaligned(ptr addrspace(3) %in, ptr addrspace (3) %out) #0 {
+  %in.gep.1 = getelementptr i1, ptr addrspace(3) %in, i32 1
+  %load.1 = load <16 x i1>, ptr addrspace(3) %in.gep.1, align 4
+  %load.2 = load <8 x i1>, ptr addrspace(3) %in, align 1
+  %out.gep.1 = getelementptr i1, ptr addrspace(3) %out, i32 16
+  store <16 x i1> %load.1, ptr addrspace(3) %out
+  store <8 x i1> %load.2, ptr addrspace(3) %out.gep.1
+  ret void
+}
+
 attributes #0 = { nounwind }

diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/load-i1-misaligned.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/load-i1-misaligned.ll
new file mode 100644
index 00000000000000..6f3d2cb69090eb
--- /dev/null
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/load-i1-misaligned.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=amdgcn-amd-amdhsa --mcpu=gfx940 -passes=load-store-vectorizer -S -o - %s | FileCheck %s
+
+; Don't crash when checking for misaligned accesses with sub-byte size.
+
+define void @misaligned_access_i1(ptr addrspace(3) %in) #0 {
+; CHECK-LABEL: define void @misaligned_access_i1(
+; CHECK-SAME: ptr addrspace(3) [[IN:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[IN_GEP_1:%.*]] = getelementptr i1, ptr addrspace(3) [[IN]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i1>, ptr addrspace(3) [[IN_GEP_1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i1>, ptr addrspace(3) [[IN]], align 1
+; CHECK-NEXT:    ret void
+;
+  %in.gep.1 = getelementptr i1, ptr addrspace(3) %in, i32 1
+
+  %1 = load <16 x i1>, ptr addrspace(3) %in.gep.1, align 4
+  %2 = load <8 x i1>, ptr addrspace(3) %in, align 1
+  ret void
+}
+


        


More information about the llvm-commits mailing list