[PATCH] D155051: [AMDGPU] Add sanity check that fixes bad shift operation in AMD backend
Konrad Kusiak via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 9 08:49:16 PDT 2023
konradkusiak97 updated this revision to Diff 548637.
konradkusiak97 added a comment.
Fixed the UB behaviour and included a testcase
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D155051/new/
https://reviews.llvm.org/D155051
Files:
llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
llvm/test/CodeGen/AMDGPU/merge-image-load.mir
llvm/test/CodeGen/AMDGPU/merge-image-sample.mir
Index: llvm/test/CodeGen/AMDGPU/merge-image-sample.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/merge-image-sample.mir
+++ llvm/test/CodeGen/AMDGPU/merge-image-sample.mir
@@ -172,6 +172,24 @@
...
---
+# GFX9-LABEL: name: image_sample_l_dmask_zero_not_merged
+# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
+
+name: image_sample_l_dmask_zero_not_merged
+body: |
+ bb.0.entry:
+ %0:sgpr_64 = COPY $sgpr0_sgpr1
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
+ %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
+ %4:vgpr_32 = COPY %2.sub3
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
+ %6:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
+ %7:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5:vreg_128, %3:sgpr_256, %2:sgpr_128, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
+...
+---
+
# GFX9-LABEL: name: image_sample_l_dmask_not_disjoint_not_merged
# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_SAMPLE_L_V1_V4 %5, %3, %2, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_SAMPLE_L_V3_V4 %5, %3, %2, 11, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
Index: llvm/test/CodeGen/AMDGPU/merge-image-load.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/merge-image-load.mir
+++ llvm/test/CodeGen/AMDGPU/merge-image-load.mir
@@ -172,6 +172,24 @@
...
---
+# GFX9-LABEL: name: image_load_dmask_zero_not_merged
+# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
+
+name: image_load_dmask_zero_not_merged
+body: |
+ bb.0.entry:
+ %0:sgpr_64 = COPY $sgpr0_sgpr1
+ %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 36, 0
+ %2:sgpr_128 = COPY $sgpr96_sgpr97_sgpr98_sgpr99
+ %3:sgpr_256 = S_LOAD_DWORDX8_IMM %1, 208, 0
+ %4:vgpr_32 = COPY %2.sub3
+ %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %2:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128))
+ %6:vgpr_32 = IMAGE_LOAD_V1_V4 %5:vreg_128, %3:sgpr_256, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
+ %7:vreg_96 = IMAGE_LOAD_V3_V4 %5:vreg_128, %3:sgpr_256, 0, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
+...
+---
+
# GFX9-LABEL: name: image_load_dmask_not_disjoint_not_merged
# GFX9: %{{[0-9]+}}:vgpr_32 = IMAGE_LOAD_V1_V4 %5, %3, 4, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
# GFX9: %{{[0-9]+}}:vreg_96 = IMAGE_LOAD_V3_V4 %5, %3, 11, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 4)
Index: llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -871,6 +871,9 @@
unsigned MaxMask = std::max(CI.DMask, Paired.DMask);
unsigned MinMask = std::min(CI.DMask, Paired.DMask);
+ if (!MaxMask)
+ return false;
+
unsigned AllowedBitsForMin = llvm::countr_zero(MaxMask);
if ((1u << AllowedBitsForMin) <= MinMask)
return false;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D155051.548637.patch
Type: text/x-patch
Size: 4003 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230809/154da074/attachment.bin>
More information about the llvm-commits
mailing list