[llvm] [IRBuilder] Limit created masked load/store alignment to valid 32bit values (PR #73647)

via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 28 05:59:40 PST 2023


llvmbot wrote:


@llvm/pr-subscribers-llvm-transforms

Author: David Green (davemgreen)

<details>
<summary>Changes</summary>

In rare situations, mostly when using a null base pointer, the returned Alignment can be very large, so large that it overflows a 32-bit value, and we end up with a masked load/store with zero alignment, which the verifier rejects.

This patch limits the alignment passed to the created masked.load and masked.store intrinsics to the maximum valid alignment that fits in a 32-bit integer (the same clamp is applied when creating masked gather/scatter intrinsics).
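
As a minimal, standalone sketch (not part of the patch, names are illustrative only): an alignment such as `1ULL << 32`, which can be inferred from a null base pointer, wraps to 0 when truncated to the intrinsic's i32 alignment operand, whereas clamping keeps the largest power of two representable in 32 bits.

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  // Alignment inferred from a null base pointer can exceed 32 bits.
  uint64_t HugeAlign = 1ULL << 32;
  // Without the clamp, the value wraps to 0 when truncated to 32 bits.
  uint32_t Truncated = static_cast<uint32_t>(HugeAlign);
  // With the clamp, it becomes the largest 32-bit power of two, 0x80000000.
  uint32_t Clamped =
      static_cast<uint32_t>(std::min<uint64_t>(HugeAlign, 0x80000000ULL));
  std::printf("truncated=%u clamped=%u\n", Truncated, Clamped); // 0 vs 2147483648
  return 0;
}
```

Note that 0x80000000 prints as the signed value -2147483648 in textual IR, which is what the updated CHECK lines below expect.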

---
Full diff: https://github.com/llvm/llvm-project/pull/73647.diff


2 Files Affected:

- (modified) llvm/lib/IR/IRBuilder.cpp (+8-4) 
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll (+42) 


``````````diff
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index b09b80f95871a1c..dccf46bae1cf7f6 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -584,7 +584,8 @@ CallInst *IRBuilderBase::CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment,
   if (!PassThru)
     PassThru = PoisonValue::get(Ty);
   Type *OverloadedTypes[] = { Ty, PtrTy };
-  Value *Ops[] = {Ptr, getInt32(Alignment.value()), Mask, PassThru};
+  unsigned Align32 = std::min<uint64_t>(Alignment.value(), 0x80000000ULL);
+  Value *Ops[] = {Ptr, getInt32(Align32), Mask, PassThru};
   return CreateMaskedIntrinsic(Intrinsic::masked_load, Ops,
                                OverloadedTypes, Name);
 }
@@ -602,7 +603,8 @@ CallInst *IRBuilderBase::CreateMaskedStore(Value *Val, Value *Ptr,
   assert(DataTy->isVectorTy() && "Val should be a vector");
   assert(Mask && "Mask should not be all-ones (null)");
   Type *OverloadedTypes[] = { DataTy, PtrTy };
-  Value *Ops[] = {Val, Ptr, getInt32(Alignment.value()), Mask};
+  unsigned Align32 = std::min<uint64_t>(Alignment.value(), 0x80000000ULL);
+  Value *Ops[] = {Val, Ptr, getInt32(Align32), Mask};
   return CreateMaskedIntrinsic(Intrinsic::masked_store, Ops, OverloadedTypes);
 }
 
@@ -643,7 +645,8 @@ CallInst *IRBuilderBase::CreateMaskedGather(Type *Ty, Value *Ptrs,
     PassThru = PoisonValue::get(Ty);
 
   Type *OverloadedTypes[] = {Ty, PtrsTy};
-  Value *Ops[] = {Ptrs, getInt32(Alignment.value()), Mask, PassThru};
+  unsigned Align32 = std::min<uint64_t>(Alignment.value(), 0x80000000ULL);
+  Value *Ops[] = {Ptrs, getInt32(Align32), Mask, PassThru};
 
   // We specify only one type when we create this intrinsic. Types of other
   // arguments are derived from this type.
@@ -668,7 +671,8 @@ CallInst *IRBuilderBase::CreateMaskedScatter(Value *Data, Value *Ptrs,
     Mask = getAllOnesMask(NumElts);
 
   Type *OverloadedTypes[] = {DataTy, PtrsTy};
-  Value *Ops[] = {Data, Ptrs, getInt32(Alignment.value()), Mask};
+  unsigned Align32 = std::min<uint64_t>(Alignment.value(), 0x80000000ULL);
+  Value *Ops[] = {Data, Ptrs, getInt32(Align32), Mask};
 
   // We specify only one type when we create this intrinsic. Types of other
   // arguments are derived from this type.
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll
index c67662f872503e4..979653f8a2f7520 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll
@@ -99,6 +99,48 @@ define void @combine_st1_masked_casted_predicate(<vscale x 8 x i16> %vec, ptr %p
   ret void
 }
 
+define <vscale x 4 x i32> @combine_ld1_superalign(ptr %ptr) #0 {
+; CHECK-LABEL: @combine_ld1_superalign(
+; CHECK-NEXT:    store i1 true, ptr poison, align 1
+; CHECK-NEXT:    ret <vscale x 4 x i32> poison
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %1, ptr null)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @combine_ld1_masked_superalign(ptr %ptr) #0 {
+; CHECK-LABEL: @combine_ld1_masked_superalign(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr null, i32 -2147483648, <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
+  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %1, ptr null)
+  ret <vscale x 4 x i32> %2
+}
+
+define void @combine_st1_superalign(<vscale x 4 x i32> %vec, ptr %ptr) #0 {
+; CHECK-LABEL: @combine_st1_superalign(
+; CHECK-NEXT:    store <vscale x 4 x i32> poison, ptr null, align 16
+; CHECK-NEXT:    ret void
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %1, ptr null)
+  ret void
+}
+
+define void @combine_st1_masked_superalign(<vscale x 4 x i32> %vec, ptr %ptr) #0 {
+; CHECK-LABEL: @combine_st1_masked_superalign(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
+; CHECK-NEXT:    call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[VEC:%.*]], ptr null, i32 -2147483648, <vscale x 4 x i1> [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
+  call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %1, ptr null)
+  ret void
+}
+
 declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
 

``````````

</details>


https://github.com/llvm/llvm-project/pull/73647

