[llvm] 5d9c321 - Handle scalable store size in MemCpyOptimizer (#118957)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 6 12:48:53 PST 2024
Author: Momchil Velikov
Date: 2024-12-06T20:48:48Z
New Revision: 5d9c321e8d3b543c7da44f1b0447d4d09570ddbb
URL: https://github.com/llvm/llvm-project/commit/5d9c321e8d3b543c7da44f1b0447d4d09570ddbb
DIFF: https://github.com/llvm/llvm-project/commit/5d9c321e8d3b543c7da44f1b0447d4d09570ddbb.diff
LOG: Handle scalable store size in MemCpyOptimizer (#118957)
The compiler crashes with an internal compiler error (ICE) when it tries to
create a `memset` with a scalable size.
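
For context, the crash comes from the old code converting the `TypeSize`
returned by `DL.getTypeStoreSize(T)` straight into a `uint64_t`, which is only
valid for fixed-size types. Below is a minimal, standalone sketch of the guard
pattern the fix applies, using the `TypeSize` API; the helper name and
scaffolding are invented for illustration and are not code from this patch.

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/TypeSize.h"

using namespace llvm;

// Hypothetical helper, not part of the patch: report the store size of Ty as
// a fixed byte count, or fail when the size depends on vscale. Types such as
// { <vscale x 16 x i1>, <vscale x 16 x i1> } have a scalable store size and
// therefore cannot be expressed as a memset with a constant length.
static bool getFixedStoreSize(const DataLayout &DL, Type *Ty,
                              uint64_t &FixedSize) {
  TypeSize Size = DL.getTypeStoreSize(Ty);
  if (Size.isScalable())
    return false;
  FixedSize = Size.getFixedValue(); // Safe: the size is a compile-time constant.
  return true;
}

The patch folds the same `isScalable()` check directly into
`MemCpyOptPass::processStore`, as the diff below shows.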
Added:
Modified:
llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
llvm/test/Transforms/MemCpyOpt/vscale-memset.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 0cba5d077da62b..bb98b3d1c07259 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -787,43 +787,47 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
   // Ensure that the value being stored is something that can be memset'able a
   // byte at a time like "0" or "-1" or any width, as well as things like
   // 0xA0A0A0A0 and 0.0.
-  auto *V = SI->getOperand(0);
-  if (Value *ByteVal = isBytewiseValue(V, DL)) {
-    if (Instruction *I =
-            tryMergingIntoMemset(SI, SI->getPointerOperand(), ByteVal)) {
-      BBI = I->getIterator(); // Don't invalidate iterator.
-      return true;
-    }
+  Value *V = SI->getOperand(0);
+  Value *ByteVal = isBytewiseValue(V, DL);
+  if (!ByteVal)
+    return false;
 
-    // If we have an aggregate, we try to promote it to memset regardless
-    // of opportunity for merging as it can expose optimization opportunities
-    // in subsequent passes.
-    auto *T = V->getType();
-    if (T->isAggregateType()) {
-      uint64_t Size = DL.getTypeStoreSize(T);
-      IRBuilder<> Builder(SI);
-      auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
-                                     SI->getAlign());
-      M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
+  if (Instruction *I =
+          tryMergingIntoMemset(SI, SI->getPointerOperand(), ByteVal)) {
+    BBI = I->getIterator(); // Don't invalidate iterator.
+    return true;
+  }
+
+  // If we have an aggregate, we try to promote it to memset regardless
+  // of opportunity for merging as it can expose optimization opportunities
+  // in subsequent passes.
+  auto *T = V->getType();
+  if (!T->isAggregateType())
+    return false;
 
-      LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
+  TypeSize Size = DL.getTypeStoreSize(T);
+  if (Size.isScalable())
+    return false;
 
-      // The newly inserted memset is immediately overwritten by the original
-      // store, so we do not need to rename uses.
-      auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
-      auto *NewAccess = MSSAU->createMemoryAccessBefore(M, nullptr, StoreDef);
-      MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
+  IRBuilder<> Builder(SI);
+  auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
+                                 SI->getAlign());
+  M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
 
-      eraseInstruction(SI);
-      NumMemSetInfer++;
+  LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
 
-      // Make sure we do not invalidate the iterator.
-      BBI = M->getIterator();
-      return true;
-    }
-  }
+  // The newly inserted memset is immediately overwritten by the original
+  // store, so we do not need to rename uses.
+  auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
+  auto *NewAccess = MSSAU->createMemoryAccessBefore(M, nullptr, StoreDef);
+  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
 
-  return false;
+  eraseInstruction(SI);
+  NumMemSetInfer++;
+
+  // Make sure we do not invalidate the iterator.
+  BBI = M->getIterator();
+  return true;
 }
 
 bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
diff --git a/llvm/test/Transforms/MemCpyOpt/vscale-memset.ll b/llvm/test/Transforms/MemCpyOpt/vscale-memset.ll
index b4ab443fdfb68c..45de52065cd5c1 100644
--- a/llvm/test/Transforms/MemCpyOpt/vscale-memset.ll
+++ b/llvm/test/Transforms/MemCpyOpt/vscale-memset.ll
@@ -8,7 +8,7 @@
 define void @foo(ptr %p) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:    store <vscale x 16 x i8> zeroinitializer, ptr [[P:%.*]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[P:%.*]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, ptr [[P]], i64 1
 ; CHECK-NEXT:    store <vscale x 16 x i8> zeroinitializer, ptr [[TMP1]], align 16
 ; CHECK-NEXT:    ret void
 ;
@@ -18,6 +18,18 @@ define void @foo(ptr %p) {
   ret void
 }
 
+; Test the compiler does not crash on a store of a scalable aggregate type.
+define void @test_no_crash_scalable_agg(ptr %p) {
+; CHECK-LABEL: @test_no_crash_scalable_agg(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store { <vscale x 16 x i1>, <vscale x 16 x i1> } zeroinitializer, ptr [[P:%.*]], align 2
+; CHECK-NEXT:    ret void
+;
+entry:
+  store { <vscale x 16 x i1>, <vscale x 16 x i1> } zeroinitializer, ptr %p, align 2
+  ret void
+}
+
 ; Positive test
 define void @memset_vscale_index_zero(ptr %p, i8 %z) {