[llvm] Handle scalable store size in MemCpyOptimizer (PR #118957)
Momchil Velikov via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 6 03:52:38 PST 2024
https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/118957
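The compiler crashes with an ICE when `MemCpyOptPass::processStore` tries to create a `memset` for a store of an aggregate type whose store size is scalable. The fix builds the size with `IRBuilder::CreateTypeSize`, so the `memset` length is emitted as a multiple of `vscale` instead of being forced into a fixed `uint64_t`.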
From 7e2d60348850619fb7b0c8a88e92ab103f907d34 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Fri, 6 Dec 2024 11:08:21 +0000
Subject: [PATCH] Handle scalable store size in MemCpyOptimizer
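The compiler crashes with an ICE when MemCpyOptPass::processStore tries
to create a `memset` for a store of an aggregate type whose store size
is scalable. Build the size with IRBuilder::CreateTypeSize so that it is
emitted as a multiple of `vscale`.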
---
.../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 3 +-
.../CodeGen/AArch64/memset-scalable-size.ll | 56 +++++++++++++++++++
2 files changed, 58 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AArch64/memset-scalable-size.ll
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 0cba5d077da62b..fc5f6ff2b7f377 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -800,8 +800,9 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// in subsequent passes.
auto *T = V->getType();
if (T->isAggregateType()) {
- uint64_t Size = DL.getTypeStoreSize(T);
IRBuilder<> Builder(SI);
+ Value *Size =
+ Builder.CreateTypeSize(Builder.getInt64Ty(), DL.getTypeStoreSize(T));
auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
SI->getAlign());
M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
diff --git a/llvm/test/CodeGen/AArch64/memset-scalable-size.ll b/llvm/test/CodeGen/AArch64/memset-scalable-size.ll
new file mode 100644
index 00000000000000..8ea6330f235a69
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/memset-scalable-size.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=memcpyopt < %s | FileCheck %s
+target triple = "aarch64-unknown-linux"
+
+define void @f0() {
+; CHECK-LABEL: define void @f0() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[P:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 2 [[P]], i8 0, i64 [[TMP1]], i1 false)
+; CHECK-NEXT: call void @g(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %p = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>}, align 2
+ store { <vscale x 16 x i1>, <vscale x 16 x i1> } zeroinitializer, ptr %p, align 2
+ call void @g(ptr %p)
+ ret void
+}
+
+define void @f1() {
+; CHECK-LABEL: define void @f1() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[P:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 48
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[P]], i8 0, i64 [[TMP1]], i1 false)
+; CHECK-NEXT: call void @g(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %p = alloca {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
+ store {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } zeroinitializer, ptr %p, align 16
+ call void @g(ptr %p)
+ ret void
+}
+
+define void @f2() {
+; CHECK-LABEL: define void @f2() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[P:%.*]] = alloca { <vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double> }, align 16
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 192
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[P]], i8 0, i64 [[TMP1]], i1 false)
+; CHECK-NEXT: call void @g(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %p = alloca {<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double> }, align 16
+ store {<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double> } zeroinitializer, ptr %p, align 16
+ call void @g(ptr %p)
+ ret void
+}
+
+declare void @g(ptr)
More information about the llvm-commits mailing list