[llvm] [SandboxVec] Optimization remarks (PR #129582)

via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 25 08:17:35 PDT 2025


https://github.com/vporpo updated https://github.com/llvm/llvm-project/pull/129582

>From ca19fca457da9cfcec807c48701071d6e2aac2bc Mon Sep 17 00:00:00 2001
From: Vasileios Porpodas <vporpodas at google.com>
Date: Wed, 12 Mar 2025 16:47:54 -0700
Subject: [PATCH 1/2] [SandboxVec][StructInitVec] Initial pass implementation

---
 .../SandboxVectorizer/Passes/StructInitVec.h  |  50 +++
 .../Vectorize/SandboxVectorizer/VecUtils.h    |  38 ++-
 llvm/lib/Transforms/Vectorize/CMakeLists.txt  |   1 +
 .../SandboxVectorizer/Passes/PassRegistry.def |   1 +
 .../Passes/StructInitVec.cpp                  | 143 +++++++++
 .../SandboxVectorizerPassBuilder.cpp          |   1 +
 .../SandboxVectorizer/struct_init_vec.ll      | 289 ++++++++++++++++++
 .../SandboxVectorizer/VecUtilsTest.cpp        |  75 +++++
 8 files changed, 596 insertions(+), 2 deletions(-)
 create mode 100644 llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.h
 create mode 100644 llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.cpp
 create mode 100644 llvm/test/Transforms/SandboxVectorizer/struct_init_vec.ll

diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.h
new file mode 100644
index 0000000000000..3527807867846
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.h
@@ -0,0 +1,50 @@
+//===- StructInitVec.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A pass that vectorizes struct initializations.
+// Generic bottom-up vectorization cannot handle these because the
+// initialization instructions can be of different types.
+//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_STRUCTINITVEC_H
+#define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_STRUCTINITVEC_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/SandboxIR/Pass.h"
+
+namespace llvm {
+
+class DataLayout;
+
+namespace sandboxir {
+
+class Value;
+class Instruction;
+class Scheduler;
+class Type;
+
+class StructInitVec final : public RegionPass {
+  const DataLayout *DL = nullptr; // Set in runOnRegion().
+  /// Checks whether \p Bndl can be vectorized (this also tries to schedule it
+  /// with \p Sched). \returns the combined vector type, or nullopt otherwise.
+  std::optional<Type *> canVectorize(ArrayRef<Instruction *> Bndl,
+                                     Scheduler &Sched);
+  /// Erases \p Stores and \p Loads, then any now-unused pointer instructions.
+  void tryEraseDeadInstrs(ArrayRef<Instruction *> Stores,
+                          ArrayRef<Instruction *> Loads);
+
+public:
+  StructInitVec() : RegionPass("struct-init-vec") {}
+  bool runOnRegion(Region &Rgn, const Analyses &A) final;
+};
+
+} // namespace sandboxir
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_PASSES_STRUCTINITVEC_H
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h
index d32bfbaf7a4c8..3d57a2f574bb8 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h
@@ -67,8 +67,8 @@ class VecUtils {
     return *Diff == ElmBytes;
   }
 
-  template <typename LoadOrStoreT>
-  static bool areConsecutive(ArrayRef<Value *> &Bndl, ScalarEvolution &SE,
+  template <typename LoadOrStoreT, typename ValT>
+  static bool areConsecutive(ArrayRef<ValT *> Bndl, ScalarEvolution &SE,
                              const DataLayout &DL) {
     static_assert(std::is_same<LoadOrStoreT, LoadInst>::value ||
                       std::is_same<LoadOrStoreT, StoreInst>::value,
@@ -85,6 +85,11 @@ class VecUtils {
     }
     return true;
   }
+  template <typename LoadOrStoreT>
+  static bool areConsecutive(ArrayRef<Value *> Bndl, ScalarEvolution &SE,
+                             const DataLayout &DL) {
+    return areConsecutive<LoadOrStoreT, Value>(Bndl, SE, DL);
+  }
 
   /// \Returns the number of vector lanes of \p Ty or 1 if not a vector.
   /// NOTE: It asserts that \p Ty is a fixed vector type.
@@ -119,6 +124,35 @@ class VecUtils {
     }
     return FixedVectorType::get(ElemTy, NumElts);
   }
+  /// \Returns a vector of the narrowest element type in \p Bndl wide enough to
+  /// hold all members, e.g. i8,i8,i16 gives <4 x i8>. \Returns null if \p Bndl
+  /// mixes floating-point with non-floating-point element types.
+  static Type *getCombinedVectorTypeFor(ArrayRef<Instruction *> Bndl,
+                                        const DataLayout &DL) {
+    assert(!Bndl.empty() && "Expected non-empty Bndl!");
+    unsigned TotalBits = 0;
+    unsigned MinElmBits = std::numeric_limits<unsigned>::max();
+    Type *MinElmTy = nullptr;
+    bool LastIsFloat = false;
+    for (auto [Idx, V] : enumerate(Bndl)) {
+      Type *ElmTy = getElementType(Utils::getExpectedType(V));
+
+      // Reject as soon as a member's float-ness differs from the previous one.
+      bool IsFloat = ElmTy->isFloatingPointTy();
+      if (Idx != 0 && IsFloat != LastIsFloat)
+        return nullptr;
+      LastIsFloat = IsFloat;
+      // Accumulate the total width and remember the narrowest element type.
+      unsigned ElmBits = Utils::getNumBits(ElmTy, DL);
+      TotalBits += ElmBits * VecUtils::getNumLanes(V);
+      if (ElmBits < MinElmBits) {
+        MinElmBits = ElmBits;
+        MinElmTy = ElmTy;
+      }
+    }
+    unsigned NumElms = TotalBits / MinElmBits;
+    return FixedVectorType::get(MinElmTy, NumElms);
+  }
   /// \Returns the instruction in \p Instrs that is lowest in the BB. Expects
   /// that all instructions are in the same BB.
   static Instruction *getLowest(ArrayRef<Instruction *> Instrs) {
diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
index 96670fe3ea195..c16056aebdf3f 100644
--- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt
+++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
@@ -13,6 +13,7 @@ add_llvm_component_library(LLVMVectorize
   SandboxVectorizer/Passes/RegionsFromBBs.cpp
   SandboxVectorizer/Passes/RegionsFromMetadata.cpp
   SandboxVectorizer/Passes/SeedCollection.cpp
+  SandboxVectorizer/Passes/StructInitVec.cpp
   SandboxVectorizer/Passes/TransactionAcceptOrRevert.cpp
   SandboxVectorizer/Passes/TransactionSave.cpp
   SandboxVectorizer/SandboxVectorizer.cpp
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PassRegistry.def b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PassRegistry.def
index 02b973926854d..657778cfd2b62 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PassRegistry.def
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/PassRegistry.def
@@ -26,6 +26,7 @@ REGION_PASS("tr-accept", ::llvm::sandboxir::TransactionAlwaysAccept)
 REGION_PASS("tr-revert", ::llvm::sandboxir::TransactionAlwaysRevert)
 REGION_PASS("tr-accept-or-revert", ::llvm::sandboxir::TransactionAcceptOrRevert)
 REGION_PASS("bottom-up-vec", ::llvm::sandboxir::BottomUpVec)
+REGION_PASS("struct-init-vec", ::llvm::sandboxir::StructInitVec)
 
 #undef REGION_PASS
 
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.cpp
new file mode 100644
index 0000000000000..8c7eb85148d56
--- /dev/null
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.cpp
@@ -0,0 +1,143 @@
+//===- StructInitVec.cpp - Vectorizer pass for struct initializations -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.h"
+#include "llvm/SandboxIR/Module.h"
+#include "llvm/SandboxIR/Region.h"
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h"
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h"
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h"
+
+namespace llvm {
+
+namespace sandboxir {
+
+// Legality check for vectorizing \p Bndl into a single wide access. Requires
+// one BB, no repeated members, no int/float mix, a valid combined vector type
+// and a successful trySchedule(). \returns that vector type, else nullopt.
+std::optional<Type *> StructInitVec::canVectorize(ArrayRef<Instruction *> Bndl,
+                                                  Scheduler &Sched) {
+  // All members must live in the same BB (\p Bndl is expected non-empty).
+  auto *BB = cast<Instruction>(Bndl[0])->getParent();
+  if (any_of(drop_begin(Bndl),
+             [BB](auto *V) { return cast<Instruction>(V)->getParent() != BB; }))
+    return std::nullopt;
+
+  // Reject bundles that contain the same instruction more than once.
+  SmallPtrSet<Value *, 8> Unique(Bndl.begin(), Bndl.end());
+  if (Unique.size() != Bndl.size())
+    return std::nullopt;
+
+  // Don't mix integer with floating point. Each member is classified by its
+  // own element type; "some but not all are FP" means the bundle is mixed.
+  auto IsFP = [](Instruction *I) {
+    return VecUtils::getElementType(Utils::getExpectedType(I))
+        ->isFloatingPointTy();
+  };
+  if (any_of(Bndl, IsFP) != all_of(Bndl, IsFP))
+    return std::nullopt;
+
+  Type *VecTy = VecUtils::getCombinedVectorTypeFor(Bndl, *DL);
+  if (VecTy == nullptr)
+    return std::nullopt;
+
+  // Check scheduling last: trySchedule() is the only check with side effects.
+  if (!Sched.trySchedule(Bndl))
+    return std::nullopt;
+
+  return VecTy;
+}
+
+void StructInitVec::tryEraseDeadInstrs(ArrayRef<Instruction *> Stores,
+                                       ArrayRef<Instruction *> Loads) {
+  SmallPtrSet<Instruction *, 8> PtrInstrs; // Addresses that may become dead.
+  auto Track = [&PtrInstrs](Value *Ptr) {
+    if (auto *PtrI = dyn_cast<Instruction>(Ptr))
+      PtrInstrs.insert(PtrI);
+  };
+  for (Instruction *St : Stores) { // Stores are erased before the loads.
+    Track(cast<StoreInst>(St)->getPointerOperand());
+    St->eraseFromParent();
+  }
+  for (Instruction *Ld : Loads) {
+    Track(cast<LoadInst>(Ld)->getPointerOperand());
+    Ld->eraseFromParent();
+  }
+  for (Instruction *PtrI : PtrInstrs)
+    if (!PtrI->hasNUsesOrMore(1))
+      PtrI->eraseFromParent();
+}
+
+bool StructInitVec::runOnRegion(Region &Rgn, const Analyses &A) {
+  SmallVector<Instruction *, 8> Bndl(Rgn.getAux().begin(), Rgn.getAux().end());
+  assert(Bndl.size() >= 2 && "Bad slice!");
+  Function &F = *Bndl[0]->getParent()->getParent();
+  DL = &F.getParent()->getDataLayout();
+  auto &Ctx = F.getContext();
+  Scheduler Sched(A.getAA(), Ctx);
+  if (!VecUtils::areConsecutive<StoreInst, Instruction>(
+          Bndl, A.getScalarEvolution(), *DL))
+    return false;
+  if (!canVectorize(Bndl, Sched))
+    return false;
+  // The stores passed the legality checks; now collect the values they store.
+  SmallVector<Value *, 4> Operands;
+  Operands.reserve(Bndl.size());
+  for (auto *I : Bndl) {
+    auto *Op = cast<StoreInst>(I)->getValueOperand();
+    Operands.push_back(Op);
+  }
+  BasicBlock *BB = Bndl[0]->getParent();
+  // TODO: For now we only support load operands.
+  // TODO: For now we don't cross BBs.
+  if (!all_of(Operands, [BB](Value *V) {
+        auto *LI = dyn_cast<LoadInst>(V);
+        if (LI == nullptr)
+          return false;
+        if (LI->getParent() != BB)
+          return false;
+        if (LI->hasNUsesOrMore(2))
+          return false;
+        return true;
+      }))
+    return false;
+  // TODO: Try to avoid the extra copy to an instruction vector.
+  SmallVector<Instruction *, 8> Loads;
+  Loads.reserve(Operands.size());
+  for (Value *Op : Operands)
+    Loads.push_back(cast<Instruction>(Op));
+
+  bool Consecutive = VecUtils::areConsecutive<LoadInst, Instruction>(
+      Loads, A.getScalarEvolution(), *DL);
+  if (!Consecutive)
+    return false;
+  if (!canVectorize(Loads, Sched))
+    return false;
+
+  // Generate vector store and vector load
+  Type *Ty = VecUtils::getCombinedVectorTypeFor(Bndl, *DL);
+  Value *LdPtr = cast<LoadInst>(Loads[0])->getPointerOperand();
+  // TODO: Compute alignment.
+  Align LdAlign(1);
+  auto LdWhereIt = std::next(VecUtils::getLowest(Loads)->getIterator());
+  auto *VecLd =
+      LoadInst::create(Ty, LdPtr, LdAlign, LdWhereIt, Ctx, "VecIinitL");
+
+  Value *StPtr = cast<StoreInst>(Bndl[0])->getPointerOperand();
+  // TODO: Compute alignment.
+  Align StAlign(1);
+  auto StWhereIt = std::next(VecUtils::getLowest(Bndl)->getIterator());
+  StoreInst::create(VecLd, StPtr, StAlign, StWhereIt, Ctx);
+
+  tryEraseDeadInstrs(Bndl, Loads);
+  return true;
+}
+
+} // namespace sandboxir
+
+} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.cpp
index 8432b066f966c..e168ddab9baba 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.cpp
@@ -8,6 +8,7 @@
 #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/RegionsFromBBs.h"
 #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/RegionsFromMetadata.h"
 #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/SeedCollection.h"
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.h"
 #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/TransactionAcceptOrRevert.h"
 #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/TransactionAlwaysAccept.h"
 #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/TransactionAlwaysRevert.h"
diff --git a/llvm/test/Transforms/SandboxVectorizer/struct_init_vec.ll b/llvm/test/Transforms/SandboxVectorizer/struct_init_vec.ll
new file mode 100644
index 0000000000000..70b02a6993fa6
--- /dev/null
+++ b/llvm/test/Transforms/SandboxVectorizer/struct_init_vec.ll
@@ -0,0 +1,289 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=sandbox-vectorizer -sbvec-vec-reg-bits=1024 -sbvec-allow-non-pow2 -sbvec-passes="seed-collection<(enable-diff-types)struct-init-vec>" %s -S | FileCheck %s
+
+define void @struct_init_basic(ptr %ptr) {
+; CHECK-LABEL: define void @struct_init_basic(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0
+; CHECK-NEXT:    [[VECIINITL:%.*]] = load <4 x i8>, ptr [[PTR0]], align 1, !sandboxvec [[META0:![0-9]+]]
+; CHECK-NEXT:    store <4 x i8> [[VECIINITL]], ptr [[PTR0]], align 1, !sandboxvec [[META0]]
+; CHECK-NEXT:    ret void
+;
+  %ptr0 = getelementptr i8, ptr %ptr, i32 0
+  %ptr1 = getelementptr i8, ptr %ptr, i32 1
+  %ptr2 = getelementptr i8, ptr %ptr, i32 2
+  %ld0 = load i8, ptr %ptr0
+  %ld1 = load i8, ptr %ptr1
+  %ld2 = load i16, ptr %ptr2
+  store i8 %ld0, ptr %ptr0
+  store i8 %ld1, ptr %ptr1
+  store i16 %ld2, ptr %ptr2
+  ret void
+}
+
+define void @struct_init_non_pow2(ptr %ptr) {
+; CHECK-LABEL: define void @struct_init_non_pow2(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0
+; CHECK-NEXT:    [[VECIINITL:%.*]] = load <3 x i8>, ptr [[PTR0]], align 1, !sandboxvec [[META1:![0-9]+]]
+; CHECK-NEXT:    store <3 x i8> [[VECIINITL]], ptr [[PTR0]], align 1, !sandboxvec [[META1]]
+; CHECK-NEXT:    ret void
+;
+  %ptr0 = getelementptr i8, ptr %ptr, i32 0
+  %ptr1 = getelementptr i8, ptr %ptr, i32 1
+  %ld0 = load i8, ptr %ptr0
+  %ld1 = load i16, ptr %ptr1
+  store i8 %ld0, ptr %ptr0
+  store i16 %ld1, ptr %ptr1
+  ret void
+}
+
+define void @struct_init_vectorize_vectors(ptr %ptr) {
+; CHECK-LABEL: define void @struct_init_vectorize_vectors(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0
+; CHECK-NEXT:    [[VECIINITL:%.*]] = load <4 x i8>, ptr [[PTR0]], align 1, !sandboxvec [[META2:![0-9]+]]
+; CHECK-NEXT:    store <4 x i8> [[VECIINITL]], ptr [[PTR0]], align 1, !sandboxvec [[META2]]
+; CHECK-NEXT:    ret void
+;
+  %ptr0 = getelementptr i8, ptr %ptr, i32 0
+  %ptr1 = getelementptr i8, ptr %ptr, i32 2
+  %ld0 = load <2 x i8>, ptr %ptr0
+  %ld1 = load <2 x i8>, ptr %ptr1
+  store <2 x i8> %ld0, ptr %ptr0
+  store <2 x i8> %ld1, ptr %ptr1
+  ret void
+}
+
+define void @struct_init_vectorize_vectors_diff_types(ptr %ptr) {
+; CHECK-LABEL: define void @struct_init_vectorize_vectors_diff_types(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0
+; CHECK-NEXT:    [[VECIINITL:%.*]] = load <8 x i8>, ptr [[PTR0]], align 1, !sandboxvec [[META3:![0-9]+]]
+; CHECK-NEXT:    store <8 x i8> [[VECIINITL]], ptr [[PTR0]], align 1, !sandboxvec [[META3]]
+; CHECK-NEXT:    ret void
+;
+  %ptr0 = getelementptr i8, ptr %ptr, i32 0
+  %ptr1 = getelementptr i8, ptr %ptr, i32 2
+  %ptr2 = getelementptr i8, ptr %ptr, i32 4
+  %ld0 = load i16, ptr %ptr0
+  %ld1 = load <2 x i8>, ptr %ptr1
+  %ld2 = load <2 x i16>, ptr %ptr2
+  store i16 %ld0, ptr %ptr0
+  store <2 x i8> %ld1, ptr %ptr1
+  store <2 x i16> %ld2, ptr %ptr2
+  ret void
+}
+
+; Don't vectorize if there is a gap.
+define void @struct_init_gap(ptr %ptr) {
+; CHECK-LABEL: define void @struct_init_gap(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0
+; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr i8, ptr [[PTR]], i32 2
+; CHECK-NEXT:    [[LD0:%.*]] = load i8, ptr [[PTR0]], align 1
+; CHECK-NEXT:    [[LD1:%.*]] = load i16, ptr [[PTR1]], align 2
+; CHECK-NEXT:    store i8 [[LD0]], ptr [[PTR0]], align 1, !sandboxvec [[META4:![0-9]+]]
+; CHECK-NEXT:    store i16 [[LD1]], ptr [[PTR1]], align 2, !sandboxvec [[META4]]
+; CHECK-NEXT:    ret void
+;
+  %ptr0 = getelementptr i8, ptr %ptr, i32 0
+  %ptr1 = getelementptr i8, ptr %ptr, i32 2
+  %ld0 = load i8, ptr %ptr0
+  %ld1 = load i16, ptr %ptr1
+  store i8 %ld0, ptr %ptr0
+  store i16 %ld1, ptr %ptr1
+  ret void
+}
+
+define void @struct_init_loads_not_consecutive(ptr %ptr) {
+; CHECK-LABEL: define void @struct_init_loads_not_consecutive(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0
+; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr i8, ptr [[PTR]], i32 2
+; CHECK-NEXT:    [[LD0:%.*]] = load i8, ptr [[PTR1]], align 1
+; CHECK-NEXT:    [[LD1:%.*]] = load i16, ptr [[PTR0]], align 2
+; CHECK-NEXT:    store i8 [[LD0]], ptr [[PTR0]], align 1, !sandboxvec [[META5:![0-9]+]]
+; CHECK-NEXT:    store i16 [[LD1]], ptr [[PTR1]], align 2, !sandboxvec [[META5]]
+; CHECK-NEXT:    ret void
+;
+  %ptr0 = getelementptr i8, ptr %ptr, i32 0
+  %ptr1 = getelementptr i8, ptr %ptr, i32 2
+  %ld0 = load i8, ptr %ptr1
+  %ld1 = load i16, ptr %ptr0
+  store i8 %ld0, ptr %ptr0
+  store i16 %ld1, ptr %ptr1
+  ret void
+}
+
+; Vectorize same types, even if bottom-up-vec could do so too.
+define void @struct_init_same_types(ptr %ptr) {
+; CHECK-LABEL: define void @struct_init_same_types(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0
+; CHECK-NEXT:    [[VECIINITL:%.*]] = load <2 x i8>, ptr [[PTR0]], align 1, !sandboxvec [[META6:![0-9]+]]
+; CHECK-NEXT:    store <2 x i8> [[VECIINITL]], ptr [[PTR0]], align 1, !sandboxvec [[META6]]
+; CHECK-NEXT:    ret void
+;
+  %ptr0 = getelementptr i8, ptr %ptr, i32 0
+  %ptr1 = getelementptr i8, ptr %ptr, i32 1
+  %ld0 = load i8, ptr %ptr0
+  %ld1 = load i8, ptr %ptr1
+  store i8 %ld0, ptr %ptr0
+  store i8 %ld1, ptr %ptr1
+  ret void
+}
+
+; Don't vectorize mixed integers/floats.
+define void @struct_init_mixed_int_float(ptr %ptr) {
+; CHECK-LABEL: define void @struct_init_mixed_int_float(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr i32, ptr [[PTR]], i32 0
+; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr i32, ptr [[PTR]], i32 1
+; CHECK-NEXT:    [[LD0:%.*]] = load i32, ptr [[PTR0]], align 4
+; CHECK-NEXT:    [[LD1:%.*]] = load float, ptr [[PTR1]], align 4
+; CHECK-NEXT:    store i32 [[LD0]], ptr [[PTR0]], align 4, !sandboxvec [[META7:![0-9]+]]
+; CHECK-NEXT:    store float [[LD1]], ptr [[PTR1]], align 4, !sandboxvec [[META7]]
+; CHECK-NEXT:    ret void
+;
+  %ptr0 = getelementptr i32, ptr %ptr, i32 0
+  %ptr1 = getelementptr i32, ptr %ptr, i32 1
+  %ld0 = load i32, ptr %ptr0
+  %ld1 = load float, ptr %ptr1
+  store i32 %ld0, ptr %ptr0
+  store float %ld1, ptr %ptr1
+  ret void
+}
+
+define void @struct_init_mixed_int_float_vectors(ptr %ptr) {
+; CHECK-LABEL: define void @struct_init_mixed_int_float_vectors(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr i32, ptr [[PTR]], i32 0
+; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr i32, ptr [[PTR]], i32 1
+; CHECK-NEXT:    [[LD0:%.*]] = load i32, ptr [[PTR0]], align 4
+; CHECK-NEXT:    [[LD1:%.*]] = load <2 x float>, ptr [[PTR1]], align 8
+; CHECK-NEXT:    store i32 [[LD0]], ptr [[PTR0]], align 4, !sandboxvec [[META8:![0-9]+]]
+; CHECK-NEXT:    store <2 x float> [[LD1]], ptr [[PTR1]], align 8, !sandboxvec [[META8]]
+; CHECK-NEXT:    ret void
+;
+  %ptr0 = getelementptr i32, ptr %ptr, i32 0
+  %ptr1 = getelementptr i32, ptr %ptr, i32 1
+  %ld0 = load i32, ptr %ptr0
+  %ld1 = load <2 x float>, ptr %ptr1
+  store i32 %ld0, ptr %ptr0
+  store <2 x float> %ld1, ptr %ptr1
+  ret void
+}
+
+; Don't cross BBs (for now).
+define ptr @struct_init_dont_cross_bbs(ptr %ptr) {
+; CHECK-LABEL: define ptr @struct_init_dont_cross_bbs(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0
+; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr i8, ptr [[PTR]], i32 1
+; CHECK-NEXT:    [[LD0:%.*]] = load i8, ptr [[PTR0]], align 1
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[PTR1]], align 1
+; CHECK-NEXT:    br label %[[BB:.*]]
+; CHECK:       [[BB]]:
+; CHECK-NEXT:    store i8 [[LD0]], ptr [[PTR0]], align 1, !sandboxvec [[META9:![0-9]+]]
+; CHECK-NEXT:    store i8 [[LD1]], ptr [[PTR1]], align 1, !sandboxvec [[META9]]
+; CHECK-NEXT:    ret ptr [[PTR1]]
+;
+entry:
+  %ptr0 = getelementptr i8, ptr %ptr, i32 0
+  %ptr1 = getelementptr i8, ptr %ptr, i32 1
+  %ld0 = load i8, ptr %ptr0
+  %ld1 = load i8, ptr %ptr1
+  br label %bb
+
+bb:
+  store i8 %ld0, ptr %ptr0
+  store i8 %ld1, ptr %ptr1
+  ret ptr %ptr1
+}
+
+; Check that all dead GEPs are removed.
+define void @struct_init_cleanup_geps(ptr %ptrA, ptr %ptrB) {
+; CHECK-LABEL: define void @struct_init_cleanup_geps(
+; CHECK-SAME: ptr [[PTRA:%.*]], ptr [[PTRB:%.*]]) {
+; CHECK-NEXT:    [[PTRA0:%.*]] = getelementptr i8, ptr [[PTRA]], i32 0
+; CHECK-NEXT:    [[PTRB0:%.*]] = getelementptr i8, ptr [[PTRB]], i32 0
+; CHECK-NEXT:    [[VECIINITL:%.*]] = load <2 x i8>, ptr [[PTRA0]], align 1, !sandboxvec [[META10:![0-9]+]]
+; CHECK-NEXT:    store <2 x i8> [[VECIINITL]], ptr [[PTRB0]], align 1, !sandboxvec [[META10]]
+; CHECK-NEXT:    ret void
+;
+  %ptrA0 = getelementptr i8, ptr %ptrA, i32 0
+  %ptrA1 = getelementptr i8, ptr %ptrA, i32 1
+  %ptrB0 = getelementptr i8, ptr %ptrB, i32 0
+  %ptrB1 = getelementptr i8, ptr %ptrB, i32 1
+  %ld0 = load i8, ptr %ptrA0
+  %ld1 = load i8, ptr %ptrA1
+  store i8 %ld0, ptr %ptrB0
+  store i8 %ld1, ptr %ptrB1
+  ret void
+}
+
+; Check that we don't try to erase GEPs with other users.
+define ptr @struct_init_cleanup_gep_with_external_use(ptr %ptr) {
+; CHECK-LABEL: define ptr @struct_init_cleanup_gep_with_external_use(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0
+; CHECK-NEXT:    [[PTR1:%.*]] = getelementptr i8, ptr [[PTR]], i32 1
+; CHECK-NEXT:    [[VECIINITL:%.*]] = load <2 x i8>, ptr [[PTR0]], align 1, !sandboxvec [[META11:![0-9]+]]
+; CHECK-NEXT:    store <2 x i8> [[VECIINITL]], ptr [[PTR0]], align 1, !sandboxvec [[META11]]
+; CHECK-NEXT:    ret ptr [[PTR1]]
+;
+  %ptr0 = getelementptr i8, ptr %ptr, i32 0
+  %ptr1 = getelementptr i8, ptr %ptr, i32 1
+  %ld0 = load i8, ptr %ptr0
+  %ld1 = load i8, ptr %ptr1
+  store i8 %ld0, ptr %ptr0
+  store i8 %ld1, ptr %ptr1
+  ret ptr %ptr1
+}
+
+; Check that we schedule both loads and stores
+define void @struct_init_schedule_stores_and_loads(ptr noalias %ptrA, ptr noalias %ptrB) {
+; CHECK-LABEL: define void @struct_init_schedule_stores_and_loads(
+; CHECK-SAME: ptr noalias [[PTRA:%.*]], ptr noalias [[PTRB:%.*]]) {
+; CHECK-NEXT:    [[PTRA0:%.*]] = getelementptr i8, ptr [[PTRA]], i64 0
+; CHECK-NEXT:    [[PTRB0:%.*]] = getelementptr i8, ptr [[PTRB]], i64 0
+; CHECK-NEXT:    [[PTRB1:%.*]] = getelementptr i8, ptr [[PTRB]], i64 1
+; CHECK-NEXT:    [[OTHER:%.*]] = load i8, ptr [[PTRB1]], align 1
+; CHECK-NEXT:    [[VECIINITL:%.*]] = load <2 x i8>, ptr [[PTRA0]], align 1, !sandboxvec [[META12:![0-9]+]]
+; CHECK-NEXT:    store i8 0, ptr [[PTRA0]], align 1
+; CHECK-NEXT:    store <2 x i8> [[VECIINITL]], ptr [[PTRB0]], align 1, !sandboxvec [[META12]]
+; CHECK-NEXT:    ret void
+;
+  %ptrA0 = getelementptr i8, ptr %ptrA, i64 0
+  %ptrA1 = getelementptr i8, ptr %ptrA, i64 1
+  %ptrB0 = getelementptr i8, ptr %ptrB, i64 0
+  %ptrB1 = getelementptr i8, ptr %ptrB, i64 1
+
+  %ld0 = load i8, ptr %ptrA0
+  store i8 %ld0, ptr %ptrB0
+
+  store i8 0, ptr %ptrA0
+  %other = load i8, ptr %ptrB1
+
+  %ld1 = load i8, ptr %ptrA1
+  store i8 %ld1, ptr %ptrB1
+  ret void
+}
+
+;.
+; CHECK: [[META0]] = distinct !{!"sandboxregion"}
+; CHECK: [[META1]] = distinct !{!"sandboxregion"}
+; CHECK: [[META2]] = distinct !{!"sandboxregion"}
+; CHECK: [[META3]] = distinct !{!"sandboxregion"}
+; CHECK: [[META4]] = distinct !{!"sandboxregion"}
+; CHECK: [[META5]] = distinct !{!"sandboxregion"}
+; CHECK: [[META6]] = distinct !{!"sandboxregion"}
+; CHECK: [[META7]] = distinct !{!"sandboxregion"}
+; CHECK: [[META8]] = distinct !{!"sandboxregion"}
+; CHECK: [[META9]] = distinct !{!"sandboxregion"}
+; CHECK: [[META10]] = distinct !{!"sandboxregion"}
+; CHECK: [[META11]] = distinct !{!"sandboxregion"}
+; CHECK: [[META12]] = distinct !{!"sandboxregion"}
+;.
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp
index 2bfea6908305c..03eb32fdf6002 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp
@@ -18,6 +18,7 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/SandboxIR/Context.h"
 #include "llvm/SandboxIR/Function.h"
+#include "llvm/SandboxIR/Module.h"
 #include "llvm/SandboxIR/Type.h"
 #include "llvm/Support/SourceMgr.h"
 #include "gmock/gmock.h"
@@ -424,6 +425,80 @@ TEST_F(VecUtilsTest, GetWideType) {
   EXPECT_EQ(sandboxir::VecUtils::getWideType(Int32X4Ty, 2), Int32X8Ty);
 }
 
+TEST_F(VecUtilsTest, GetCombinedVectorTypeFor) {
+  parseIR(R"IR(
+define void @foo(ptr %ptr, i8 %i8, i16 %i16, i32 %i32, float %f32, double %f64, <2 x i8> %v2xi8, <2 x i16> %v2xi16) {
+  store i8 %i8, ptr %ptr
+  store i16 %i16, ptr %ptr
+  store i32 %i32, ptr %ptr
+  store float %f32, ptr %ptr
+  store double %f64, ptr %ptr
+  store <2 x i8> %v2xi8, ptr %ptr
+  store <2 x i16> %v2xi16, ptr %ptr
+  ret void
+}
+)IR");
+  Function &LLVMF = *M->getFunction("foo");
+
+  sandboxir::Context Ctx(C);
+  auto &F = *Ctx.createFunction(&LLVMF);
+  auto &BB = *F.begin();
+  const auto &DL = F.getParent()->getDataLayout();
+  auto It = BB.begin();
+  auto *Store_i8 = &*It++;
+  auto *Store_i16 = &*It++;
+  auto *Store_i32 = &*It++;
+  auto *Store_f32 = &*It++;
+  auto *Store_f64 = &*It++;
+  auto *Store_2xi8 = &*It++;
+  auto *Store_2xi16 = &*It++;
+
+  auto *I8Ty = sandboxir::IntegerType::get(Ctx, 8);
+  auto *I16Ty = sandboxir::IntegerType::get(Ctx, 16);
+  auto *F32Ty = sandboxir::Type::getFloatTy(Ctx);
+
+  // All members have the same type: the lane counts simply add up.
+  EXPECT_EQ(
+      sandboxir::VecUtils::getCombinedVectorTypeFor({Store_i8, Store_i8}, DL),
+      sandboxir::FixedVectorType::get(I8Ty, 2));
+  EXPECT_EQ(sandboxir::VecUtils::getCombinedVectorTypeFor(
+                {Store_2xi8, Store_2xi8}, DL),
+            sandboxir::FixedVectorType::get(I8Ty, 4));
+
+  // Different types whose combined lane count is a power of two.
+  EXPECT_EQ(sandboxir::VecUtils::getCombinedVectorTypeFor(
+                {Store_i8, Store_i8, Store_i16}, DL),
+            sandboxir::FixedVectorType::get(I8Ty, 4));
+  EXPECT_EQ(sandboxir::VecUtils::getCombinedVectorTypeFor(
+                {Store_i8, Store_i8, Store_i16, Store_i32}, DL),
+            sandboxir::FixedVectorType::get(I8Ty, 8));
+  EXPECT_EQ(sandboxir::VecUtils::getCombinedVectorTypeFor(
+                {Store_2xi8, Store_2xi8, Store_2xi16}, DL),
+            sandboxir::FixedVectorType::get(I8Ty, 8));
+
+  // Different types whose combined lane count is not a power of two.
+  EXPECT_EQ(
+      sandboxir::VecUtils::getCombinedVectorTypeFor({Store_f32, Store_f64}, DL),
+      sandboxir::FixedVectorType::get(F32Ty, 3));
+  EXPECT_EQ(
+      sandboxir::VecUtils::getCombinedVectorTypeFor({Store_i32, Store_i16}, DL),
+      sandboxir::FixedVectorType::get(I16Ty, 3));
+  EXPECT_EQ(sandboxir::VecUtils::getCombinedVectorTypeFor(
+                {Store_i8, Store_i16, Store_i32}, DL),
+            sandboxir::FixedVectorType::get(I8Ty, 7));
+  EXPECT_EQ(sandboxir::VecUtils::getCombinedVectorTypeFor(
+                {Store_i8, Store_i16, Store_2xi8}, DL),
+            sandboxir::FixedVectorType::get(I8Ty, 5));
+
+  // Mixing float and integer element types yields nullptr.
+  EXPECT_EQ(
+      sandboxir::VecUtils::getCombinedVectorTypeFor({Store_i32, Store_f32}, DL),
+      nullptr);
+  EXPECT_EQ(sandboxir::VecUtils::getCombinedVectorTypeFor(
+                {Store_f32, Store_2xi8}, DL),
+            nullptr);
+}
+
 TEST_F(VecUtilsTest, GetLowest) {
   parseIR(R"IR(
 define void @foo(i8 %v) {

>From 196aec327cc56f4d9f2e00d89a264453cffddf4e Mon Sep 17 00:00:00 2001
From: Vasileios Porpodas <vporpodas at google.com>
Date: Fri, 14 Mar 2025 12:52:08 -0700
Subject: [PATCH 2/2] [SandboxVec][StructInitVec] Add support for constants

---
 .../Passes/StructInitVec.cpp                  | 94 ++++++++++++-------
 .../SandboxVectorizer/struct_init_vec.ll      | 54 +++++++++++
 2 files changed, 115 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.cpp
index 8c7eb85148d56..a895619995294 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/StructInitVec.cpp
@@ -93,46 +93,74 @@ bool StructInitVec::runOnRegion(Region &Rgn, const Analyses &A) {
     Operands.push_back(Op);
   }
   BasicBlock *BB = Bndl[0]->getParent();
-  // TODO: For now we only support load operands.
-  // TODO: For now we don't cross BBs.
-  if (!all_of(Operands, [BB](Value *V) {
-        auto *LI = dyn_cast<LoadInst>(V);
-        if (LI == nullptr)
-          return false;
-        if (LI->getParent() != BB)
-          return false;
-        if (LI->hasNUsesOrMore(2))
-          return false;
-        return true;
-      }))
-    return false;
-  // TODO: Try to avoid the extra copy to an instruction vector.
-  SmallVector<Instruction *, 8> Loads;
-  Loads.reserve(Operands.size());
-  for (Value *Op : Operands)
-    Loads.push_back(cast<Instruction>(Op));
-
-  bool Consecutive = VecUtils::areConsecutive<LoadInst, Instruction>(
-      Loads, A.getScalarEvolution(), *DL);
-  if (!Consecutive)
-    return false;
-  if (!canVectorize(Loads, Sched))
+  bool AllLoads = all_of(Operands, [BB](Value *V) {
+    auto *LI = dyn_cast<LoadInst>(V);
+    if (LI == nullptr)
+      return false;
+    // TODO: For now we don't cross BBs.
+    if (LI->getParent() != BB)
+      return false;
+    if (LI->hasNUsesOrMore(2))
+      return false;
+    return true;
+  });
+  bool AllConstants =
+      all_of(Operands, [](Value *V) { return isa<Constant>(V); });
+  if (!AllLoads && !AllConstants)
     return false;
 
-  // Generate vector store and vector load
-  Type *Ty = VecUtils::getCombinedVectorTypeFor(Bndl, *DL);
-  Value *LdPtr = cast<LoadInst>(Loads[0])->getPointerOperand();
-  // TODO: Compute alignment.
-  Align LdAlign(1);
-  auto LdWhereIt = std::next(VecUtils::getLowest(Loads)->getIterator());
-  auto *VecLd =
-      LoadInst::create(Ty, LdPtr, LdAlign, LdWhereIt, Ctx, "VecIinitL");
+  Value *VecOp = nullptr;
+  SmallVector<Instruction *, 8> Loads;
+  if (AllLoads) {
+    // TODO: Try to avoid the extra copy to an instruction vector.
+    Loads.reserve(Operands.size());
+    for (Value *Op : Operands)
+      Loads.push_back(cast<Instruction>(Op));
+
+    bool Consecutive = VecUtils::areConsecutive<LoadInst, Instruction>(
+        Loads, A.getScalarEvolution(), *DL);
+    if (!Consecutive)
+      return false;
+    if (!canVectorize(Loads, Sched))
+      return false;
+
+    // Generate vector load.
+    Type *Ty = VecUtils::getCombinedVectorTypeFor(Bndl, *DL);
+    Value *LdPtr = cast<LoadInst>(Loads[0])->getPointerOperand();
+    // TODO: Compute alignment.
+    Align LdAlign(1);
+    auto LdWhereIt = std::next(VecUtils::getLowest(Loads)->getIterator());
+    VecOp = LoadInst::create(Ty, LdPtr, LdAlign, LdWhereIt, Ctx, "VecIinitL");
+  } else if (AllConstants) {
+    SmallVector<Constant *, 8> Constants;
+    Constants.reserve(Operands.size());
+    for (Value *Op : Operands) {
+      auto *COp = cast<Constant>(Op);
+      if (auto *AggrCOp = dyn_cast<ConstantAggregate>(COp)) {
+        // If the operand is a constant aggregate, then append all its elements.
+        for (Value *Elm : AggrCOp->operands())
+          Constants.push_back(cast<Constant>(Elm));
+      } else if (auto *SeqCOp = dyn_cast<ConstantDataSequential>(COp)) {
+        for (auto ElmIdx : seq<unsigned>(SeqCOp->getNumElements()))
+          Constants.push_back(SeqCOp->getElementAsConstant(ElmIdx));
+      } else if (auto *Zero = dyn_cast<ConstantAggregateZero>(COp)) {
+        auto *ZeroElm = Zero->getSequentialElement();
+        for (auto ElmIdx :
+             seq<unsigned>(Zero->getElementCount().getFixedValue()))
+          Constants.push_back(ZeroElm);
+      } else {
+        Constants.push_back(COp);
+      }
+    }
+    VecOp = ConstantVector::get(Constants);
+  }
 
+  // Generate vector store.
   Value *StPtr = cast<StoreInst>(Bndl[0])->getPointerOperand();
   // TODO: Compute alignment.
   Align StAlign(1);
   auto StWhereIt = std::next(VecUtils::getLowest(Bndl)->getIterator());
-  StoreInst::create(VecLd, StPtr, StAlign, StWhereIt, Ctx);
+  StoreInst::create(VecOp, StPtr, StAlign, StWhereIt, Ctx);
 
   tryEraseDeadInstrs(Bndl, Loads);
   return true;
diff --git a/llvm/test/Transforms/SandboxVectorizer/struct_init_vec.ll b/llvm/test/Transforms/SandboxVectorizer/struct_init_vec.ll
index 70b02a6993fa6..6f8dd54404e98 100644
--- a/llvm/test/Transforms/SandboxVectorizer/struct_init_vec.ll
+++ b/llvm/test/Transforms/SandboxVectorizer/struct_init_vec.ll
@@ -272,6 +272,57 @@ define void @struct_init_schedule_stores_and_loads(ptr noalias %ptrA, ptr noalia
   ret void
 }
 
+; Store-constant pattern: scalar integer constants of different widths.
+define void @struct_init_constants(ptr %ptr) {
+; CHECK-LABEL: define void @struct_init_constants(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0
+; CHECK-NEXT:    store <3 x i8> <i8 42, i8 43, i8 44>, ptr [[PTR0]], align 1, !sandboxvec [[META13:![0-9]+]]
+; CHECK-NEXT:    ret void
+;
+  %ptr0 = getelementptr i8, ptr %ptr, i32 0
+  %ptr1 = getelementptr i8, ptr %ptr, i32 1
+  %ptr2 = getelementptr i8, ptr %ptr, i32 3
+  store i8 42, ptr %ptr0
+  store i16 43, ptr %ptr1
+  store i8 44, ptr %ptr2
+  ret void
+}
+
+; Same but one of the stored constants is a vector (ConstantDataSequential).
+define void @struct_init_constants_CDS(ptr %ptr) {
+; CHECK-LABEL: define void @struct_init_constants_CDS(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr i8, ptr [[PTR]], i32 0
+; CHECK-NEXT:    store <4 x i8> <i8 0, i8 1, i8 2, i8 3>, ptr [[PTR0]], align 1, !sandboxvec [[META14:![0-9]+]]
+; CHECK-NEXT:    ret void
+;
+  %ptr0 = getelementptr i8, ptr %ptr, i32 0
+  %ptr1 = getelementptr i8, ptr %ptr, i32 1
+  %ptr2 = getelementptr i8, ptr %ptr, i32 3
+  store i8 0, ptr %ptr0
+  store <2 x i8> <i8 1, i8 2>, ptr %ptr1
+  store i8 3, ptr %ptr2
+  ret void
+}
+
+; Same but with floats; the vector operands are zeroinitializer (ConstantAggregateZero).
+define void @struct_init_constants_CDS_float(ptr %ptr) {
+; CHECK-LABEL: define void @struct_init_constants_CDS_float(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT:    [[PTR0:%.*]] = getelementptr float, ptr [[PTR]], i32 0
+; CHECK-NEXT:    store <8 x float> <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, ptr [[PTR0]], align 1, !sandboxvec [[META15:![0-9]+]]
+; CHECK-NEXT:    ret void
+;
+  %ptr0 = getelementptr float, ptr %ptr, i32 0
+  %ptr1 = getelementptr float, ptr %ptr, i32 1
+  %ptr2 = getelementptr float, ptr %ptr, i32 3
+  store float 1.0, ptr %ptr0
+  store <2 x float> zeroinitializer, ptr %ptr1
+  store <5 x float> zeroinitializer, ptr %ptr2
+  ret void
+}
+
 ;.
 ; CHECK: [[META0]] = distinct !{!"sandboxregion"}
 ; CHECK: [[META1]] = distinct !{!"sandboxregion"}
@@ -286,4 +337,7 @@ define void @struct_init_schedule_stores_and_loads(ptr noalias %ptrA, ptr noalia
 ; CHECK: [[META10]] = distinct !{!"sandboxregion"}
 ; CHECK: [[META11]] = distinct !{!"sandboxregion"}
 ; CHECK: [[META12]] = distinct !{!"sandboxregion"}
+; CHECK: [[META13]] = distinct !{!"sandboxregion"}
+; CHECK: [[META14]] = distinct !{!"sandboxregion"}
+; CHECK: [[META15]] = distinct !{!"sandboxregion"}
 ;.



More information about the llvm-commits mailing list