[llvm] [EarlyCSE, TTI] Don't create new, unused, instructions. (PR #134534)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 8 04:01:41 PDT 2025


https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/134534

>From da37c7741fc5be76db144755da4a94245bee9947 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sun, 6 Apr 2025 12:11:37 +0100
Subject: [PATCH 1/4] [EarlyCSE,TTI] Clean up temporary insts created by
 getOrCreateResult.

getOrCreateResultFromMemIntrinsic can modify the current function by
inserting new instructions without EarlyCSE keeping track of the
changes.

Currently, EarlyCSE will treat the IR as unchanged, even if new
instructions are inserted. This leads to missed invalidation on the
newly added test case.

To solve this, the patch updates getOrCreateResultFromMemIntrinsic and
its only user (EarlyCSE) to keep track of newly created instructions and
clean them up at the end of EarlyCSE.

Alternatively, getOrCreateResultFromMemIntrinsic could indicate that new
instructions have been added, so that EarlyCSE can update Changed accordingly.
---
 .../llvm/Analysis/TargetTransformInfo.h       |  7 +++---
 .../llvm/Analysis/TargetTransformInfoImpl.h   |  5 +++--
 llvm/lib/Analysis/TargetTransformInfo.cpp     |  6 +++--
 .../AArch64/AArch64TargetTransformInfo.cpp    | 12 ++++++----
 .../AArch64/AArch64TargetTransformInfo.h      |  5 +++--
 llvm/lib/Transforms/Scalar/EarlyCSE.cpp       | 22 ++++++++++++++-----
 .../Transforms/EarlyCSE/AArch64/intrinsics.ll | 22 ++++++++++++++-----
 7 files changed, 55 insertions(+), 24 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index c43870392361d..fda0aef184794 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1705,9 +1705,10 @@ class TargetTransformInfo {
   /// \returns A value which is the result of the given memory intrinsic.  New
   /// instructions may be created to extract the result from the given intrinsic
   /// memory operation.  Returns nullptr if the target cannot create a result
-  /// from the given intrinsic.
-  LLVM_ABI Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
-                                                    Type *ExpectedType) const;
+  /// from the given intrinsic. Adds newly created instructions to \p NewInsts.
+  LLVM_ABI Value *getOrCreateResultFromMemIntrinsic(
+      IntrinsicInst *Inst, Type *ExpectedType,
+      SmallVectorImpl<Instruction *> &NewInsts) const;
 
   /// \returns The type to use in a loop expansion of a memcpy call.
   LLVM_ABI Type *getMemcpyLoopLoweringType(
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 12f87226c5f57..6873744138057 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -983,8 +983,9 @@ class TargetTransformInfoImplBase {
     return 0;
   }
 
-  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
-                                                   Type *ExpectedType) const {
+  virtual Value *getOrCreateResultFromMemIntrinsic(
+      IntrinsicInst *Inst, Type *ExpectedType,
+      SmallVectorImpl<Instruction *> &NewInsts) const {
     return nullptr;
   }
 
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 3ebd9d487ba04..7b335043c26f0 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1294,8 +1294,10 @@ unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const {
 }
 
 Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
-    IntrinsicInst *Inst, Type *ExpectedType) const {
-  return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
+    IntrinsicInst *Inst, Type *ExpectedType,
+    SmallVectorImpl<Instruction *> &NewInsts) const {
+  return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType,
+                                                    NewInsts);
 }
 
 Type *TargetTransformInfo::getMemcpyLoopLoweringType(
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 380faa6cf6939..6da8a681f2343 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4967,9 +4967,9 @@ void AArch64TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
   BaseT::getPeelingPreferences(L, SE, PP);
 }
 
-Value *
-AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
-                                                  Type *ExpectedType) const {
+Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(
+    IntrinsicInst *Inst, Type *ExpectedType,
+    SmallVectorImpl<Instruction *> &NewInsts) const {
   switch (Inst->getIntrinsicID()) {
   default:
     return nullptr;
@@ -4988,7 +4988,11 @@ AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
         return nullptr;
     }
     Value *Res = PoisonValue::get(ExpectedType);
-    IRBuilder<> Builder(Inst);
+    IRBuilder<ConstantFolder, IRBuilderCallbackInserter> Builder(
+        Inst->getContext(), ConstantFolder(),
+        IRBuilderCallbackInserter(
+            [&NewInsts](Instruction *I) { NewInsts.push_back(I); }));
+    Builder.SetInsertPoint(Inst);
     for (unsigned i = 0, e = NumElts; i != e; ++i) {
       Value *L = Inst->getArgOperand(i);
       Res = Builder.CreateInsertValue(Res, L, i);
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 9ada70bd7086a..f6c3f1ce8e92c 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -270,8 +270,9 @@ class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> {
   void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                              TTI::PeelingPreferences &PP) const override;
 
-  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
-                                           Type *ExpectedType) const override;
+  Value *getOrCreateResultFromMemIntrinsic(
+      IntrinsicInst *Inst, Type *ExpectedType,
+      SmallVectorImpl<Instruction *> &NewInsts) const override;
 
   bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                           MemIntrinsicInfo &Info) const override;
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index b6cb987c0423f..33a4e928184a8 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -722,6 +722,8 @@ class EarlyCSE {
   /// This is the current generation of the memory value.
   unsigned CurrentGeneration = 0;
 
+  SmallVector<Instruction *> TmpInstructions;
+
   /// Set up the EarlyCSE runner for a particular function.
   EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
            const TargetTransformInfo &TTI, DominatorTree &DT,
@@ -958,7 +960,8 @@ class EarlyCSE {
   bool overridingStores(const ParseMemoryInst &Earlier,
                         const ParseMemoryInst &Later);
 
-  Value *getOrCreateResult(Instruction *Inst, Type *ExpectedType) const {
+  Value *getOrCreateResult(Instruction *Inst, Type *ExpectedType,
+                           SmallVectorImpl<Instruction *> &TmpInsts) const {
     // TODO: We could insert relevant casts on type mismatch.
     // The load or the store's first operand.
     Value *V;
@@ -971,7 +974,8 @@ class EarlyCSE {
         V = II->getOperand(0);
         break;
       default:
-        return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
+        return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType,
+                                                     TmpInsts);
       }
     } else {
       V = isa<LoadInst>(Inst) ? Inst : cast<StoreInst>(Inst)->getValueOperand();
@@ -1255,9 +1259,10 @@ Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
 
   // For stores check the result values before checking memory generation
   // (otherwise isSameMemGeneration may crash).
-  Value *Result = MemInst.isStore()
-                      ? getOrCreateResult(Matching, Other->getType())
-                      : nullptr;
+  Value *Result =
+      MemInst.isStore()
+          ? getOrCreateResult(Matching, Other->getType(), TmpInstructions)
+          : nullptr;
   if (MemInst.isStore() && InVal.DefInst != Result)
     return nullptr;
 
@@ -1278,7 +1283,7 @@ Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
     return nullptr;
 
   if (!Result)
-    Result = getOrCreateResult(Matching, Other->getType());
+    Result = getOrCreateResult(Matching, Other->getType(), TmpInstructions);
   return Result;
 }
 
@@ -1832,6 +1837,11 @@ bool EarlyCSE::run() {
     }
   } // while (!nodes...)
 
+  // Clean up temporary instructions.
+  for (Instruction *I : reverse(TmpInstructions))
+    if (I->use_empty())
+      I->eraseFromParent();
+
   return Changed;
 }
 
diff --git a/llvm/test/Transforms/EarlyCSE/AArch64/intrinsics.ll b/llvm/test/Transforms/EarlyCSE/AArch64/intrinsics.ll
index 94b17510bb95d..4744e3761fa6b 100644
--- a/llvm/test/Transforms/EarlyCSE/AArch64/intrinsics.ll
+++ b/llvm/test/Transforms/EarlyCSE/AArch64/intrinsics.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -passes=early-cse -earlycse-debug-hash | FileCheck %s
-; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -aa-pipeline=basic-aa -passes='early-cse<memssa>' | FileCheck %s
+; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -aa-pipeline=basic-aa -passes='early-cse<memssa>' -verify-analysis-invalidation | FileCheck %s
 
 define <4 x i32> @test_cse(ptr %a, [2 x <4 x i32>] %s.coerce, i32 %n) {
 ; CHECK-LABEL: define <4 x i32> @test_cse(
@@ -17,8 +17,6 @@ define <4 x i32> @test_cse(ptr %a, [2 x <4 x i32>] %s.coerce, i32 %n) {
 ; CHECK:       [[FOR_BODY]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_0_EXTRACT]] to <16 x i8>
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_1_EXTRACT]] to <16 x i8>
-; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> [[S_COERCE_FCA_0_EXTRACT]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <4 x i32>, <4 x i32> } [[TMP2]], <4 x i32> [[S_COERCE_FCA_1_EXTRACT]], 1
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_1_EXTRACT]], ptr [[A]])
 ; CHECK-NEXT:    [[CALL]] = call <4 x i32> @vaddq_s32(<4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_0_EXTRACT]])
 ; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
@@ -71,8 +69,6 @@ define <4 x i32> @test_cse2(ptr %a, [2 x <4 x i32>] %s.coerce, i32 %n) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_0_EXTRACT]] to <16 x i8>
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_1_EXTRACT]] to <16 x i8>
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_0_EXTRACT]], ptr [[A]])
-; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> [[S_COERCE_FCA_0_EXTRACT]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <4 x i32>, <4 x i32> } [[TMP2]], <4 x i32> [[S_COERCE_FCA_1_EXTRACT]], 1
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_1_EXTRACT]], ptr [[A]])
 ; CHECK-NEXT:    [[CALL]] = call <4 x i32> @vaddq_s32(<4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_0_EXTRACT]])
 ; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
@@ -324,6 +320,22 @@ for.end:                                          ; preds = %for.cond
   ret <4 x i32> %res.0
 }
 
+define void @test_ld4_st4_no_cse(ptr %p, <16 x i8> %A, <16 x i8> %B) {
+; CHECK-LABEL: define void @test_ld4_st4_no_cse(
+; CHECK-SAME: ptr [[P:%.*]], <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[LD:%.*]] = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr [[P]])
+; CHECK-NEXT:    [[EXT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[LD]], 0
+; CHECK-NEXT:    tail call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[EXT]], <16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> zeroinitializer, ptr [[P]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %ld = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr %p)
+  %ext = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld, 0
+  tail call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> %ext, <16 x i8> %A, <16 x i8> %B, <16 x i8> zeroinitializer, ptr %p)
+  ret void
+}
+
 ; Function Attrs: nounwind
 declare void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32>, <4 x i32>, ptr nocapture)
 

>From 43f6f2f2cb64c0dbc101c201cb6fb421bcd8e257 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 27 Jun 2025 22:49:17 +0100
Subject: [PATCH 2/4] !fixup address comments, thanks

---
 .../llvm/Analysis/TargetTransformInfo.h       | 16 +++++++---
 .../llvm/Analysis/TargetTransformInfoImpl.h   | 10 ++++--
 llvm/lib/Analysis/TargetTransformInfo.cpp     | 12 ++++---
 .../AArch64/AArch64TargetTransformInfo.cpp    | 32 +++++++++++--------
 .../AArch64/AArch64TargetTransformInfo.h      |  8 +++--
 llvm/lib/Transforms/Scalar/EarlyCSE.cpp       | 22 ++++---------
 .../Transforms/EarlyCSE/AArch64/intrinsics.ll |  4 +++
 7 files changed, 60 insertions(+), 44 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index fda0aef184794..2ab201b34bba5 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1702,13 +1702,19 @@ class TargetTransformInfo {
   /// unordered-atomic memory intrinsic.
   LLVM_ABI unsigned getAtomicMemIntrinsicMaxElementSize() const;
 
+  /// \returns A value which is the result of the given memory intrinsic.
+  /// Returns nullptr if the target cannot return a result from the given
+  /// intrinsic, e.g. because it would require creating new instructions. Use
+  /// getOrCreateResultFromMemIntrinsic to allow creating new instructions.
+  LLVM_ABI Value *getResultFromMemIntrinsic(IntrinsicInst *Inst,
+                                            Type *ExpectedType) const;
+
   /// \returns A value which is the result of the given memory intrinsic.  New
   /// instructions may be created to extract the result from the given intrinsic
-  /// memory operation.  Returns nullptr if the target cannot create a result
-  /// from the given intrinsic. Adds newly created instructions to \p NewInsts.
-  LLVM_ABI Value *getOrCreateResultFromMemIntrinsic(
-      IntrinsicInst *Inst, Type *ExpectedType,
-      SmallVectorImpl<Instruction *> &NewInsts) const;
+  /// memory operation. Returns nullptr if the target cannot create a result
+  /// from the given intrinsic.
+  LLVM_ABI Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
+                                                    Type *ExpectedType) const;
 
   /// \returns The type to use in a loop expansion of a memcpy call.
   LLVM_ABI Type *getMemcpyLoopLoweringType(
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 6873744138057..2f5229c6b70ac 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -983,9 +983,13 @@ class TargetTransformInfoImplBase {
     return 0;
   }
 
-  virtual Value *getOrCreateResultFromMemIntrinsic(
-      IntrinsicInst *Inst, Type *ExpectedType,
-      SmallVectorImpl<Instruction *> &NewInsts) const {
+  virtual Value *getResultFromMemIntrinsic(IntrinsicInst *Inst,
+                                           Type *ExpectedType) const {
+    return nullptr;
+  }
+
+  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
+                                                   Type *ExpectedType) const {
     return nullptr;
   }
 
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 7b335043c26f0..1494508ee11ff 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1293,11 +1293,15 @@ unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const {
   return TTIImpl->getAtomicMemIntrinsicMaxElementSize();
 }
 
+Value *
+TargetTransformInfo::getResultFromMemIntrinsic(IntrinsicInst *Inst,
+                                               Type *ExpectedType) const {
+  return TTIImpl->getResultFromMemIntrinsic(Inst, ExpectedType);
+}
+
 Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
-    IntrinsicInst *Inst, Type *ExpectedType,
-    SmallVectorImpl<Instruction *> &NewInsts) const {
-  return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType,
-                                                    NewInsts);
+    IntrinsicInst *Inst, Type *ExpectedType) const {
+  return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
 }
 
 Type *TargetTransformInfo::getMemcpyLoopLoweringType(
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 6da8a681f2343..1156cb7e721e9 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4967,12 +4967,26 @@ void AArch64TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
   BaseT::getPeelingPreferences(L, SE, PP);
 }
 
-Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(
-    IntrinsicInst *Inst, Type *ExpectedType,
-    SmallVectorImpl<Instruction *> &NewInsts) const {
+Value *AArch64TTIImpl::getResultFromMemIntrinsic(IntrinsicInst *Inst,
+                                                 Type *ExpectedType) const {
   switch (Inst->getIntrinsicID()) {
   default:
     return nullptr;
+  case Intrinsic::aarch64_neon_ld2:
+  case Intrinsic::aarch64_neon_ld3:
+  case Intrinsic::aarch64_neon_ld4:
+    if (Inst->getType() == ExpectedType)
+      return Inst;
+    return nullptr;
+  }
+}
+
+Value *
+AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
+                                                  Type *ExpectedType) const {
+  switch (Inst->getIntrinsicID()) {
+  default:
+    return getResultFromMemIntrinsic(Inst, ExpectedType);
   case Intrinsic::aarch64_neon_st2:
   case Intrinsic::aarch64_neon_st3:
   case Intrinsic::aarch64_neon_st4: {
@@ -4988,23 +5002,13 @@ Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(
         return nullptr;
     }
     Value *Res = PoisonValue::get(ExpectedType);
-    IRBuilder<ConstantFolder, IRBuilderCallbackInserter> Builder(
-        Inst->getContext(), ConstantFolder(),
-        IRBuilderCallbackInserter(
-            [&NewInsts](Instruction *I) { NewInsts.push_back(I); }));
-    Builder.SetInsertPoint(Inst);
+    IRBuilder<> Builder(Inst);
     for (unsigned i = 0, e = NumElts; i != e; ++i) {
       Value *L = Inst->getArgOperand(i);
       Res = Builder.CreateInsertValue(Res, L, i);
     }
     return Res;
   }
-  case Intrinsic::aarch64_neon_ld2:
-  case Intrinsic::aarch64_neon_ld3:
-  case Intrinsic::aarch64_neon_ld4:
-    if (Inst->getType() == ExpectedType)
-      return Inst;
-    return nullptr;
   }
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index f6c3f1ce8e92c..49f2f5610e5fe 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -270,9 +270,11 @@ class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> {
   void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                              TTI::PeelingPreferences &PP) const override;
 
-  Value *getOrCreateResultFromMemIntrinsic(
-      IntrinsicInst *Inst, Type *ExpectedType,
-      SmallVectorImpl<Instruction *> &NewInsts) const override;
+  Value *getResultFromMemIntrinsic(IntrinsicInst *Inst,
+                                   Type *ExpectedType) const override;
+
+  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
+                                           Type *ExpectedType) const override;
 
   bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                           MemIntrinsicInfo &Info) const override;
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 33a4e928184a8..f135e7426307a 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -722,8 +722,6 @@ class EarlyCSE {
   /// This is the current generation of the memory value.
   unsigned CurrentGeneration = 0;
 
-  SmallVector<Instruction *> TmpInstructions;
-
   /// Set up the EarlyCSE runner for a particular function.
   EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
            const TargetTransformInfo &TTI, DominatorTree &DT,
@@ -961,7 +959,7 @@ class EarlyCSE {
                         const ParseMemoryInst &Later);
 
   Value *getOrCreateResult(Instruction *Inst, Type *ExpectedType,
-                           SmallVectorImpl<Instruction *> &TmpInsts) const {
+                           bool Create) const {
     // TODO: We could insert relevant casts on type mismatch.
     // The load or the store's first operand.
     Value *V;
@@ -974,8 +972,8 @@ class EarlyCSE {
         V = II->getOperand(0);
         break;
       default:
-        return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType,
-                                                     TmpInsts);
+        return Create ? TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType)
+                      : TTI.getResultFromMemIntrinsic(II, ExpectedType);
       }
     } else {
       V = isa<LoadInst>(Inst) ? Inst : cast<StoreInst>(Inst)->getValueOperand();
@@ -1259,10 +1257,9 @@ Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
 
   // For stores check the result values before checking memory generation
   // (otherwise isSameMemGeneration may crash).
-  Value *Result =
-      MemInst.isStore()
-          ? getOrCreateResult(Matching, Other->getType(), TmpInstructions)
-          : nullptr;
+  Value *Result = MemInst.isStore()
+                      ? getOrCreateResult(Matching, Other->getType(), false)
+                      : nullptr;
   if (MemInst.isStore() && InVal.DefInst != Result)
     return nullptr;
 
@@ -1283,7 +1280,7 @@ Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
     return nullptr;
 
   if (!Result)
-    Result = getOrCreateResult(Matching, Other->getType(), TmpInstructions);
+    Result = getOrCreateResult(Matching, Other->getType(), true);
   return Result;
 }
 
@@ -1837,11 +1834,6 @@ bool EarlyCSE::run() {
     }
   } // while (!nodes...)
 
-  // Clean up temporary instructions.
-  for (Instruction *I : reverse(TmpInstructions))
-    if (I->use_empty())
-      I->eraseFromParent();
-
   return Changed;
 }
 
diff --git a/llvm/test/Transforms/EarlyCSE/AArch64/intrinsics.ll b/llvm/test/Transforms/EarlyCSE/AArch64/intrinsics.ll
index 4744e3761fa6b..826da89290691 100644
--- a/llvm/test/Transforms/EarlyCSE/AArch64/intrinsics.ll
+++ b/llvm/test/Transforms/EarlyCSE/AArch64/intrinsics.ll
@@ -17,6 +17,8 @@ define <4 x i32> @test_cse(ptr %a, [2 x <4 x i32>] %s.coerce, i32 %n) {
 ; CHECK:       [[FOR_BODY]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_0_EXTRACT]] to <16 x i8>
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_1_EXTRACT]] to <16 x i8>
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> [[S_COERCE_FCA_0_EXTRACT]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <4 x i32>, <4 x i32> } [[TMP2]], <4 x i32> [[S_COERCE_FCA_1_EXTRACT]], 1
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_1_EXTRACT]], ptr [[A]])
 ; CHECK-NEXT:    [[CALL]] = call <4 x i32> @vaddq_s32(<4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_0_EXTRACT]])
 ; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
@@ -69,6 +71,8 @@ define <4 x i32> @test_cse2(ptr %a, [2 x <4 x i32>] %s.coerce, i32 %n) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_0_EXTRACT]] to <16 x i8>
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[S_COERCE_FCA_1_EXTRACT]] to <16 x i8>
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_0_EXTRACT]], ptr [[A]])
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> [[S_COERCE_FCA_0_EXTRACT]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { <4 x i32>, <4 x i32> } [[TMP2]], <4 x i32> [[S_COERCE_FCA_1_EXTRACT]], 1
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_1_EXTRACT]], ptr [[A]])
 ; CHECK-NEXT:    [[CALL]] = call <4 x i32> @vaddq_s32(<4 x i32> [[S_COERCE_FCA_0_EXTRACT]], <4 x i32> [[S_COERCE_FCA_0_EXTRACT]])
 ; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1

>From 1653a0bf7af141c3583273aded3eec895cb4ad76 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 7 Jul 2025 20:16:02 +0100
Subject: [PATCH 3/4] !fixup only have getOrCreateResultFromMemIntrinsic with
 extra arg.

---
 .../llvm/Analysis/TargetTransformInfo.h       | 20 +++++++------------
 .../llvm/Analysis/TargetTransformInfoImpl.h   | 10 +++-------
 llvm/lib/Analysis/TargetTransformInfo.cpp     | 11 +++-------
 .../AArch64/AArch64TargetTransformInfo.cpp    | 16 ++++-----------
 .../AArch64/AArch64TargetTransformInfo.h      |  8 +++-----
 llvm/lib/Transforms/Scalar/EarlyCSE.cpp       |  3 +--
 6 files changed, 21 insertions(+), 47 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 2ab201b34bba5..98b793aace7a3 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1702,19 +1702,13 @@ class TargetTransformInfo {
   /// unordered-atomic memory intrinsic.
   LLVM_ABI unsigned getAtomicMemIntrinsicMaxElementSize() const;
 
-  /// \returns A value which is the result of the given memory intrinsic.
-  /// Returns nullptr if the target cannot return a result from the given
-  /// intrinsic, e.g. because it would require creating new instructions. Use
-  /// getOrCreateResultFromMemIntrinsic to allow creating new instructions.
-  LLVM_ABI Value *getResultFromMemIntrinsic(IntrinsicInst *Inst,
-                                            Type *ExpectedType) const;
-
-  /// \returns A value which is the result of the given memory intrinsic.  New
-  /// instructions may be created to extract the result from the given intrinsic
-  /// memory operation. Returns nullptr if the target cannot create a result
-  /// from the given intrinsic.
-  LLVM_ABI Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
-                                                    Type *ExpectedType) const;
+  /// \returns A value which is the result of the given memory intrinsic. If \p
+  /// CanCreate is true, new instructions may be created to extract the result
+  /// from the given intrinsic memory operation. Returns nullptr if the target
+  /// cannot create a result from the given intrinsic.
+  LLVM_ABI Value *
+  getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType,
+                                    bool CanCreate = true) const;
 
   /// \returns The type to use in a loop expansion of a memcpy call.
   LLVM_ABI Type *getMemcpyLoopLoweringType(
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 2f5229c6b70ac..ddc8a5eaffa94 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -983,13 +983,9 @@ class TargetTransformInfoImplBase {
     return 0;
   }
 
-  virtual Value *getResultFromMemIntrinsic(IntrinsicInst *Inst,
-                                           Type *ExpectedType) const {
-    return nullptr;
-  }
-
-  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
-                                                   Type *ExpectedType) const {
+  virtual Value *
+  getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType,
+                                    bool CanCreate = true) const {
     return nullptr;
   }
 
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 1494508ee11ff..8a470ebf85a16 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1293,15 +1293,10 @@ unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const {
   return TTIImpl->getAtomicMemIntrinsicMaxElementSize();
 }
 
-Value *
-TargetTransformInfo::getResultFromMemIntrinsic(IntrinsicInst *Inst,
-                                               Type *ExpectedType) const {
-  return TTIImpl->getResultFromMemIntrinsic(Inst, ExpectedType);
-}
-
 Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
-    IntrinsicInst *Inst, Type *ExpectedType) const {
-  return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
+    IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate) const {
+  return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType,
+                                                    CanCreate);
 }
 
 Type *TargetTransformInfo::getMemcpyLoopLoweringType(
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 1156cb7e721e9..2eef9d71f97d1 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4967,8 +4967,9 @@ void AArch64TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
   BaseT::getPeelingPreferences(L, SE, PP);
 }
 
-Value *AArch64TTIImpl::getResultFromMemIntrinsic(IntrinsicInst *Inst,
-                                                 Type *ExpectedType) const {
+Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
+                                                         Type *ExpectedType,
+                                                         bool CanCreate) const {
   switch (Inst->getIntrinsicID()) {
   default:
     return nullptr;
@@ -4978,21 +4979,12 @@ Value *AArch64TTIImpl::getResultFromMemIntrinsic(IntrinsicInst *Inst,
     if (Inst->getType() == ExpectedType)
       return Inst;
     return nullptr;
-  }
-}
-
-Value *
-AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
-                                                  Type *ExpectedType) const {
-  switch (Inst->getIntrinsicID()) {
-  default:
-    return getResultFromMemIntrinsic(Inst, ExpectedType);
   case Intrinsic::aarch64_neon_st2:
   case Intrinsic::aarch64_neon_st3:
   case Intrinsic::aarch64_neon_st4: {
     // Create a struct type
     StructType *ST = dyn_cast<StructType>(ExpectedType);
-    if (!ST)
+    if (!CanCreate || !ST)
       return nullptr;
     unsigned NumElts = Inst->arg_size() - 1;
     if (ST->getNumElements() != NumElts)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 49f2f5610e5fe..ff0ab68a16a88 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -270,11 +270,9 @@ class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> {
   void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                              TTI::PeelingPreferences &PP) const override;
 
-  Value *getResultFromMemIntrinsic(IntrinsicInst *Inst,
-                                   Type *ExpectedType) const override;
-
-  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
-                                           Type *ExpectedType) const override;
+  Value *
+  getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType,
+                                    bool CanCreate = true) const override;
 
   bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                           MemIntrinsicInfo &Info) const override;
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index f135e7426307a..d0ac1de802add 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -972,8 +972,7 @@ class EarlyCSE {
         V = II->getOperand(0);
         break;
       default:
-        return Create ? TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType)
-                      : TTI.getResultFromMemIntrinsic(II, ExpectedType);
+        return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType, Create);
       }
     } else {
       V = isa<LoadInst>(Inst) ? Inst : cast<StoreInst>(Inst)->getValueOperand();

>From a20ecc0bf7e2414c6e27a174379004d1e10a6bc6 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 8 Jul 2025 11:59:43 +0100
Subject: [PATCH 4/4] !fixup Restore original order, add arg comments.

---
 .../Target/AArch64/AArch64TargetTransformInfo.cpp  | 12 ++++++------
 llvm/lib/Transforms/Scalar/EarlyCSE.cpp            | 14 ++++++++------
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 2eef9d71f97d1..adc905384bf53 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4973,12 +4973,6 @@ Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
   switch (Inst->getIntrinsicID()) {
   default:
     return nullptr;
-  case Intrinsic::aarch64_neon_ld2:
-  case Intrinsic::aarch64_neon_ld3:
-  case Intrinsic::aarch64_neon_ld4:
-    if (Inst->getType() == ExpectedType)
-      return Inst;
-    return nullptr;
   case Intrinsic::aarch64_neon_st2:
   case Intrinsic::aarch64_neon_st3:
   case Intrinsic::aarch64_neon_st4: {
@@ -5001,6 +4995,12 @@ Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
     }
     return Res;
   }
+  case Intrinsic::aarch64_neon_ld2:
+  case Intrinsic::aarch64_neon_ld3:
+  case Intrinsic::aarch64_neon_ld4:
+    if (Inst->getType() == ExpectedType)
+      return Inst;
+    return nullptr;
   }
 }
 
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index d0ac1de802add..0f8cc6ca6ed21 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -959,7 +959,7 @@ class EarlyCSE {
                         const ParseMemoryInst &Later);
 
   Value *getOrCreateResult(Instruction *Inst, Type *ExpectedType,
-                           bool Create) const {
+                           bool CanCreate) const {
     // TODO: We could insert relevant casts on type mismatch.
     // The load or the store's first operand.
     Value *V;
@@ -972,7 +972,8 @@ class EarlyCSE {
         V = II->getOperand(0);
         break;
       default:
-        return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType, Create);
+        return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType,
+                                                     CanCreate);
       }
     } else {
       V = isa<LoadInst>(Inst) ? Inst : cast<StoreInst>(Inst)->getValueOperand();
@@ -1256,9 +1257,10 @@ Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
 
   // For stores check the result values before checking memory generation
   // (otherwise isSameMemGeneration may crash).
-  Value *Result = MemInst.isStore()
-                      ? getOrCreateResult(Matching, Other->getType(), false)
-                      : nullptr;
+  Value *Result =
+      MemInst.isStore()
+          ? getOrCreateResult(Matching, Other->getType(), /*CanCreate=*/false)
+          : nullptr;
   if (MemInst.isStore() && InVal.DefInst != Result)
     return nullptr;
 
@@ -1279,7 +1281,7 @@ Value *EarlyCSE::getMatchingValue(LoadValue &InVal, ParseMemoryInst &MemInst,
     return nullptr;
 
   if (!Result)
-    Result = getOrCreateResult(Matching, Other->getType(), true);
+    Result = getOrCreateResult(Matching, Other->getType(), /*CanCreate=*/true);
   return Result;
 }
 



More information about the llvm-commits mailing list