[llvm] 1c32b6f - [AArch64][ARM] Move ARM-specific InstCombine transforms into `Transforms/Utils` (#169589)

via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 2 03:17:16 PST 2025


Author: valadaptive
Date: 2025-12-02T11:17:12Z
New Revision: 1c32b6f51ccaaf9c65be11d7dca9e5a476cddb5a

URL: https://github.com/llvm/llvm-project/commit/1c32b6f51ccaaf9c65be11d7dca9e5a476cddb5a
DIFF: https://github.com/llvm/llvm-project/commit/1c32b6f51ccaaf9c65be11d7dca9e5a476cddb5a.diff

LOG: [AArch64][ARM] Move ARM-specific InstCombine transforms into `Transforms/Utils` (#169589)

Back when `TargetTransformInfo::instCombineIntrinsic` was added in
https://reviews.llvm.org/D81728, several transforms common to both ARM
and AArch64 were kept in the non-target-specific `InstCombineCalls.cpp`
so they could be shared between the two targets.

I want to extend the transform of the `tbl` intrinsics into static
`shufflevector`s in a manner similar to
https://github.com/llvm/llvm-project/pull/169110 (right now the transform
only handles the 64-bit `tbl1`, but `shufflevector` should allow it to
handle up to two table operands, and it can definitely work with 128-bit
vectors). Separating the transform out into a TTI hook is a prerequisite
for that.
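
As a sketch of the current state of that transform (illustrative IR, not part of this patch), a 64-bit `tbl1` with a constant index vector is already rewritten into a `shufflevector`, which the backend can then lower cheaply, e.g. to `rev64` for a byte-reverse mask:

```llvm
; Illustrative IR: a 64-bit tbl1 whose index vector is the constant
; byte-reverse mask { 7,6,5,4,3,2,1,0 }.
define <8 x i8> @tbl1_reverse(<16 x i8> %table) {
  %r = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %table, <8 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
  ret <8 x i8> %r
}
; After instcombine, the call becomes a shufflevector that reads only the low
; eight table bytes:
;   %r = shufflevector <16 x i8> %table, <16 x i8> zeroinitializer,
;            <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>)
```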

~~I'm not happy about creating an entirely new module for this and
having to wire it up through CMake and everything, but I'm not sure
about the alternatives. If any maintainers can think of a cleaner way of
doing this, I'm very open to it.~~

I've moved the transforms into
`Transforms/Utils/ARMCommonInstCombineIntrinsic.cpp`, which is a lot
simpler.

Added: 
    llvm/include/llvm/Transforms/Utils/ARMCommonInstCombineIntrinsic.h
    llvm/lib/Transforms/Utils/ARMCommonInstCombineIntrinsic.cpp

Modified: 
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
    llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
    llvm/lib/Transforms/Utils/CMakeLists.txt
    llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
    llvm/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll
    llvm/test/Transforms/InstCombine/ARM/aes-intrinsics.ll
    llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/Transforms/Utils/ARMCommonInstCombineIntrinsic.h b/llvm/include/llvm/Transforms/Utils/ARMCommonInstCombineIntrinsic.h
new file mode 100644
index 0000000000000..1efb647978423
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Utils/ARMCommonInstCombineIntrinsic.h
@@ -0,0 +1,56 @@
+//===- ARMCommonInstCombineIntrinsic.h - Shared ARM/AArch64 opts *- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains optimizations for ARM and AArch64 intrinsics that
+/// are shared between both architectures. These functions can be called from:
+/// - ARM TTI's instCombineIntrinsic (for arm_neon_* intrinsics)
+/// - AArch64 TTI's instCombineIntrinsic (for aarch64_neon_* and aarch64_sve_*
+///   intrinsics)
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_ARMCOMMONINSTCOMBINEINTRINSIC_H
+#define LLVM_TRANSFORMS_UTILS_ARMCOMMONINSTCOMBINEINTRINSIC_H
+
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
+
+namespace llvm {
+
+namespace ARMCommon {
+
+/// Convert a table lookup to shufflevector if the mask is constant.
+/// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
+/// which case we could lower the shufflevector with rev64 instructions
+/// as it's actually a byte reverse.
+Instruction *simplifyNeonTbl1(IntrinsicInst &II, InstCombiner &IC);
+
+/// Simplify NEON multiply-long intrinsics (smull, umull).
+/// These intrinsics perform widening multiplies: they multiply two vectors of
+/// narrow integers and produce a vector of wider integers. This function
+/// performs algebraic simplifications:
+/// 1. Multiply by zero => zero vector
+/// 2. Multiply by one => zero/sign-extend the non-one operand
+/// 3. Both operands constant => regular multiply that can be constant-folded
+///    later
+Instruction *simplifyNeonMultiply(IntrinsicInst &II, InstCombiner &IC,
+                                  bool IsSigned);
+
+/// Simplify AES encryption/decryption intrinsics (AESE, AESD).
+///
+/// ARM's AES instructions (AESE/AESD) XOR the data and the key, provided as
+/// separate arguments, before performing the encryption/decryption operation.
+/// We can fold that "internal" XOR with a previous one.
+Instruction *simplifyAES(IntrinsicInst &II, InstCombiner &IC);
+
+} // namespace ARMCommon
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_ARMCOMMONINSTCOMBINEINTRINSIC_H

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 3a5f1499f9d2d..f748cdac32855 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -25,6 +25,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/TargetParser/AArch64TargetParser.h"
 #include "llvm/Transforms/InstCombine/InstCombiner.h"
+#include "llvm/Transforms/Utils/ARMCommonInstCombineIntrinsic.h"
 #include "llvm/Transforms/Utils/UnrollLoop.h"
 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
 #include <algorithm>
@@ -2873,6 +2874,18 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
   case Intrinsic::aarch64_neon_fmaxnm:
   case Intrinsic::aarch64_neon_fminnm:
     return instCombineMaxMinNM(IC, II);
+  case Intrinsic::aarch64_neon_tbl1:
+    return ARMCommon::simplifyNeonTbl1(II, IC);
+  case Intrinsic::aarch64_neon_smull:
+  case Intrinsic::aarch64_neon_umull: {
+    bool IsSigned = IID == Intrinsic::aarch64_neon_smull;
+    return ARMCommon::simplifyNeonMultiply(II, IC, IsSigned);
+  }
+  case Intrinsic::aarch64_crypto_aesd:
+  case Intrinsic::aarch64_crypto_aese:
+  case Intrinsic::aarch64_sve_aesd:
+  case Intrinsic::aarch64_sve_aese:
+    return ARMCommon::simplifyAES(II, IC);
   case Intrinsic::aarch64_sve_convert_from_svbool:
     return instCombineConvertFromSVBool(IC, II);
   case Intrinsic::aarch64_sve_dup:

diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index cecc9544ed835..742ef830a3a09 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -31,6 +31,7 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/TargetParser/SubtargetFeature.h"
 #include "llvm/Transforms/InstCombine/InstCombiner.h"
+#include "llvm/Transforms/Utils/ARMCommonInstCombineIntrinsic.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
@@ -187,6 +188,19 @@ ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
     break;
   }
 
+  case Intrinsic::arm_neon_vtbl1:
+    return ARMCommon::simplifyNeonTbl1(II, IC);
+
+  case Intrinsic::arm_neon_vmulls:
+  case Intrinsic::arm_neon_vmullu: {
+    bool IsSigned = IID == Intrinsic::arm_neon_vmulls;
+    return ARMCommon::simplifyNeonMultiply(II, IC, IsSigned);
+  }
+
+  case Intrinsic::arm_neon_aesd:
+  case Intrinsic::arm_neon_aese:
+    return ARMCommon::simplifyAES(II, IC);
+
   case Intrinsic::arm_mve_pred_i2v: {
     Value *Arg = II.getArgOperand(0);
     Value *ArgArg;

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 743c4f574e131..2ed9ac25b2704 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -737,44 +737,6 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
   return nullptr;
 }
 
-/// Convert a table lookup to shufflevector if the mask is constant.
-/// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
-/// which case we could lower the shufflevector with rev64 instructions
-/// as it's actually a byte reverse.
-static Value *simplifyNeonTbl1(const IntrinsicInst &II,
-                               InstCombiner::BuilderTy &Builder) {
-  // Bail out if the mask is not a constant.
-  auto *C = dyn_cast<Constant>(II.getArgOperand(1));
-  if (!C)
-    return nullptr;
-
-  auto *VecTy = cast<FixedVectorType>(II.getType());
-  unsigned NumElts = VecTy->getNumElements();
-
-  // Only perform this transformation for <8 x i8> vector types.
-  if (!VecTy->getElementType()->isIntegerTy(8) || NumElts != 8)
-    return nullptr;
-
-  int Indexes[8];
-
-  for (unsigned I = 0; I < NumElts; ++I) {
-    Constant *COp = C->getAggregateElement(I);
-
-    if (!COp || !isa<ConstantInt>(COp))
-      return nullptr;
-
-    Indexes[I] = cast<ConstantInt>(COp)->getLimitedValue();
-
-    // Make sure the mask indices are in range.
-    if ((unsigned)Indexes[I] >= NumElts)
-      return nullptr;
-  }
-
-  auto *V1 = II.getArgOperand(0);
-  auto *V2 = Constant::getNullValue(V1->getType());
-  return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes));
-}
-
 // Returns true iff the 2 intrinsics have the same operands, limiting the
 // comparison to the first NumOperands.
 static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
@@ -3166,72 +3128,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
         Intrinsic::getOrInsertDeclaration(II->getModule(), NewIntrin);
     return CallInst::Create(NewFn, CallArgs);
   }
-  case Intrinsic::arm_neon_vtbl1:
-  case Intrinsic::aarch64_neon_tbl1:
-    if (Value *V = simplifyNeonTbl1(*II, Builder))
-      return replaceInstUsesWith(*II, V);
-    break;
-
-  case Intrinsic::arm_neon_vmulls:
-  case Intrinsic::arm_neon_vmullu:
-  case Intrinsic::aarch64_neon_smull:
-  case Intrinsic::aarch64_neon_umull: {
-    Value *Arg0 = II->getArgOperand(0);
-    Value *Arg1 = II->getArgOperand(1);
-
-    // Handle mul by zero first:
-    if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
-      return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
-    }
-
-    // Check for constant LHS & RHS - in this case we just simplify.
-    bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
-                 IID == Intrinsic::aarch64_neon_umull);
-    VectorType *NewVT = cast<VectorType>(II->getType());
-    if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
-      if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
-        Value *V0 = Builder.CreateIntCast(CV0, NewVT, /*isSigned=*/!Zext);
-        Value *V1 = Builder.CreateIntCast(CV1, NewVT, /*isSigned=*/!Zext);
-        return replaceInstUsesWith(CI, Builder.CreateMul(V0, V1));
-      }
-
-      // Couldn't simplify - canonicalize constant to the RHS.
-      std::swap(Arg0, Arg1);
-    }
-
-    // Handle mul by one:
-    if (Constant *CV1 = dyn_cast<Constant>(Arg1))
-      if (ConstantInt *Splat =
-              dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
-        if (Splat->isOne())
-          return CastInst::CreateIntegerCast(Arg0, II->getType(),
-                                             /*isSigned=*/!Zext);
-
-    break;
-  }
-  case Intrinsic::arm_neon_aesd:
-  case Intrinsic::arm_neon_aese:
-  case Intrinsic::aarch64_crypto_aesd:
-  case Intrinsic::aarch64_crypto_aese:
-  case Intrinsic::aarch64_sve_aesd:
-  case Intrinsic::aarch64_sve_aese: {
-    Value *DataArg = II->getArgOperand(0);
-    Value *KeyArg  = II->getArgOperand(1);
-
-    // Accept zero on either operand.
-    if (!match(KeyArg, m_ZeroInt()))
-      std::swap(KeyArg, DataArg);
-
-    // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
-    Value *Data, *Key;
-    if (match(KeyArg, m_ZeroInt()) &&
-        match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
-      replaceOperand(*II, 0, Data);
-      replaceOperand(*II, 1, Key);
-      return II;
-    }
-    break;
-  }
   case Intrinsic::hexagon_V6_vandvrt:
   case Intrinsic::hexagon_V6_vandvrt_128B: {
     // Simplify Q -> V -> Q conversion.

diff --git a/llvm/lib/Transforms/Utils/ARMCommonInstCombineIntrinsic.cpp b/llvm/lib/Transforms/Utils/ARMCommonInstCombineIntrinsic.cpp
new file mode 100644
index 0000000000000..4e4dea727b407
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/ARMCommonInstCombineIntrinsic.cpp
@@ -0,0 +1,135 @@
+//===- ARMCommonInstCombineIntrinsic.cpp - Shared ARM/AArch64 opts  -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains optimizations for ARM and AArch64 intrinsics that
+/// are shared between both architectures. These functions can be called from:
+/// - ARM TTI's instCombineIntrinsic (for arm_neon_* intrinsics)
+/// - AArch64 TTI's instCombineIntrinsic (for aarch64_neon_* and aarch64_sve_*
+///   intrinsics)
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/ARMCommonInstCombineIntrinsic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+namespace llvm {
+namespace ARMCommon {
+
+/// Convert a table lookup to shufflevector if the mask is constant.
+/// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
+/// which case we could lower the shufflevector with rev64 instructions
+/// as it's actually a byte reverse.
+Instruction *simplifyNeonTbl1(IntrinsicInst &II, InstCombiner &IC) {
+  // Bail out if the mask is not a constant.
+  auto *C = dyn_cast<Constant>(II.getArgOperand(1));
+  if (!C)
+    return nullptr;
+
+  auto *VecTy = cast<FixedVectorType>(II.getType());
+  unsigned NumElts = VecTy->getNumElements();
+
+  // Only perform this transformation for <8 x i8> vector types.
+  if (!VecTy->getElementType()->isIntegerTy(8) || NumElts != 8)
+    return nullptr;
+
+  int Indexes[8];
+
+  for (unsigned I = 0; I < NumElts; ++I) {
+    Constant *COp = C->getAggregateElement(I);
+
+    if (!COp || !isa<ConstantInt>(COp))
+      return nullptr;
+
+    Indexes[I] = cast<ConstantInt>(COp)->getLimitedValue();
+
+    // Make sure the mask indices are in range.
+    if ((unsigned)Indexes[I] >= NumElts)
+      return nullptr;
+  }
+
+  auto *V1 = II.getArgOperand(0);
+  auto *V2 = Constant::getNullValue(V1->getType());
+  Value *Shuf = IC.Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes));
+  return IC.replaceInstUsesWith(II, Shuf);
+}
+
+/// Simplify NEON multiply-long intrinsics (smull, umull).
+/// These intrinsics perform widening multiplies: they multiply two vectors of
+/// narrow integers and produce a vector of wider integers. This function
+/// performs algebraic simplifications:
+/// 1. Multiply by zero => zero vector
+/// 2. Multiply by one => zero/sign-extend the non-one operand
+/// 3. Both operands constant => regular multiply that can be constant-folded
+///    later
+Instruction *simplifyNeonMultiply(IntrinsicInst &II, InstCombiner &IC,
+                                  bool IsSigned) {
+  Value *Arg0 = II.getArgOperand(0);
+  Value *Arg1 = II.getArgOperand(1);
+
+  // Handle mul by zero first:
+  if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
+    return IC.replaceInstUsesWith(II, ConstantAggregateZero::get(II.getType()));
+  }
+
+  // Check for constant LHS & RHS - in this case we just simplify.
+  VectorType *NewVT = cast<VectorType>(II.getType());
+  if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
+    if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
+      Value *V0 = IC.Builder.CreateIntCast(CV0, NewVT, IsSigned);
+      Value *V1 = IC.Builder.CreateIntCast(CV1, NewVT, IsSigned);
+      return IC.replaceInstUsesWith(II, IC.Builder.CreateMul(V0, V1));
+    }
+
+    // Couldn't simplify - canonicalize constant to the RHS.
+    std::swap(Arg0, Arg1);
+  }
+
+  // Handle mul by one:
+  if (Constant *CV1 = dyn_cast<Constant>(Arg1))
+    if (ConstantInt *Splat =
+            dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
+      if (Splat->isOne())
+        return CastInst::CreateIntegerCast(Arg0, II.getType(), IsSigned);
+
+  return nullptr;
+}
+
+/// Simplify AES encryption/decryption intrinsics (AESE, AESD).
+///
+/// ARM's AES instructions (AESE/AESD) XOR the data and the key, provided as
+/// separate arguments, before performing the encryption/decryption operation.
+/// We can fold that "internal" XOR with a previous one.
+Instruction *simplifyAES(IntrinsicInst &II, InstCombiner &IC) {
+  Value *DataArg = II.getArgOperand(0);
+  Value *KeyArg = II.getArgOperand(1);
+
+  // Accept zero on either operand.
+  if (!match(KeyArg, m_ZeroInt()))
+    std::swap(KeyArg, DataArg);
+
+  // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
+  Value *Data, *Key;
+  if (match(KeyArg, m_ZeroInt()) &&
+      match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
+    IC.replaceOperand(II, 0, Data);
+    IC.replaceOperand(II, 1, Key);
+    return &II;
+  }
+
+  return nullptr;
+}
+
+} // namespace ARMCommon
+} // namespace llvm

diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt
index f367ca2fdf56b..a1c25fd9ccd2b 100644
--- a/llvm/lib/Transforms/Utils/CMakeLists.txt
+++ b/llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_llvm_component_library(LLVMTransformUtils
   AddDiscriminators.cpp
   AMDGPUEmitPrintf.cpp
+  ARMCommonInstCombineIntrinsic.cpp
   ASanStackFrameLayout.cpp
   AssumeBundleBuilder.cpp
   BasicBlockUtils.cpp

diff --git a/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll b/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
index 8c69d0721b738..fdc628bb59cb0 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+; RUN: opt --mtriple=aarch64 -S -passes=instcombine < %s | FileCheck %s
 ; ARM64 AES intrinsic variants
 
 define <16 x i8> @combineXorAeseZeroARM64(<16 x i8> %data, <16 x i8> %key) {

diff --git a/llvm/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll b/llvm/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll
index 5fc5709ff8897..9ba4b418cb8e5 100644
--- a/llvm/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+; RUN: opt -mtriple=arm -S -passes=instcombine < %s | FileCheck %s
 
 define <4 x i32> @mulByZero(<4 x i16> %x) nounwind readnone ssp {
 ; CHECK-LABEL: define <4 x i32> @mulByZero(

diff --git a/llvm/test/Transforms/InstCombine/ARM/aes-intrinsics.ll b/llvm/test/Transforms/InstCombine/ARM/aes-intrinsics.ll
index 0056d872ff9e3..10175096035ec 100644
--- a/llvm/test/Transforms/InstCombine/ARM/aes-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/ARM/aes-intrinsics.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+; RUN: opt -mtriple=arm -S -passes=instcombine < %s | FileCheck %s
 ; ARM AES intrinsic variants
 
 define <16 x i8> @combineXorAeseZeroARM(<16 x i8> %data, <16 x i8> %key) {

diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn
index 186d2ef96c19b..dae641537b43c 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn
@@ -8,6 +8,7 @@ static_library("Utils") {
   ]
   sources = [
     "AMDGPUEmitPrintf.cpp",
+    "ARMCommonInstCombineIntrinsic.cpp",
     "ASanStackFrameLayout.cpp",
     "AddDiscriminators.cpp",
     "AssumeBundleBuilder.cpp",




More information about the llvm-commits mailing list