[llvm] [AArch64][ARM] Move ARM-specific InstCombine transforms to new module (PR #169589)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 28 09:11:07 PST 2025
https://github.com/valadaptive updated https://github.com/llvm/llvm-project/pull/169589
From 852e4d3e58620828649c5209b12cde86615d6597 Mon Sep 17 00:00:00 2001
From: valadaptive <valadaptive at protonmail.com>
Date: Tue, 25 Nov 2025 19:00:27 -0500
Subject: [PATCH 1/4] [AArch64][ARM] Move ARM-specific InstCombine transforms
to new module
---
.../AArch64/AArch64TargetTransformInfo.cpp | 15 ++
llvm/lib/Target/AArch64/CMakeLists.txt | 1 +
.../lib/Target/ARM/ARMTargetTransformInfo.cpp | 16 ++
llvm/lib/Target/ARM/CMakeLists.txt | 1 +
.../ARMCommonInstCombineIntrinsic.cpp | 137 ++++++++++++++++++
.../ARMCommon/ARMCommonInstCombineIntrinsic.h | 58 ++++++++
llvm/lib/Target/ARMCommon/CMakeLists.txt | 8 +
llvm/lib/Target/CMakeLists.txt | 5 +
.../InstCombine/InstCombineCalls.cpp | 104 -------------
.../InstCombine/AArch64/aes-intrinsics.ll | 2 +-
.../ARM/2012-04-23-Neon-Intrinsics.ll | 2 +-
.../InstCombine/ARM/aes-intrinsics.ll | 2 +-
llvm/test/Transforms/InstCombine/ARM/tbl1.ll | 2 +-
13 files changed, 245 insertions(+), 108 deletions(-)
create mode 100644 llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.cpp
create mode 100644 llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.h
create mode 100644 llvm/lib/Target/ARMCommon/CMakeLists.txt
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 0bae00bafee3c..74be251668ed6 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64TargetTransformInfo.h"
+#include "../ARMCommon/ARMCommonInstCombineIntrinsic.h"
#include "AArch64ExpandImm.h"
#include "AArch64PerfectShuffle.h"
#include "AArch64SMEAttributes.h"
@@ -2856,6 +2857,20 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
case Intrinsic::aarch64_neon_fmaxnm:
case Intrinsic::aarch64_neon_fminnm:
return instCombineMaxMinNM(IC, II);
+ case Intrinsic::aarch64_neon_tbl1:
+ if (Value *V = ARMCommon::simplifyNeonTbl1(II, IC.Builder))
+ return IC.replaceInstUsesWith(II, V);
+ break;
+ case Intrinsic::aarch64_neon_smull:
+ case Intrinsic::aarch64_neon_umull: {
+ bool Zext = IID == Intrinsic::aarch64_neon_umull;
+ return ARMCommon::simplifyNeonMultiply(II, IC, Zext);
+ }
+ case Intrinsic::aarch64_crypto_aesd:
+ case Intrinsic::aarch64_crypto_aese:
+ case Intrinsic::aarch64_sve_aesd:
+ case Intrinsic::aarch64_sve_aese:
+ return ARMCommon::simplifyAES(II, IC);
case Intrinsic::aarch64_sve_convert_from_svbool:
return instCombineConvertFromSVBool(IC, II);
case Intrinsic::aarch64_sve_dup:
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index 285d646293eb7..d27a698ee9e4a 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -101,6 +101,7 @@ add_llvm_target(AArch64CodeGen
AArch64Desc
AArch64Info
AArch64Utils
+ ARMCommon
Analysis
AsmPrinter
CFGuard
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index fdb0ec40cb41f..bff93aa804112 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "ARMTargetTransformInfo.h"
+#include "../ARMCommon/ARMCommonInstCombineIntrinsic.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/APInt.h"
@@ -182,6 +183,21 @@ ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
break;
}
+ case Intrinsic::arm_neon_vtbl1:
+ if (Value *V = ARMCommon::simplifyNeonTbl1(II, IC.Builder))
+ return IC.replaceInstUsesWith(II, V);
+ break;
+
+ case Intrinsic::arm_neon_vmulls:
+ case Intrinsic::arm_neon_vmullu: {
+ bool Zext = IID == Intrinsic::arm_neon_vmullu;
+ return ARMCommon::simplifyNeonMultiply(II, IC, Zext);
+ }
+
+ case Intrinsic::arm_neon_aesd:
+ case Intrinsic::arm_neon_aese:
+ return ARMCommon::simplifyAES(II, IC);
+
case Intrinsic::arm_mve_pred_i2v: {
Value *Arg = II.getArgOperand(0);
Value *ArgArg;
diff --git a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt
index eb3ad01a54fb2..9fc9bc134e5cc 100644
--- a/llvm/lib/Target/ARM/CMakeLists.txt
+++ b/llvm/lib/Target/ARM/CMakeLists.txt
@@ -73,6 +73,7 @@ add_llvm_target(ARMCodeGen
Thumb2SizeReduction.cpp
LINK_COMPONENTS
+ ARMCommon
ARMDesc
ARMInfo
ARMUtils
diff --git a/llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.cpp b/llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.cpp
new file mode 100644
index 0000000000000..9b8c1f4747dc1
--- /dev/null
+++ b/llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.cpp
@@ -0,0 +1,137 @@
+//===- ARMCommonInstCombineIntrinsic.cpp -
+// instCombineIntrinsic opts for both ARM and AArch64 ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains optimizations for ARM and AArch64 intrinsics that
+/// are shared between both architectures. These functions can be called from:
+/// - ARM TTI's instCombineIntrinsic (for arm_neon_* intrinsics)
+/// - AArch64 TTI's instCombineIntrinsic (for aarch64_neon_* and aarch64_sve_*
+/// intrinsics)
+///
+//===----------------------------------------------------------------------===//
+
+#include "ARMCommonInstCombineIntrinsic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+namespace llvm {
+namespace ARMCommon {
+
+/// Convert a table lookup to shufflevector if the mask is constant.
+/// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
+/// which case we could lower the shufflevector with rev64 instructions
+/// as it's actually a byte reverse.
+Value *simplifyNeonTbl1(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+ // Bail out if the mask is not a constant.
+ auto *C = dyn_cast<Constant>(II.getArgOperand(1));
+ if (!C)
+ return nullptr;
+
+ auto *VecTy = cast<FixedVectorType>(II.getType());
+ unsigned NumElts = VecTy->getNumElements();
+
+ // Only perform this transformation for <8 x i8> vector types.
+ if (!VecTy->getElementType()->isIntegerTy(8) || NumElts != 8)
+ return nullptr;
+
+ int Indexes[8];
+
+ for (unsigned I = 0; I < NumElts; ++I) {
+ Constant *COp = C->getAggregateElement(I);
+
+ if (!COp || !isa<ConstantInt>(COp))
+ return nullptr;
+
+ Indexes[I] = cast<ConstantInt>(COp)->getLimitedValue();
+
+ // Make sure the mask indices are in range.
+ if ((unsigned)Indexes[I] >= NumElts)
+ return nullptr;
+ }
+
+ auto *V1 = II.getArgOperand(0);
+ auto *V2 = Constant::getNullValue(V1->getType());
+ return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes));
+}
+
+/// Simplify NEON multiply-long intrinsics (smull, umull).
+/// These intrinsics perform widening multiplies: they multiply two vectors of
+/// narrow integers and produce a vector of wider integers. This function
+/// performs algebraic simplifications:
+/// 1. Multiply by zero => zero vector
+/// 2. Multiply by one => zero/sign-extend the non-one operand
+/// 3. Both operands constant => regular multiply that can be constant-folded
+/// later
+Instruction *simplifyNeonMultiply(IntrinsicInst &II, InstCombiner &IC,
+ bool Zext) {
+ Value *Arg0 = II.getArgOperand(0);
+ Value *Arg1 = II.getArgOperand(1);
+
+ // Handle mul by zero first:
+ if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
+ return IC.replaceInstUsesWith(II, ConstantAggregateZero::get(II.getType()));
+ }
+
+ // Check for constant LHS & RHS - in this case we just simplify.
+ VectorType *NewVT = cast<VectorType>(II.getType());
+ if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
+ if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
+ Value *V0 = IC.Builder.CreateIntCast(CV0, NewVT, /*isSigned=*/!Zext);
+ Value *V1 = IC.Builder.CreateIntCast(CV1, NewVT, /*isSigned=*/!Zext);
+ return IC.replaceInstUsesWith(II, IC.Builder.CreateMul(V0, V1));
+ }
+
+ // Couldn't simplify - canonicalize constant to the RHS.
+ std::swap(Arg0, Arg1);
+ }
+
+ // Handle mul by one:
+ if (Constant *CV1 = dyn_cast<Constant>(Arg1))
+ if (ConstantInt *Splat =
+ dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
+ if (Splat->isOne())
+ return CastInst::CreateIntegerCast(Arg0, II.getType(),
+ /*isSigned=*/!Zext);
+
+ return nullptr;
+}
+
+/// Simplify AES encryption/decryption intrinsics (AESE, AESD).
+///
+/// ARM's AES instructions (AESE/AESD) XOR the data and the key, provided as
+/// separate arguments, before performing the encryption/decryption operation.
+/// We can fold that "internal" XOR with a previous one.
+Instruction *simplifyAES(IntrinsicInst &II, InstCombiner &IC) {
+ Value *DataArg = II.getArgOperand(0);
+ Value *KeyArg = II.getArgOperand(1);
+
+ // Accept zero on either operand.
+ if (!match(KeyArg, m_ZeroInt()))
+ std::swap(KeyArg, DataArg);
+
+ // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
+ Value *Data, *Key;
+ if (match(KeyArg, m_ZeroInt()) &&
+ match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
+ IC.replaceOperand(II, 0, Data);
+ IC.replaceOperand(II, 1, Key);
+ return &II;
+ }
+
+ return nullptr;
+}
+
+} // namespace ARMCommon
+} // namespace llvm
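[Note: for reference, the tbl1 fold moved here rewrites a constant-mask table
lookup into a shufflevector. A minimal IR sketch in the spirit of the existing
tbl1.ll test (function and value names illustrative):

  declare <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8>, <8 x i8>)

  define <8 x i8> @byte_reverse(<8 x i8> %v) {
    %r = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %v, <8 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
    ret <8 x i8> %r
  }

After instcombine, the call becomes a shufflevector the backend can lower with
rev64:

  %r = shufflevector <8 x i8> %v, <8 x i8> zeroinitializer, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
]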
diff --git a/llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.h b/llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.h
new file mode 100644
index 0000000000000..967532b50c7c2
--- /dev/null
+++ b/llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.h
@@ -0,0 +1,58 @@
+//===- ARMCommonInstCombineIntrinsic.h -
+// instCombineIntrinsic opts for both ARM and AArch64 -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains optimizations for ARM and AArch64 intrinsics that
+/// are shared between both architectures. These functions can be called from:
+/// - ARM TTI's instCombineIntrinsic (for arm_neon_* intrinsics)
+/// - AArch64 TTI's instCombineIntrinsic (for aarch64_neon_* and aarch64_sve_*
+/// intrinsics)
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARMCOMMON_ARMCOMMONINSTCOMBINEINTRINSIC_H
+#define LLVM_LIB_TARGET_ARMCOMMON_ARMCOMMONINSTCOMBINEINTRINSIC_H
+
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Transforms/InstCombine/InstCombiner.h"
+
+namespace llvm {
+
+namespace ARMCommon {
+
+/// Convert a table lookup to shufflevector if the mask is constant.
+/// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
+/// which case we could lower the shufflevector with rev64 instructions
+/// as it's actually a byte reverse.
+Value *simplifyNeonTbl1(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder);
+
+/// Simplify NEON multiply-long intrinsics (smull, umull).
+/// These intrinsics perform widening multiplies: they multiply two vectors of
+/// narrow integers and produce a vector of wider integers. This function
+/// performs algebraic simplifications:
+/// 1. Multiply by zero => zero vector
+/// 2. Multiply by one => zero/sign-extend the non-one operand
+/// 3. Both operands constant => regular multiply that can be constant-folded
+/// later
+Instruction *simplifyNeonMultiply(IntrinsicInst &II, InstCombiner &IC,
+ bool Zext);
+
+/// Simplify AES encryption/decryption intrinsics (AESE, AESD).
+///
+/// ARM's AES instructions (AESE/AESD) XOR the data and the key, provided as
+/// separate arguments, before performing the encryption/decryption operation.
+/// We can fold that "internal" XOR with a previous one.
+Instruction *simplifyAES(IntrinsicInst &II, InstCombiner &IC);
+
+} // namespace ARMCommon
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_ARMCOMMON_ARMCOMMONINSTCOMBINEINTRINSIC_H
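[Note: a sketch of the multiply-long simplifications this header documents,
using the ARM spelling; the aarch64_neon smull/umull cases fold the same way
(value names illustrative):

  declare <4 x i32> @llvm.arm.neon.vmulls(<4 x i16>, <4 x i16>)

  ; Multiply by zero folds to a zero vector:
  %a = call <4 x i32> @llvm.arm.neon.vmulls(<4 x i16> %x, <4 x i16> zeroinitializer)
    ; => ret <4 x i32> zeroinitializer

  ; Two constant operands become an ordinary mul of the (here sign-)extended
  ; constants, which later constant folding collapses:
  %b = call <4 x i32> @llvm.arm.neon.vmulls(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> <i16 3, i16 3, i16 3, i16 3>)
    ; => <4 x i32> <i32 6, i32 6, i32 6, i32 6>
]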
diff --git a/llvm/lib/Target/ARMCommon/CMakeLists.txt b/llvm/lib/Target/ARMCommon/CMakeLists.txt
new file mode 100644
index 0000000000000..1805a5df2f053
--- /dev/null
+++ b/llvm/lib/Target/ARMCommon/CMakeLists.txt
@@ -0,0 +1,8 @@
+add_llvm_component_library(LLVMARMCommon
+ ARMCommonInstCombineIntrinsic.cpp
+
+ LINK_COMPONENTS
+ Core
+ Support
+ TransformUtils
+ )
diff --git a/llvm/lib/Target/CMakeLists.txt b/llvm/lib/Target/CMakeLists.txt
index bcc13f942bf96..e3528014a4be2 100644
--- a/llvm/lib/Target/CMakeLists.txt
+++ b/llvm/lib/Target/CMakeLists.txt
@@ -31,6 +31,11 @@ if (NOT BUILD_SHARED_LIBS AND NOT APPLE AND
set(CMAKE_CXX_VISIBILITY_PRESET hidden)
endif()
+# Add shared ARM/AArch64 utilities if either target is being built
+if("ARM" IN_LIST LLVM_TARGETS_TO_BUILD OR "AArch64" IN_LIST LLVM_TARGETS_TO_BUILD)
+ add_subdirectory(ARMCommon)
+endif()
+
foreach(t ${LLVM_TARGETS_TO_BUILD})
message(STATUS "Targeting ${t}")
add_subdirectory(${t})
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 8e4edefec42fd..8a54c0dde6be6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -737,44 +737,6 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
return nullptr;
}
-/// Convert a table lookup to shufflevector if the mask is constant.
-/// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
-/// which case we could lower the shufflevector with rev64 instructions
-/// as it's actually a byte reverse.
-static Value *simplifyNeonTbl1(const IntrinsicInst &II,
- InstCombiner::BuilderTy &Builder) {
- // Bail out if the mask is not a constant.
- auto *C = dyn_cast<Constant>(II.getArgOperand(1));
- if (!C)
- return nullptr;
-
- auto *VecTy = cast<FixedVectorType>(II.getType());
- unsigned NumElts = VecTy->getNumElements();
-
- // Only perform this transformation for <8 x i8> vector types.
- if (!VecTy->getElementType()->isIntegerTy(8) || NumElts != 8)
- return nullptr;
-
- int Indexes[8];
-
- for (unsigned I = 0; I < NumElts; ++I) {
- Constant *COp = C->getAggregateElement(I);
-
- if (!COp || !isa<ConstantInt>(COp))
- return nullptr;
-
- Indexes[I] = cast<ConstantInt>(COp)->getLimitedValue();
-
- // Make sure the mask indices are in range.
- if ((unsigned)Indexes[I] >= NumElts)
- return nullptr;
- }
-
- auto *V1 = II.getArgOperand(0);
- auto *V2 = Constant::getNullValue(V1->getType());
- return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes));
-}
-
// Returns true iff the 2 intrinsics have the same operands, limiting the
// comparison to the first NumOperands.
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
@@ -3155,72 +3117,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Intrinsic::getOrInsertDeclaration(II->getModule(), NewIntrin);
return CallInst::Create(NewFn, CallArgs);
}
- case Intrinsic::arm_neon_vtbl1:
- case Intrinsic::aarch64_neon_tbl1:
- if (Value *V = simplifyNeonTbl1(*II, Builder))
- return replaceInstUsesWith(*II, V);
- break;
-
- case Intrinsic::arm_neon_vmulls:
- case Intrinsic::arm_neon_vmullu:
- case Intrinsic::aarch64_neon_smull:
- case Intrinsic::aarch64_neon_umull: {
- Value *Arg0 = II->getArgOperand(0);
- Value *Arg1 = II->getArgOperand(1);
-
- // Handle mul by zero first:
- if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
- return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
- }
-
- // Check for constant LHS & RHS - in this case we just simplify.
- bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
- IID == Intrinsic::aarch64_neon_umull);
- VectorType *NewVT = cast<VectorType>(II->getType());
- if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
- if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
- Value *V0 = Builder.CreateIntCast(CV0, NewVT, /*isSigned=*/!Zext);
- Value *V1 = Builder.CreateIntCast(CV1, NewVT, /*isSigned=*/!Zext);
- return replaceInstUsesWith(CI, Builder.CreateMul(V0, V1));
- }
-
- // Couldn't simplify - canonicalize constant to the RHS.
- std::swap(Arg0, Arg1);
- }
-
- // Handle mul by one:
- if (Constant *CV1 = dyn_cast<Constant>(Arg1))
- if (ConstantInt *Splat =
- dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
- if (Splat->isOne())
- return CastInst::CreateIntegerCast(Arg0, II->getType(),
- /*isSigned=*/!Zext);
-
- break;
- }
- case Intrinsic::arm_neon_aesd:
- case Intrinsic::arm_neon_aese:
- case Intrinsic::aarch64_crypto_aesd:
- case Intrinsic::aarch64_crypto_aese:
- case Intrinsic::aarch64_sve_aesd:
- case Intrinsic::aarch64_sve_aese: {
- Value *DataArg = II->getArgOperand(0);
- Value *KeyArg = II->getArgOperand(1);
-
- // Accept zero on either operand.
- if (!match(KeyArg, m_ZeroInt()))
- std::swap(KeyArg, DataArg);
-
- // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
- Value *Data, *Key;
- if (match(KeyArg, m_ZeroInt()) &&
- match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
- replaceOperand(*II, 0, Data);
- replaceOperand(*II, 1, Key);
- return II;
- }
- break;
- }
case Intrinsic::hexagon_V6_vandvrt:
case Intrinsic::hexagon_V6_vandvrt_128B: {
// Simplify Q -> V -> Q conversion.
diff --git a/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll b/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
index 8c69d0721b738..fdc628bb59cb0 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+; RUN: opt --mtriple=aarch64 -S -passes=instcombine < %s | FileCheck %s
; ARM64 AES intrinsic variants
define <16 x i8> @combineXorAeseZeroARM64(<16 x i8> %data, <16 x i8> %key) {
diff --git a/llvm/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll b/llvm/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll
index 5fc5709ff8897..9ba4b418cb8e5 100644
--- a/llvm/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+; RUN: opt -mtriple=arm -S -passes=instcombine < %s | FileCheck %s
define <4 x i32> @mulByZero(<4 x i16> %x) nounwind readnone ssp {
; CHECK-LABEL: define <4 x i32> @mulByZero(
diff --git a/llvm/test/Transforms/InstCombine/ARM/aes-intrinsics.ll b/llvm/test/Transforms/InstCombine/ARM/aes-intrinsics.ll
index 0056d872ff9e3..10175096035ec 100644
--- a/llvm/test/Transforms/InstCombine/ARM/aes-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/ARM/aes-intrinsics.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+; RUN: opt -mtriple=arm -S -passes=instcombine < %s | FileCheck %s
; ARM AES intrinsic variants
define <16 x i8> @combineXorAeseZeroARM(<16 x i8> %data, <16 x i8> %key) {
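[Note: these tests exercise the AES XOR fold described above. A minimal sketch
of the pattern they check (value names illustrative):

  declare <16 x i8> @llvm.arm.neon.aese(<16 x i8>, <16 x i8>)

  ; Before: an explicit xor feeding aese with a zero key
  %xored = xor <16 x i8> %data, %key
  %r = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %xored, <16 x i8> zeroinitializer)

  ; After: the xor is absorbed into the instruction's built-in data^key
  %r = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data, <16 x i8> %key)
]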
diff --git a/llvm/test/Transforms/InstCombine/ARM/tbl1.ll b/llvm/test/Transforms/InstCombine/ARM/tbl1.ll
index fbec1a2bb7a07..ceeac8648ec51 100644
--- a/llvm/test/Transforms/InstCombine/ARM/tbl1.ll
+++ b/llvm/test/Transforms/InstCombine/ARM/tbl1.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple=arm -passes=instcombine -S | FileCheck %s
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "armv8-arm-none-eabi"
From 234164f31e24d5194e498d9fbbb0cc6981d760d4 Mon Sep 17 00:00:00 2001
From: valadaptive <valadaptive at protonmail.com>
Date: Tue, 25 Nov 2025 19:06:37 -0500
Subject: [PATCH 2/4] [AArch64][ARM] !Zext -> IsSigned
---
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 4 ++--
llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 4 ++--
.../Target/ARMCommon/ARMCommonInstCombineIntrinsic.cpp | 9 ++++-----
.../lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.h | 2 +-
4 files changed, 9 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 74be251668ed6..4958b44836db8 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2863,8 +2863,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
break;
case Intrinsic::aarch64_neon_smull:
case Intrinsic::aarch64_neon_umull: {
- bool Zext = IID == Intrinsic::aarch64_neon_umull;
- return ARMCommon::simplifyNeonMultiply(II, IC, Zext);
+ bool IsSigned = IID == Intrinsic::aarch64_neon_smull;
+ return ARMCommon::simplifyNeonMultiply(II, IC, IsSigned);
}
case Intrinsic::aarch64_crypto_aesd:
case Intrinsic::aarch64_crypto_aese:
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index bff93aa804112..63541872f74de 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -190,8 +190,8 @@ ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
case Intrinsic::arm_neon_vmulls:
case Intrinsic::arm_neon_vmullu: {
- bool Zext = IID == Intrinsic::arm_neon_vmullu;
- return ARMCommon::simplifyNeonMultiply(II, IC, Zext);
+ bool IsSigned = IID == Intrinsic::arm_neon_vmulls;
+ return ARMCommon::simplifyNeonMultiply(II, IC, IsSigned);
}
case Intrinsic::arm_neon_aesd:
diff --git a/llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.cpp b/llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.cpp
index 9b8c1f4747dc1..9ce82e66334b7 100644
--- a/llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.cpp
@@ -75,7 +75,7 @@ Value *simplifyNeonTbl1(const IntrinsicInst &II,
/// 3. Both operands constant => regular multiply that can be constant-folded
/// later
Instruction *simplifyNeonMultiply(IntrinsicInst &II, InstCombiner &IC,
- bool Zext) {
+ bool IsSigned) {
Value *Arg0 = II.getArgOperand(0);
Value *Arg1 = II.getArgOperand(1);
@@ -88,8 +88,8 @@ Instruction *simplifyNeonMultiply(IntrinsicInst &II, InstCombiner &IC,
VectorType *NewVT = cast<VectorType>(II.getType());
if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
- Value *V0 = IC.Builder.CreateIntCast(CV0, NewVT, /*isSigned=*/!Zext);
- Value *V1 = IC.Builder.CreateIntCast(CV1, NewVT, /*isSigned=*/!Zext);
+ Value *V0 = IC.Builder.CreateIntCast(CV0, NewVT, IsSigned);
+ Value *V1 = IC.Builder.CreateIntCast(CV1, NewVT, IsSigned);
return IC.replaceInstUsesWith(II, IC.Builder.CreateMul(V0, V1));
}
@@ -102,8 +102,7 @@ Instruction *simplifyNeonMultiply(IntrinsicInst &II, InstCombiner &IC,
if (ConstantInt *Splat =
dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
if (Splat->isOne())
- return CastInst::CreateIntegerCast(Arg0, II.getType(),
- /*isSigned=*/!Zext);
+ return CastInst::CreateIntegerCast(Arg0, II.getType(), IsSigned);
return nullptr;
}
diff --git a/llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.h b/llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.h
index 967532b50c7c2..f9d43e1a7becb 100644
--- a/llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.h
+++ b/llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.h
@@ -43,7 +43,7 @@ Value *simplifyNeonTbl1(const IntrinsicInst &II,
/// 3. Both operands constant => regular multiply that can be constant-folded
/// later
Instruction *simplifyNeonMultiply(IntrinsicInst &II, InstCombiner &IC,
- bool Zext);
+ bool IsSigned);
/// Simplify AES encryption/decryption intrinsics (AESE, AESD).
///
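[Note: the renamed IsSigned flag reads directly at the fold sites: for a
splat-of-one operand, smull/vmulls extends the other operand with sext and
umull/vmullu with zext. A small IR sketch of the two outcomes (value names
illustrative):

  %s = call <4 x i32> @llvm.arm.neon.vmulls(<4 x i16> %x, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
    ; => %s = sext <4 x i16> %x to <4 x i32>
  %u = call <4 x i32> @llvm.arm.neon.vmullu(<4 x i16> %x, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
    ; => %u = zext <4 x i16> %x to <4 x i32>
]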
From e53963fb1221a442e4c7c2bda269f06a50b17c89 Mon Sep 17 00:00:00 2001
From: valadaptive <valadaptive at protonmail.com>
Date: Tue, 25 Nov 2025 19:43:43 -0500
Subject: [PATCH 3/4] [AArch64][ARM] Make simplifyNeonTbl1 behave like the
other transforms
---
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 4 +---
llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 4 +---
llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.cpp | 6 +++---
llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.h | 3 +--
4 files changed, 6 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 4958b44836db8..8acad13c494d4 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2858,9 +2858,7 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
case Intrinsic::aarch64_neon_fminnm:
return instCombineMaxMinNM(IC, II);
case Intrinsic::aarch64_neon_tbl1:
- if (Value *V = ARMCommon::simplifyNeonTbl1(II, IC.Builder))
- return IC.replaceInstUsesWith(II, V);
- break;
+ return ARMCommon::simplifyNeonTbl1(II, IC);
case Intrinsic::aarch64_neon_smull:
case Intrinsic::aarch64_neon_umull: {
bool IsSigned = IID == Intrinsic::aarch64_neon_smull;
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 63541872f74de..3dd3ae65321f8 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -184,9 +184,7 @@ ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
}
case Intrinsic::arm_neon_vtbl1:
- if (Value *V = ARMCommon::simplifyNeonTbl1(II, IC.Builder))
- return IC.replaceInstUsesWith(II, V);
- break;
+ return ARMCommon::simplifyNeonTbl1(II, IC);
case Intrinsic::arm_neon_vmulls:
case Intrinsic::arm_neon_vmullu: {
diff --git a/llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.cpp b/llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.cpp
index 9ce82e66334b7..6e5711a0272e3 100644
--- a/llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.cpp
@@ -32,8 +32,7 @@ namespace ARMCommon {
/// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
/// which case we could lower the shufflevector with rev64 instructions
/// as it's actually a byte reverse.
-Value *simplifyNeonTbl1(const IntrinsicInst &II,
- InstCombiner::BuilderTy &Builder) {
+Instruction *simplifyNeonTbl1(IntrinsicInst &II, InstCombiner &IC) {
// Bail out if the mask is not a constant.
auto *C = dyn_cast<Constant>(II.getArgOperand(1));
if (!C)
@@ -63,7 +62,8 @@ Value *simplifyNeonTbl1(const IntrinsicInst &II,
auto *V1 = II.getArgOperand(0);
auto *V2 = Constant::getNullValue(V1->getType());
- return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes));
+ Value *Shuf = IC.Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes));
+ return IC.replaceInstUsesWith(II, Shuf);
}
/// Simplify NEON multiply-long intrinsics (smull, umull).
diff --git a/llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.h b/llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.h
index f9d43e1a7becb..541fb6a57f558 100644
--- a/llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.h
+++ b/llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.h
@@ -31,8 +31,7 @@ namespace ARMCommon {
/// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
/// which case we could lower the shufflevector with rev64 instructions
/// as it's actually a byte reverse.
-Value *simplifyNeonTbl1(const IntrinsicInst &II,
- InstCombiner::BuilderTy &Builder);
+Instruction *simplifyNeonTbl1(IntrinsicInst &II, InstCombiner &IC);
/// Simplify NEON multiply-long intrinsics (smull, umull).
/// These intrinsics perform widening multiplies: they multiply two vectors of
From 40b6ac39a40afe9b487b1c75b64ff57671bee64a Mon Sep 17 00:00:00 2001
From: valadaptive <valadaptive at protonmail.com>
Date: Fri, 28 Nov 2025 12:10:53 -0500
Subject: [PATCH 4/4] [AArch64][ARM] Move the transforms to Transforms/Utils
---
.../Transforms/Utils}/ARMCommonInstCombineIntrinsic.h | 6 +++---
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 2 +-
llvm/lib/Target/AArch64/CMakeLists.txt | 1 -
llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 2 +-
llvm/lib/Target/ARM/CMakeLists.txt | 1 -
llvm/lib/Target/ARMCommon/CMakeLists.txt | 8 --------
llvm/lib/Target/CMakeLists.txt | 5 -----
.../Utils}/ARMCommonInstCombineIntrinsic.cpp | 2 +-
llvm/lib/Transforms/Utils/CMakeLists.txt | 1 +
.../utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn | 1 +
10 files changed, 8 insertions(+), 21 deletions(-)
rename llvm/{lib/Target/ARMCommon => include/llvm/Transforms/Utils}/ARMCommonInstCombineIntrinsic.h (91%)
delete mode 100644 llvm/lib/Target/ARMCommon/CMakeLists.txt
rename llvm/lib/{Target/ARMCommon => Transforms/Utils}/ARMCommonInstCombineIntrinsic.cpp (98%)
diff --git a/llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.h b/llvm/include/llvm/Transforms/Utils/ARMCommonInstCombineIntrinsic.h
similarity index 91%
rename from llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.h
rename to llvm/include/llvm/Transforms/Utils/ARMCommonInstCombineIntrinsic.h
index 541fb6a57f558..9426dc8d16482 100644
--- a/llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.h
+++ b/llvm/include/llvm/Transforms/Utils/ARMCommonInstCombineIntrinsic.h
@@ -16,8 +16,8 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TARGET_ARMCOMMON_ARMCOMMONINSTCOMBINEINTRINSIC_H
-#define LLVM_LIB_TARGET_ARMCOMMON_ARMCOMMONINSTCOMBINEINTRINSIC_H
+#ifndef LLVM_TRANSFORMS_UTILS_ARMCOMMONINSTCOMBINEINTRINSIC_H
+#define LLVM_TRANSFORMS_UTILS_ARMCOMMONINSTCOMBINEINTRINSIC_H
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
@@ -54,4 +54,4 @@ Instruction *simplifyAES(IntrinsicInst &II, InstCombiner &IC);
} // namespace ARMCommon
} // namespace llvm
-#endif // LLVM_LIB_TARGET_ARMCOMMON_ARMCOMMONINSTCOMBINEINTRINSIC_H
+#endif // LLVM_TRANSFORMS_UTILS_ARMCOMMONINSTCOMBINEINTRINSIC_H
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 8acad13c494d4..0d0c0970091d6 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "AArch64TargetTransformInfo.h"
-#include "../ARMCommon/ARMCommonInstCombineIntrinsic.h"
#include "AArch64ExpandImm.h"
#include "AArch64PerfectShuffle.h"
#include "AArch64SMEAttributes.h"
@@ -26,6 +25,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/TargetParser/AArch64TargetParser.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
+#include "llvm/Transforms/Utils/ARMCommonInstCombineIntrinsic.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include <algorithm>
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index d27a698ee9e4a..285d646293eb7 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -101,7 +101,6 @@ add_llvm_target(AArch64CodeGen
AArch64Desc
AArch64Info
AArch64Utils
- ARMCommon
Analysis
AsmPrinter
CFGuard
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 3dd3ae65321f8..c93b2fbf419fe 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "ARMTargetTransformInfo.h"
-#include "../ARMCommon/ARMCommonInstCombineIntrinsic.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/APInt.h"
@@ -32,6 +31,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/TargetParser/SubtargetFeature.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
+#include "llvm/Transforms/Utils/ARMCommonInstCombineIntrinsic.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
diff --git a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt
index 9fc9bc134e5cc..eb3ad01a54fb2 100644
--- a/llvm/lib/Target/ARM/CMakeLists.txt
+++ b/llvm/lib/Target/ARM/CMakeLists.txt
@@ -73,7 +73,6 @@ add_llvm_target(ARMCodeGen
Thumb2SizeReduction.cpp
LINK_COMPONENTS
- ARMCommon
ARMDesc
ARMInfo
ARMUtils
diff --git a/llvm/lib/Target/ARMCommon/CMakeLists.txt b/llvm/lib/Target/ARMCommon/CMakeLists.txt
deleted file mode 100644
index 1805a5df2f053..0000000000000
--- a/llvm/lib/Target/ARMCommon/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-add_llvm_component_library(LLVMARMCommon
- ARMCommonInstCombineIntrinsic.cpp
-
- LINK_COMPONENTS
- Core
- Support
- TransformUtils
- )
diff --git a/llvm/lib/Target/CMakeLists.txt b/llvm/lib/Target/CMakeLists.txt
index e3528014a4be2..bcc13f942bf96 100644
--- a/llvm/lib/Target/CMakeLists.txt
+++ b/llvm/lib/Target/CMakeLists.txt
@@ -31,11 +31,6 @@ if (NOT BUILD_SHARED_LIBS AND NOT APPLE AND
set(CMAKE_CXX_VISIBILITY_PRESET hidden)
endif()
-# Add shared ARM/AArch64 utilities if either target is being built
-if("ARM" IN_LIST LLVM_TARGETS_TO_BUILD OR "AArch64" IN_LIST LLVM_TARGETS_TO_BUILD)
- add_subdirectory(ARMCommon)
-endif()
-
foreach(t ${LLVM_TARGETS_TO_BUILD})
message(STATUS "Targeting ${t}")
add_subdirectory(${t})
diff --git a/llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.cpp b/llvm/lib/Transforms/Utils/ARMCommonInstCombineIntrinsic.cpp
similarity index 98%
rename from llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.cpp
rename to llvm/lib/Transforms/Utils/ARMCommonInstCombineIntrinsic.cpp
index 6e5711a0272e3..fe192596b6f2b 100644
--- a/llvm/lib/Target/ARMCommon/ARMCommonInstCombineIntrinsic.cpp
+++ b/llvm/lib/Transforms/Utils/ARMCommonInstCombineIntrinsic.cpp
@@ -16,7 +16,7 @@
///
//===----------------------------------------------------------------------===//
-#include "ARMCommonInstCombineIntrinsic.h"
+#include "llvm/Transforms/Utils/ARMCommonInstCombineIntrinsic.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Value.h"
diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt
index f367ca2fdf56b..a1c25fd9ccd2b 100644
--- a/llvm/lib/Transforms/Utils/CMakeLists.txt
+++ b/llvm/lib/Transforms/Utils/CMakeLists.txt
@@ -1,6 +1,7 @@
add_llvm_component_library(LLVMTransformUtils
AddDiscriminators.cpp
AMDGPUEmitPrintf.cpp
+ ARMCommonInstCombineIntrinsic.cpp
ASanStackFrameLayout.cpp
AssumeBundleBuilder.cpp
BasicBlockUtils.cpp
diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn
index 186d2ef96c19b..dae641537b43c 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn
@@ -8,6 +8,7 @@ static_library("Utils") {
]
sources = [
"AMDGPUEmitPrintf.cpp",
+ "ARMCommonInstCombineIntrinsic.cpp",
"ASanStackFrameLayout.cpp",
"AddDiscriminators.cpp",
"AssumeBundleBuilder.cpp",