[llvm] [AArch64][InstCombine] Combine AES instructions with zero operands. (PR #142781)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 4 07:35:33 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Ricardo Jesus (rj-jesus)
<details>
<summary>Changes</summary>
We currently combine (AES (EOR A, B), 0) into (AES A, B) for Neon intrinsics
when the zero operand appears in the RHS of the AES instruction.
This patch extends the combine to support the SVE AES intrinsics and
the case where the zero operand appears in the LHS of the AES
instruction.
GCC has had such an optimisation for a long time: https://gcc.gnu.org/cgit/gcc/commit/?id=9b57fd3d96f312194b49fb4774dd2ce075ef5c17.
* Neon: https://godbolt.org/z/Gbhvq6bzd
* SVE: https://godbolt.org/z/7Kbox589Y
---
Full diff: https://github.com/llvm/llvm-project/pull/142781.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (+7-1)
- (modified) llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll (+48)
``````````diff
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index cfb4af391b540..c169ab25b2106 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3076,10 +3076,16 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
case Intrinsic::arm_neon_aesd:
case Intrinsic::arm_neon_aese:
case Intrinsic::aarch64_crypto_aesd:
- case Intrinsic::aarch64_crypto_aese: {
+ case Intrinsic::aarch64_crypto_aese:
+ case Intrinsic::aarch64_sve_aesd:
+ case Intrinsic::aarch64_sve_aese: {
Value *DataArg = II->getArgOperand(0);
Value *KeyArg = II->getArgOperand(1);
+ // Accept zero on either operand.
+ if (!match(KeyArg, m_ZeroInt()))
+ std::swap(KeyArg, DataArg);
+
// Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
Value *Data, *Key;
if (match(KeyArg, m_ZeroInt()) &&
diff --git a/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll b/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
index c6695f17b955b..ed3c566858c2c 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
@@ -13,6 +13,17 @@ define <16 x i8> @combineXorAeseZeroARM64(<16 x i8> %data, <16 x i8> %key) {
ret <16 x i8> %data.aes
}
+define <16 x i8> @combineXorAeseZeroLhsARM64(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: define <16 x i8> @combineXorAeseZeroLhsARM64(
+; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) {
+; CHECK-NEXT: [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> [[DATA]], <16 x i8> [[KEY]])
+; CHECK-NEXT: ret <16 x i8> [[DATA_AES]]
+;
+ %data.xor = xor <16 x i8> %data, %key
+ %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> zeroinitializer, <16 x i8> %data.xor)
+ ret <16 x i8> %data.aes
+}
+
define <16 x i8> @combineXorAeseNonZeroARM64(<16 x i8> %data, <16 x i8> %key) {
; CHECK-LABEL: define <16 x i8> @combineXorAeseNonZeroARM64(
; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) {
@@ -36,6 +47,17 @@ define <16 x i8> @combineXorAesdZeroARM64(<16 x i8> %data, <16 x i8> %key) {
ret <16 x i8> %data.aes
}
+define <16 x i8> @combineXorAesdZeroLhsARM64(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: define <16 x i8> @combineXorAesdZeroLhsARM64(
+; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) {
+; CHECK-NEXT: [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> [[DATA]], <16 x i8> [[KEY]])
+; CHECK-NEXT: ret <16 x i8> [[DATA_AES]]
+;
+ %data.xor = xor <16 x i8> %data, %key
+ %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> zeroinitializer, <16 x i8> %data.xor)
+ ret <16 x i8> %data.aes
+}
+
define <16 x i8> @combineXorAesdNonZeroARM64(<16 x i8> %data, <16 x i8> %key) {
; CHECK-LABEL: define <16 x i8> @combineXorAesdNonZeroARM64(
; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) {
@@ -51,3 +73,29 @@ define <16 x i8> @combineXorAesdNonZeroARM64(<16 x i8> %data, <16 x i8> %key) {
declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8>, <16 x i8>) #0
declare <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8>, <16 x i8>) #0
+; SVE
+
+define <vscale x 16 x i8> @combineXorAeseZeroSVE(<vscale x 16 x i8> %data, <vscale x 16 x i8> %key) {
+; CHECK-LABEL: define <vscale x 16 x i8> @combineXorAeseZeroSVE(
+; CHECK-SAME: <vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8> [[KEY:%.*]]) {
+; CHECK-NEXT: [[DATA_AES:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8> [[DATA]], <vscale x 16 x i8> [[KEY]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[DATA_AES]]
+;
+ %data.xor = xor <vscale x 16 x i8> %data, %key
+ %data.aes = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %data.xor)
+ ret <vscale x 16 x i8> %data.aes
+}
+
+define <vscale x 16 x i8> @combineXorAesdZeroSVE(<vscale x 16 x i8> %data, <vscale x 16 x i8> %key) {
+; CHECK-LABEL: define <vscale x 16 x i8> @combineXorAesdZeroSVE(
+; CHECK-SAME: <vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8> [[KEY:%.*]]) {
+; CHECK-NEXT: [[DATA_AES:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8> [[DATA]], <vscale x 16 x i8> [[KEY]])
+; CHECK-NEXT: ret <vscale x 16 x i8> [[DATA_AES]]
+;
+ %data.xor = xor <vscale x 16 x i8> %data, %key
+ %data.aes = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %data.xor)
+ ret <vscale x 16 x i8> %data.aes
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8>, <vscale x 16 x i8>) #0
+declare <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8>, <vscale x 16 x i8>) #0
``````````
</details>
https://github.com/llvm/llvm-project/pull/142781
More information about the llvm-commits mailing list