[llvm] [AArch64][InstCombine] Combine AES instructions with zero operands. (PR #142781)

Ricardo Jesus via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 4 07:34:38 PDT 2025


https://github.com/rj-jesus created https://github.com/llvm/llvm-project/pull/142781

We currently combine (AES (EOR A, B), 0) into (AES A, B) for Neon
intrinsics, but only when the zero operand appears in the RHS of the AES
instruction.

This patch extends the combine to support the AES SVE intrinsics and the
case where the zero operand appears in the LHS of the AES instruction.

GCC has had a similar optimisation for a long time: https://gcc.gnu.org/cgit/gcc/commit/?id=9b57fd3d96f312194b49fb4774dd2ce075ef5c17.
* Neon: https://godbolt.org/z/Gbhvq6bzd
* SVE: https://godbolt.org/z/7Kbox589Y
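
For illustration, a minimal C sketch of the source patterns this combine
targets (hand-written here, not copied from the godbolt links above; it
assumes the ACLE intrinsics vaeseq_u8/veorq_u8 and svaese_u8/sveor_u8_x,
compiled with e.g. -O2 -march=armv8-a+aes+sve2-aes):

#include <arm_neon.h>
#include <arm_sve.h>

// Neon: the explicit EOR should fold into the XOR built into AESE,
// leaving a single "aese v0.16b, v1.16b".
uint8x16_t aese_neon(uint8x16_t data, uint8x16_t key) {
  return vaeseq_u8(veorq_u8(data, key), vdupq_n_u8(0));
}

// SVE: same idea with the SVE2-AES intrinsic; after this patch the zero
// may appear in either operand of the AES intrinsic.
svuint8_t aese_sve(svuint8_t data, svuint8_t key) {
  return svaese_u8(svdup_n_u8(0), sveor_u8_x(svptrue_b8(), data, key));
}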

From 618c6f24c2496b0455e68b9200913772d5f5a189 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 4 Jun 2025 05:40:32 -0700
Subject: [PATCH 1/2] Precommit tests.

---
 .../InstCombine/AArch64/aes-intrinsics.ll     | 52 +++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll b/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
index c6695f17b955b..2a3a1761ae71a 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
@@ -13,6 +13,18 @@ define <16 x i8> @combineXorAeseZeroARM64(<16 x i8> %data, <16 x i8> %key) {
   ret <16 x i8> %data.aes
 }
 
+define <16 x i8> @combineXorAeseZeroLhsARM64(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: define <16 x i8> @combineXorAeseZeroLhsARM64(
+; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) {
+; CHECK-NEXT:    [[DATA_XOR:%.*]] = xor <16 x i8> [[DATA]], [[KEY]]
+; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> zeroinitializer, <16 x i8> [[DATA_XOR]])
+; CHECK-NEXT:    ret <16 x i8> [[DATA_AES]]
+;
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> zeroinitializer, <16 x i8> %data.xor)
+  ret <16 x i8> %data.aes
+}
+
 define <16 x i8> @combineXorAeseNonZeroARM64(<16 x i8> %data, <16 x i8> %key) {
 ; CHECK-LABEL: define <16 x i8> @combineXorAeseNonZeroARM64(
 ; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) {
@@ -36,6 +48,18 @@ define <16 x i8> @combineXorAesdZeroARM64(<16 x i8> %data, <16 x i8> %key) {
   ret <16 x i8> %data.aes
 }
 
+define <16 x i8> @combineXorAesdZeroLhsARM64(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: define <16 x i8> @combineXorAesdZeroLhsARM64(
+; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) {
+; CHECK-NEXT:    [[DATA_XOR:%.*]] = xor <16 x i8> [[DATA]], [[KEY]]
+; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> zeroinitializer, <16 x i8> [[DATA_XOR]])
+; CHECK-NEXT:    ret <16 x i8> [[DATA_AES]]
+;
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> zeroinitializer, <16 x i8> %data.xor)
+  ret <16 x i8> %data.aes
+}
+
 define <16 x i8> @combineXorAesdNonZeroARM64(<16 x i8> %data, <16 x i8> %key) {
 ; CHECK-LABEL: define <16 x i8> @combineXorAesdNonZeroARM64(
 ; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) {
@@ -51,3 +75,31 @@ define <16 x i8> @combineXorAesdNonZeroARM64(<16 x i8> %data, <16 x i8> %key) {
 declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8>, <16 x i8>) #0
 declare <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8>, <16 x i8>) #0
 
+; SVE
+
+define <vscale x 16 x i8> @combineXorAeseZeroSVE(<vscale x 16 x i8> %data, <vscale x 16 x i8> %key) {
+; CHECK-LABEL: define <vscale x 16 x i8> @combineXorAeseZeroSVE(
+; CHECK-SAME: <vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8> [[KEY:%.*]]) {
+; CHECK-NEXT:    [[DATA_XOR:%.*]] = xor <vscale x 16 x i8> [[DATA]], [[KEY]]
+; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> [[DATA_XOR]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[DATA_AES]]
+;
+  %data.xor = xor <vscale x 16 x i8> %data, %key
+  %data.aes = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %data.xor)
+  ret <vscale x 16 x i8> %data.aes
+}
+
+define <vscale x 16 x i8> @combineXorAesdZeroSVE(<vscale x 16 x i8> %data, <vscale x 16 x i8> %key) {
+; CHECK-LABEL: define <vscale x 16 x i8> @combineXorAesdZeroSVE(
+; CHECK-SAME: <vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8> [[KEY:%.*]]) {
+; CHECK-NEXT:    [[DATA_XOR:%.*]] = xor <vscale x 16 x i8> [[DATA]], [[KEY]]
+; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> [[DATA_XOR]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[DATA_AES]]
+;
+  %data.xor = xor <vscale x 16 x i8> %data, %key
+  %data.aes = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %data.xor)
+  ret <vscale x 16 x i8> %data.aes
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8>, <vscale x 16 x i8>) #0
+declare <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8>, <vscale x 16 x i8>) #0

From 2c3d0d8ce4f509d0c666cecd111440b0c0e5c176 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 4 Jun 2025 05:40:21 -0700
Subject: [PATCH 2/2] [AArch64][InstCombine] Combine AES instructions with zero
 operands.

We already combine (AES (EOR A, B), 0) into (AES A, B) for Neon
intrinsics, but only when the zero appears in the RHS of the AES
instruction.

This patch extends that combine to support the AES SVE intrinsics and
the case where the zero appears in the LHS of the AES instruction.
---
 llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp |  8 +++++++-
 .../Transforms/InstCombine/AArch64/aes-intrinsics.ll | 12 ++++--------
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index cfb4af391b540..c169ab25b2106 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3076,10 +3076,16 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
   case Intrinsic::arm_neon_aesd:
   case Intrinsic::arm_neon_aese:
   case Intrinsic::aarch64_crypto_aesd:
-  case Intrinsic::aarch64_crypto_aese: {
+  case Intrinsic::aarch64_crypto_aese:
+  case Intrinsic::aarch64_sve_aesd:
+  case Intrinsic::aarch64_sve_aese: {
     Value *DataArg = II->getArgOperand(0);
     Value *KeyArg  = II->getArgOperand(1);
 
+    // Accept zero on either operand.
+    if (!match(KeyArg, m_ZeroInt()))
+      std::swap(KeyArg, DataArg);
+
     // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
     Value *Data, *Key;
     if (match(KeyArg, m_ZeroInt()) &&
diff --git a/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll b/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
index 2a3a1761ae71a..ed3c566858c2c 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
@@ -16,8 +16,7 @@ define <16 x i8> @combineXorAeseZeroARM64(<16 x i8> %data, <16 x i8> %key) {
 define <16 x i8> @combineXorAeseZeroLhsARM64(<16 x i8> %data, <16 x i8> %key) {
 ; CHECK-LABEL: define <16 x i8> @combineXorAeseZeroLhsARM64(
 ; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) {
-; CHECK-NEXT:    [[DATA_XOR:%.*]] = xor <16 x i8> [[DATA]], [[KEY]]
-; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> zeroinitializer, <16 x i8> [[DATA_XOR]])
+; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> [[DATA]], <16 x i8> [[KEY]])
 ; CHECK-NEXT:    ret <16 x i8> [[DATA_AES]]
 ;
   %data.xor = xor <16 x i8> %data, %key
@@ -51,8 +50,7 @@ define <16 x i8> @combineXorAesdZeroARM64(<16 x i8> %data, <16 x i8> %key) {
 define <16 x i8> @combineXorAesdZeroLhsARM64(<16 x i8> %data, <16 x i8> %key) {
 ; CHECK-LABEL: define <16 x i8> @combineXorAesdZeroLhsARM64(
 ; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) {
-; CHECK-NEXT:    [[DATA_XOR:%.*]] = xor <16 x i8> [[DATA]], [[KEY]]
-; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> zeroinitializer, <16 x i8> [[DATA_XOR]])
+; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> [[DATA]], <16 x i8> [[KEY]])
 ; CHECK-NEXT:    ret <16 x i8> [[DATA_AES]]
 ;
   %data.xor = xor <16 x i8> %data, %key
@@ -80,8 +78,7 @@ declare <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8>, <16 x i8>) #0
 define <vscale x 16 x i8> @combineXorAeseZeroSVE(<vscale x 16 x i8> %data, <vscale x 16 x i8> %key) {
 ; CHECK-LABEL: define <vscale x 16 x i8> @combineXorAeseZeroSVE(
 ; CHECK-SAME: <vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8> [[KEY:%.*]]) {
-; CHECK-NEXT:    [[DATA_XOR:%.*]] = xor <vscale x 16 x i8> [[DATA]], [[KEY]]
-; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> [[DATA_XOR]])
+; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8> [[DATA]], <vscale x 16 x i8> [[KEY]])
 ; CHECK-NEXT:    ret <vscale x 16 x i8> [[DATA_AES]]
 ;
   %data.xor = xor <vscale x 16 x i8> %data, %key
@@ -92,8 +89,7 @@ define <vscale x 16 x i8> @combineXorAeseZeroSVE(<vscale x 16 x i8> %data, <vsca
 define <vscale x 16 x i8> @combineXorAesdZeroSVE(<vscale x 16 x i8> %data, <vscale x 16 x i8> %key) {
 ; CHECK-LABEL: define <vscale x 16 x i8> @combineXorAesdZeroSVE(
 ; CHECK-SAME: <vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8> [[KEY:%.*]]) {
-; CHECK-NEXT:    [[DATA_XOR:%.*]] = xor <vscale x 16 x i8> [[DATA]], [[KEY]]
-; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> [[DATA_XOR]])
+; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8> [[DATA]], <vscale x 16 x i8> [[KEY]])
 ; CHECK-NEXT:    ret <vscale x 16 x i8> [[DATA_AES]]
 ;
   %data.xor = xor <vscale x 16 x i8> %data, %key


