[llvm] [AArch64][InstCombine] Combine AES instructions with zero operands. (PR #142781)

Ricardo Jesus via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 5 01:04:51 PDT 2025


https://github.com/rj-jesus updated https://github.com/llvm/llvm-project/pull/142781

>From 618c6f24c2496b0455e68b9200913772d5f5a189 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 4 Jun 2025 05:40:32 -0700
Subject: [PATCH 1/3] Precommit tests.

---
 .../InstCombine/AArch64/aes-intrinsics.ll     | 52 +++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll b/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
index c6695f17b955b..2a3a1761ae71a 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
@@ -13,6 +13,18 @@ define <16 x i8> @combineXorAeseZeroARM64(<16 x i8> %data, <16 x i8> %key) {
   ret <16 x i8> %data.aes
 }
 
+define <16 x i8> @combineXorAeseZeroLhsARM64(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: define <16 x i8> @combineXorAeseZeroLhsARM64(
+; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) {
+; CHECK-NEXT:    [[DATA_XOR:%.*]] = xor <16 x i8> [[DATA]], [[KEY]]
+; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> zeroinitializer, <16 x i8> [[DATA_XOR]])
+; CHECK-NEXT:    ret <16 x i8> [[DATA_AES]]
+;
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> zeroinitializer, <16 x i8> %data.xor)
+  ret <16 x i8> %data.aes
+}
+
 define <16 x i8> @combineXorAeseNonZeroARM64(<16 x i8> %data, <16 x i8> %key) {
 ; CHECK-LABEL: define <16 x i8> @combineXorAeseNonZeroARM64(
 ; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) {
@@ -36,6 +48,18 @@ define <16 x i8> @combineXorAesdZeroARM64(<16 x i8> %data, <16 x i8> %key) {
   ret <16 x i8> %data.aes
 }
 
+define <16 x i8> @combineXorAesdZeroLhsARM64(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: define <16 x i8> @combineXorAesdZeroLhsARM64(
+; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) {
+; CHECK-NEXT:    [[DATA_XOR:%.*]] = xor <16 x i8> [[DATA]], [[KEY]]
+; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> zeroinitializer, <16 x i8> [[DATA_XOR]])
+; CHECK-NEXT:    ret <16 x i8> [[DATA_AES]]
+;
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> zeroinitializer, <16 x i8> %data.xor)
+  ret <16 x i8> %data.aes
+}
+
 define <16 x i8> @combineXorAesdNonZeroARM64(<16 x i8> %data, <16 x i8> %key) {
 ; CHECK-LABEL: define <16 x i8> @combineXorAesdNonZeroARM64(
 ; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) {
@@ -51,3 +75,31 @@ define <16 x i8> @combineXorAesdNonZeroARM64(<16 x i8> %data, <16 x i8> %key) {
 declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8>, <16 x i8>) #0
 declare <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8>, <16 x i8>) #0
 
+; SVE
+
+define <vscale x 16 x i8> @combineXorAeseZeroSVE(<vscale x 16 x i8> %data, <vscale x 16 x i8> %key) {
+; CHECK-LABEL: define <vscale x 16 x i8> @combineXorAeseZeroSVE(
+; CHECK-SAME: <vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8> [[KEY:%.*]]) {
+; CHECK-NEXT:    [[DATA_XOR:%.*]] = xor <vscale x 16 x i8> [[DATA]], [[KEY]]
+; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> [[DATA_XOR]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[DATA_AES]]
+;
+  %data.xor = xor <vscale x 16 x i8> %data, %key
+  %data.aes = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %data.xor)
+  ret <vscale x 16 x i8> %data.aes
+}
+
+define <vscale x 16 x i8> @combineXorAesdZeroSVE(<vscale x 16 x i8> %data, <vscale x 16 x i8> %key) {
+; CHECK-LABEL: define <vscale x 16 x i8> @combineXorAesdZeroSVE(
+; CHECK-SAME: <vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8> [[KEY:%.*]]) {
+; CHECK-NEXT:    [[DATA_XOR:%.*]] = xor <vscale x 16 x i8> [[DATA]], [[KEY]]
+; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> [[DATA_XOR]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[DATA_AES]]
+;
+  %data.xor = xor <vscale x 16 x i8> %data, %key
+  %data.aes = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %data.xor)
+  ret <vscale x 16 x i8> %data.aes
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8>, <vscale x 16 x i8>) #0
+declare <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8>, <vscale x 16 x i8>) #0

>From 2c3d0d8ce4f509d0c666cecd111440b0c0e5c176 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 4 Jun 2025 05:40:21 -0700
Subject: [PATCH 2/3] [AArch64][InstCombine] Combine AES instructions with zero
 operands.

We currently combine (AES (EOR A, B), 0) into (AES A, B) for the Neon
intrinsics, but only when the zero appears as the second (RHS) operand
of the AES instruction.

This patch extends the combine to support the SVE AES intrinsics and
the case where the zero appears as the first (LHS) operand of the AES
instruction.
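
For illustration, a minimal before/after sketch of the new LHS-zero
case (taken from the Neon tests below):

Before:
  %data.xor = xor <16 x i8> %data, %key
  %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> zeroinitializer, <16 x i8> %data.xor)

After:
  %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data, <16 x i8> %key)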
---
 llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp |  8 +++++++-
 .../Transforms/InstCombine/AArch64/aes-intrinsics.ll | 12 ++++--------
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index cfb4af391b540..c169ab25b2106 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3076,10 +3076,16 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
   case Intrinsic::arm_neon_aesd:
   case Intrinsic::arm_neon_aese:
   case Intrinsic::aarch64_crypto_aesd:
-  case Intrinsic::aarch64_crypto_aese: {
+  case Intrinsic::aarch64_crypto_aese:
+  case Intrinsic::aarch64_sve_aesd:
+  case Intrinsic::aarch64_sve_aese: {
     Value *DataArg = II->getArgOperand(0);
     Value *KeyArg  = II->getArgOperand(1);
 
+    // Accept zero on either operand.
+    if (!match(KeyArg, m_ZeroInt()))
+      std::swap(KeyArg, DataArg);
+
     // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
     Value *Data, *Key;
     if (match(KeyArg, m_ZeroInt()) &&
diff --git a/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll b/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
index 2a3a1761ae71a..ed3c566858c2c 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
@@ -16,8 +16,7 @@ define <16 x i8> @combineXorAeseZeroARM64(<16 x i8> %data, <16 x i8> %key) {
 define <16 x i8> @combineXorAeseZeroLhsARM64(<16 x i8> %data, <16 x i8> %key) {
 ; CHECK-LABEL: define <16 x i8> @combineXorAeseZeroLhsARM64(
 ; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) {
-; CHECK-NEXT:    [[DATA_XOR:%.*]] = xor <16 x i8> [[DATA]], [[KEY]]
-; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> zeroinitializer, <16 x i8> [[DATA_XOR]])
+; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> [[DATA]], <16 x i8> [[KEY]])
 ; CHECK-NEXT:    ret <16 x i8> [[DATA_AES]]
 ;
   %data.xor = xor <16 x i8> %data, %key
@@ -51,8 +50,7 @@ define <16 x i8> @combineXorAesdZeroARM64(<16 x i8> %data, <16 x i8> %key) {
 define <16 x i8> @combineXorAesdZeroLhsARM64(<16 x i8> %data, <16 x i8> %key) {
 ; CHECK-LABEL: define <16 x i8> @combineXorAesdZeroLhsARM64(
 ; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) {
-; CHECK-NEXT:    [[DATA_XOR:%.*]] = xor <16 x i8> [[DATA]], [[KEY]]
-; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> zeroinitializer, <16 x i8> [[DATA_XOR]])
+; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> [[DATA]], <16 x i8> [[KEY]])
 ; CHECK-NEXT:    ret <16 x i8> [[DATA_AES]]
 ;
   %data.xor = xor <16 x i8> %data, %key
@@ -80,8 +78,7 @@ declare <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8>, <16 x i8>) #0
 define <vscale x 16 x i8> @combineXorAeseZeroSVE(<vscale x 16 x i8> %data, <vscale x 16 x i8> %key) {
 ; CHECK-LABEL: define <vscale x 16 x i8> @combineXorAeseZeroSVE(
 ; CHECK-SAME: <vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8> [[KEY:%.*]]) {
-; CHECK-NEXT:    [[DATA_XOR:%.*]] = xor <vscale x 16 x i8> [[DATA]], [[KEY]]
-; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> [[DATA_XOR]])
+; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8> [[DATA]], <vscale x 16 x i8> [[KEY]])
 ; CHECK-NEXT:    ret <vscale x 16 x i8> [[DATA_AES]]
 ;
   %data.xor = xor <vscale x 16 x i8> %data, %key
@@ -92,8 +89,7 @@ define <vscale x 16 x i8> @combineXorAeseZeroSVE(<vscale x 16 x i8> %data, <vsca
 define <vscale x 16 x i8> @combineXorAesdZeroSVE(<vscale x 16 x i8> %data, <vscale x 16 x i8> %key) {
 ; CHECK-LABEL: define <vscale x 16 x i8> @combineXorAesdZeroSVE(
 ; CHECK-SAME: <vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8> [[KEY:%.*]]) {
-; CHECK-NEXT:    [[DATA_XOR:%.*]] = xor <vscale x 16 x i8> [[DATA]], [[KEY]]
-; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> [[DATA_XOR]])
+; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8> [[DATA]], <vscale x 16 x i8> [[KEY]])
 ; CHECK-NEXT:    ret <vscale x 16 x i8> [[DATA_AES]]
 ;
   %data.xor = xor <vscale x 16 x i8> %data, %key

>From 8ffefea0a1d77ad42b81957ef428d4f641ea9213 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Thu, 5 Jun 2025 01:02:23 -0700
Subject: [PATCH 3/3] Add SVE zero RHS tests.

---
 .../InstCombine/AArch64/aes-intrinsics.ll     | 30 ++++++++++++++++---
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll b/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
index ed3c566858c2c..8c69d0721b738 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
@@ -75,8 +75,8 @@ declare <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8>, <16 x i8>) #0
 
 ; SVE
 
-define <vscale x 16 x i8> @combineXorAeseZeroSVE(<vscale x 16 x i8> %data, <vscale x 16 x i8> %key) {
-; CHECK-LABEL: define <vscale x 16 x i8> @combineXorAeseZeroSVE(
+define <vscale x 16 x i8> @combineXorAeseZeroLhsSVE(<vscale x 16 x i8> %data, <vscale x 16 x i8> %key) {
+; CHECK-LABEL: define <vscale x 16 x i8> @combineXorAeseZeroLhsSVE(
 ; CHECK-SAME: <vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8> [[KEY:%.*]]) {
 ; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8> [[DATA]], <vscale x 16 x i8> [[KEY]])
 ; CHECK-NEXT:    ret <vscale x 16 x i8> [[DATA_AES]]
@@ -86,8 +86,19 @@ define <vscale x 16 x i8> @combineXorAeseZeroSVE(<vscale x 16 x i8> %data, <vsca
   ret <vscale x 16 x i8> %data.aes
 }
 
-define <vscale x 16 x i8> @combineXorAesdZeroSVE(<vscale x 16 x i8> %data, <vscale x 16 x i8> %key) {
-; CHECK-LABEL: define <vscale x 16 x i8> @combineXorAesdZeroSVE(
+define <vscale x 16 x i8> @combineXorAeseZeroRhsSVE(<vscale x 16 x i8> %data, <vscale x 16 x i8> %key) {
+; CHECK-LABEL: define <vscale x 16 x i8> @combineXorAeseZeroRhsSVE(
+; CHECK-SAME: <vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8> [[KEY:%.*]]) {
+; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8> [[DATA]], <vscale x 16 x i8> [[KEY]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[DATA_AES]]
+;
+  %data.xor = xor <vscale x 16 x i8> %data, %key
+  %data.aes = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8> %data.xor, <vscale x 16 x i8> zeroinitializer)
+  ret <vscale x 16 x i8> %data.aes
+}
+
+define <vscale x 16 x i8> @combineXorAesdZeroLhsSVE(<vscale x 16 x i8> %data, <vscale x 16 x i8> %key) {
+; CHECK-LABEL: define <vscale x 16 x i8> @combineXorAesdZeroLhsSVE(
 ; CHECK-SAME: <vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8> [[KEY:%.*]]) {
 ; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8> [[DATA]], <vscale x 16 x i8> [[KEY]])
 ; CHECK-NEXT:    ret <vscale x 16 x i8> [[DATA_AES]]
@@ -97,5 +108,16 @@ define <vscale x 16 x i8> @combineXorAesdZeroSVE(<vscale x 16 x i8> %data, <vsca
   ret <vscale x 16 x i8> %data.aes
 }
 
+define <vscale x 16 x i8> @combineXorAesdZeroRhsSVE(<vscale x 16 x i8> %data, <vscale x 16 x i8> %key) {
+; CHECK-LABEL: define <vscale x 16 x i8> @combineXorAesdZeroRhsSVE(
+; CHECK-SAME: <vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8> [[KEY:%.*]]) {
+; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8> [[DATA]], <vscale x 16 x i8> [[KEY]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[DATA_AES]]
+;
+  %data.xor = xor <vscale x 16 x i8> %data, %key
+  %data.aes = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8> %data.xor, <vscale x 16 x i8> zeroinitializer)
+  ret <vscale x 16 x i8> %data.aes
+}
+
 declare <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8>, <vscale x 16 x i8>) #0
 declare <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8>, <vscale x 16 x i8>) #0
