[llvm] [AArch64][InstCombine] Combine AES instructions with zero operands. (PR #142781)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 4 07:35:33 PDT 2025


llvmbot wrote:


@llvm/pr-subscribers-llvm-transforms

Author: Ricardo Jesus (rj-jesus)

Changes:

We currently combine (AES (EOR (A, B)), 0) into (AES A, B) for the Neon intrinsics,
but only when the zero operand appears on the RHS (the key operand) of the AES instruction.
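
For illustration, here is a minimal C sketch (not part of the patch) of the pattern already handled, using the ACLE intrinsic names from arm_neon.h and assuming a target with AES support (e.g. -march=armv8-a+aes):

```c
#include <arm_neon.h>

// Zero on the RHS (key operand): the explicit EOR folds into the XOR
// that AESE itself performs on its two inputs, leaving a single AESE.
uint8x16_t aese_zero_rhs(uint8x16_t data, uint8x16_t key) {
  return vaeseq_u8(veorq_u8(data, key), vdupq_n_u8(0)); // -> AESE data, key
}
```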

This patch extends the combine to cover the SVE AES intrinsics and the
case where the zero operand appears on the LHS (the data operand) of the
AES instruction; a source-level sketch follows the links below.

GCC has performed this optimisation for a long time: https://gcc.gnu.org/cgit/gcc/commit/?id=9b57fd3d96f312194b49fb4774dd2ce075ef5c17.
* Neon: https://godbolt.org/z/Gbhvq6bzd
* SVE: https://godbolt.org/z/7Kbox589Y
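
The newly covered cases look roughly like the sketch below (again hedged: ACLE intrinsic names, with the SVE part guarded on __ARM_FEATURE_SVE2_AES; this is approximately what the Godbolt links above compile):

```c
#include <arm_neon.h>

// Zero on the LHS (data operand): the XOR performed by AESE is
// commutative in its inputs, so this now also folds to a single AESE.
uint8x16_t aese_zero_lhs(uint8x16_t data, uint8x16_t key) {
  return vaeseq_u8(vdupq_n_u8(0), veorq_u8(data, key)); // -> AESE data, key
}

#if defined(__ARM_FEATURE_SVE2_AES)
#include <arm_sve.h>
// The scalable AESE intrinsic is now combined in the same way.
svuint8_t sve_aese_zero_lhs(svuint8_t data, svuint8_t key) {
  return svaese_u8(svdup_n_u8(0), sveor_u8_x(svptrue_b8(), data, key));
}
#endif
```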

---
Full diff: https://github.com/llvm/llvm-project/pull/142781.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (+7-1) 
- (modified) llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll (+48) 


``````````diff
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index cfb4af391b540..c169ab25b2106 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3076,10 +3076,16 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
   case Intrinsic::arm_neon_aesd:
   case Intrinsic::arm_neon_aese:
   case Intrinsic::aarch64_crypto_aesd:
-  case Intrinsic::aarch64_crypto_aese: {
+  case Intrinsic::aarch64_crypto_aese:
+  case Intrinsic::aarch64_sve_aesd:
+  case Intrinsic::aarch64_sve_aese: {
     Value *DataArg = II->getArgOperand(0);
     Value *KeyArg  = II->getArgOperand(1);
 
+    // Accept zero on either operand.
+    if (!match(KeyArg, m_ZeroInt()))
+      std::swap(KeyArg, DataArg);
+
     // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
     Value *Data, *Key;
     if (match(KeyArg, m_ZeroInt()) &&
diff --git a/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll b/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
index c6695f17b955b..ed3c566858c2c 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
@@ -13,6 +13,17 @@ define <16 x i8> @combineXorAeseZeroARM64(<16 x i8> %data, <16 x i8> %key) {
   ret <16 x i8> %data.aes
 }
 
+define <16 x i8> @combineXorAeseZeroLhsARM64(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: define <16 x i8> @combineXorAeseZeroLhsARM64(
+; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) {
+; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> [[DATA]], <16 x i8> [[KEY]])
+; CHECK-NEXT:    ret <16 x i8> [[DATA_AES]]
+;
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> zeroinitializer, <16 x i8> %data.xor)
+  ret <16 x i8> %data.aes
+}
+
 define <16 x i8> @combineXorAeseNonZeroARM64(<16 x i8> %data, <16 x i8> %key) {
 ; CHECK-LABEL: define <16 x i8> @combineXorAeseNonZeroARM64(
 ; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) {
@@ -36,6 +47,17 @@ define <16 x i8> @combineXorAesdZeroARM64(<16 x i8> %data, <16 x i8> %key) {
   ret <16 x i8> %data.aes
 }
 
+define <16 x i8> @combineXorAesdZeroLhsARM64(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: define <16 x i8> @combineXorAesdZeroLhsARM64(
+; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) {
+; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> [[DATA]], <16 x i8> [[KEY]])
+; CHECK-NEXT:    ret <16 x i8> [[DATA_AES]]
+;
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> zeroinitializer, <16 x i8> %data.xor)
+  ret <16 x i8> %data.aes
+}
+
 define <16 x i8> @combineXorAesdNonZeroARM64(<16 x i8> %data, <16 x i8> %key) {
 ; CHECK-LABEL: define <16 x i8> @combineXorAesdNonZeroARM64(
 ; CHECK-SAME: <16 x i8> [[DATA:%.*]], <16 x i8> [[KEY:%.*]]) {
@@ -51,3 +73,29 @@ define <16 x i8> @combineXorAesdNonZeroARM64(<16 x i8> %data, <16 x i8> %key) {
 declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8>, <16 x i8>) #0
 declare <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8>, <16 x i8>) #0
 
+; SVE
+
+define <vscale x 16 x i8> @combineXorAeseZeroSVE(<vscale x 16 x i8> %data, <vscale x 16 x i8> %key) {
+; CHECK-LABEL: define <vscale x 16 x i8> @combineXorAeseZeroSVE(
+; CHECK-SAME: <vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8> [[KEY:%.*]]) {
+; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8> [[DATA]], <vscale x 16 x i8> [[KEY]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[DATA_AES]]
+;
+  %data.xor = xor <vscale x 16 x i8> %data, %key
+  %data.aes = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %data.xor)
+  ret <vscale x 16 x i8> %data.aes
+}
+
+define <vscale x 16 x i8> @combineXorAesdZeroSVE(<vscale x 16 x i8> %data, <vscale x 16 x i8> %key) {
+; CHECK-LABEL: define <vscale x 16 x i8> @combineXorAesdZeroSVE(
+; CHECK-SAME: <vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8> [[KEY:%.*]]) {
+; CHECK-NEXT:    [[DATA_AES:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8> [[DATA]], <vscale x 16 x i8> [[KEY]])
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[DATA_AES]]
+;
+  %data.xor = xor <vscale x 16 x i8> %data, %key
+  %data.aes = tail call <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %data.xor)
+  ret <vscale x 16 x i8> %data.aes
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.aese(<vscale x 16 x i8>, <vscale x 16 x i8>) #0
+declare <vscale x 16 x i8> @llvm.aarch64.sve.aesd(<vscale x 16 x i8>, <vscale x 16 x i8>) #0

``````````



https://github.com/llvm/llvm-project/pull/142781

