[llvm] [AArch64] Improve expansion of immediates of the form (~w << 32 | w). (PR #162286)

Tue Oct 7 06:52:08 PDT 2025

https://github.com/rj-jesus created https://github.com/llvm/llvm-project/pull/162286

When one half of a 64-bit immediate corresponds to the negation of the other half, we can use a sequence of MOVN, MOVK and EOR (or EON) to expand the bottom half of the immediate and replicate its negation to the top half. In the general case, this saves us a MOVK compared to expanding the immediate explicitly.

As a refinement, when the bottom half contains a 16-bit chunk of ones, the intermediate MOVK can be omitted. Similarly, when the bottom half contains a chunk of zeros, we can alternatively expand its negation and use an EON to reconstruct the expected result. In either case, this still saves us a MOVK compared to the default expansion.

https://godbolt.org/z/cjrK1fzTb

>From 3056f39874354cf285fd1a453daf9dec216b377e Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 24 Sep 2025 08:09:01 -0700
Subject: [PATCH 1/2] Add tests.

---
 llvm/test/CodeGen/AArch64/arm64-movi.ll | 55 +++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/arm64-movi.ll b/llvm/test/CodeGen/AArch64/arm64-movi.ll
index c9074c2adbe3c..cc1ba5c9c1536 100644
--- a/llvm/test/CodeGen/AArch64/arm64-movi.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-movi.ll
@@ -549,3 +549,58 @@ define i64 @orr_32_eor_64() nounwind {
 ; CHECK-NEXT:    ret
   ret i64 18446604367017541391
 }
+
+;==--------------------------------------------------------------------------==
+; Tests for EOR / EON with MOVN.
+;==--------------------------------------------------------------------------==
+
+define i64 @movn_0_eon() {
+; CHECK-LABEL: movn_0_eon:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, #43690 // =0xaaaa
+; CHECK-NEXT:    movk x0, #21845, lsl #32
+; CHECK-NEXT:    movk x0, #65535, lsl #48
+; CHECK-NEXT:    ret
+  ret i64 u0xffff55550000aaaa
+}
+
+define i64 @movn_1_eon() {
+; CHECK-LABEL: movn_1_eon:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, #2863267840 // =0xaaaa0000
+; CHECK-NEXT:    movk x0, #65535, lsl #32
+; CHECK-NEXT:    movk x0, #21845, lsl #48
+; CHECK-NEXT:    ret
+  ret i64 u0x5555ffffaaaa0000
+}
+
+define i64 @movn_0_eor() {
+; CHECK-LABEL: movn_0_eor:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, #21845 // =0x5555
+; CHECK-NEXT:    movk x0, #65535, lsl #16
+; CHECK-NEXT:    movk x0, #43690, lsl #32
+; CHECK-NEXT:    ret
+  ret i64 u0x0000aaaaffff5555
+}
+
+define i64 @movn_1_eor() {
+; CHECK-LABEL: movn_1_eor:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, #65535 // =0xffff
+; CHECK-NEXT:    movk x0, #21845, lsl #16
+; CHECK-NEXT:    movk x0, #43690, lsl #48
+; CHECK-NEXT:    ret
+  ret i64 u0xaaaa00005555ffff
+}
+
+define i64 @movn_movk_eor() {
+; CHECK-LABEL: movn_movk_eor:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x0, #43690 // =0xaaaa
+; CHECK-NEXT:    movk x0, #52428, lsl #16
+; CHECK-NEXT:    movk x0, #21845, lsl #32
+; CHECK-NEXT:    movk x0, #13107, lsl #48
+; CHECK-NEXT:    ret
+  ret i64 u0x33335555ccccaaaa
+}

>From 151b440030f8001ef611eef6c0f7675990c0b5b8 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 24 Sep 2025 08:08:24 -0700
Subject: [PATCH 2/2] [AArch64] Improve expansion of immediates of the form (~w
 << 32 | w).

When one half of a 64-bit immediate corresponds to the negation of the
other half, we can use a sequence of MOVN, MOVK and EOR to expand the
bottom half of the immediate and replicate its negation to the top half.
In the general case, this saves us a MOVK compared to expanding the
immediate explicitly.

As a refinement, when the bottom half contains a 16-bit chunk of ones,
the intermediate MOVK can be omitted. Similarly, when the bottom half
contains a chunk of zeros, we can alternatively expand its negation and
use an EON to reconstruct the expected result. In either case, this still
saves us a MOVK compared to the default expansion.
---
 llvm/lib/Target/AArch64/AArch64ExpandImm.cpp  | 58 ++++++++++++++++++-
 .../AArch64/AArch64ExpandPseudoInsts.cpp      |  2 +
 llvm/test/CodeGen/AArch64/arm64-movi.ll       | 25 ++++----
 3 files changed, 69 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
index 98016271a9d00..184c45448dc9a 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
@@ -239,6 +239,57 @@ static bool trySequenceOfOnes(uint64_t UImm,
   return true;
 }
 
+// Attempt to expand 64-bit immediate values whose negated upper half matches
+// the lower half (for example, 0x1234'5678'edcb'a987).
+// Immediates of this form can generally be expanded via a sequence of
+// MOVN+MOVK to expand the lower half, followed by an EOR to shift and negate
+// the result to the upper half, e.g.:
+//   mov  x0, #-22137          // =0xffffffffffffa987
+//   movk x0, #60875, lsl #16  // =0xffffffffedcba987
+//   eor  x0, x0, x0, lsl #32  // =0xffffffffedcba987 ^ 0xedcba98700000000
+//                                =0x12345678edcba987.
+// When the lower half contains a 16-bit chunk of ones, such as
+// 0x0000'5678'ffff'a987, the intermediate MOVK is redundant.
+// Similarly, when it contains a 16-bit chunk of zeros, such as
+// 0xffff'5678'0000'a987, the expansion can instead be effected by expanding
+// the negation of the lower half and negating the result with an EON, e.g.:
+//   mov  x0, #-43400          // =0xffffffffffff5678
+//   eon  x0, x0, x0, lsl #32  // =0xffffffffffff5678 ^ ~0xffff567800000000
+//                                =0xffffffffffff5678 ^  0x0000a987ffffffff
+//                                =0xffff56780000a987.
+// In any of these cases, the expansion with EOR/EON saves an instruction
+// compared to the default expansion based on MOV and MOVKs.
+static bool tryCopyWithNegation(uint64_t Imm,
+                                SmallVectorImpl<ImmInsnModel> &Insn) {
+  // We need the negation of the upper half of Imm to match the lower half.
+  // Degenerate cases where Imm is a run of ones should be handled separately.
+  if ((~Imm >> 32) != (Imm & 0xffffffffULL) || llvm::isShiftedMask_64(Imm))
+    return false;
+
+  const unsigned Mask = 0xffff;
+  unsigned Opc = AArch64::EORXrs;
+
+  // If we have a chunk of all zeros in the lower half, we can save a MOVK by
+  // materialising the negated immediate and negating the result with an EON.
+  if ((Imm & Mask) == 0 || ((Imm >> 16) & Mask) == 0) {
+    Opc = AArch64::EONXrs;
+    Imm = ~Imm;
+  }
+
+  unsigned Imm0 = Imm & Mask;
+  unsigned Imm16 = (Imm >> 16) & Mask;
+  if (Imm0 != Mask) {
+    Insn.push_back({AArch64::MOVNXi, Imm0 ^ Mask, 0});
+    if (Imm16 != Mask)
+      Insn.push_back({AArch64::MOVKXi, Imm16, 16});
+  } else {
+    Insn.push_back({AArch64::MOVNXi, Imm16 ^ Mask, 16});
+  }
+
+  Insn.push_back({Opc, 0, 32});
+  return true;
+}
+
 static uint64_t GetRunOfOnesStartingAt(uint64_t V, uint64_t StartPosition) {
   uint64_t NumOnes = llvm::countr_one(V >> StartPosition);
 
@@ -617,7 +668,12 @@ void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize,
   // FIXME: Add more two-instruction sequences.
 
   // Three instruction sequences.
-  //
+
+  // Attempt to use a sequence of MOVN+MOVK+EOR/EON (shifted register).
+  // The MOVK can be avoided if Imm contains a zero / one chunk.
+  if (tryCopyWithNegation(Imm, Insn))
+    return;
+
   // Prefer MOVZ/MOVN followed by two MOVK; it's more readable, and possibly
   // the fastest sequence with fast literal generation. (If neither MOVK is
   // part of a fast literal generation pair, it could be slower than the
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 0f4bbfc3d610e..536260afb9482 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -169,6 +169,8 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
                 .addImm(I->Op2));
       }
       break;
+    case AArch64::EONXrs:
+    case AArch64::EORXrs:
     case AArch64::ORRWrs:
     case AArch64::ORRXrs: {
       Register DstReg = MI.getOperand(0).getReg();
diff --git a/llvm/test/CodeGen/AArch64/arm64-movi.ll b/llvm/test/CodeGen/AArch64/arm64-movi.ll
index cc1ba5c9c1536..c918f209bfda7 100644
--- a/llvm/test/CodeGen/AArch64/arm64-movi.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-movi.ll
@@ -557,9 +557,8 @@ define i64 @orr_32_eor_64() nounwind {
 define i64 @movn_0_eon() {
 ; CHECK-LABEL: movn_0_eon:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x0, #43690 // =0xaaaa
-; CHECK-NEXT:    movk x0, #21845, lsl #32
-; CHECK-NEXT:    movk x0, #65535, lsl #48
+; CHECK-NEXT:    mov x0, #-43691 // =0xffffffffffff5555
+; CHECK-NEXT:    eon x0, x0, x0, lsl #32
 ; CHECK-NEXT:    ret
   ret i64 u0xffff55550000aaaa
 }
@@ -567,9 +566,8 @@ define i64 @movn_0_eon() {
 define i64 @movn_1_eon() {
 ; CHECK-LABEL: movn_1_eon:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x0, #2863267840 // =0xaaaa0000
-; CHECK-NEXT:    movk x0, #65535, lsl #32
-; CHECK-NEXT:    movk x0, #21845, lsl #48
+; CHECK-NEXT:    mov x0, #-2863267841 // =0xffffffff5555ffff
+; CHECK-NEXT:    eon x0, x0, x0, lsl #32
 ; CHECK-NEXT:    ret
   ret i64 u0x5555ffffaaaa0000
 }
@@ -577,9 +575,8 @@ define i64 @movn_1_eon() {
 define i64 @movn_0_eor() {
 ; CHECK-LABEL: movn_0_eor:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x0, #21845 // =0x5555
-; CHECK-NEXT:    movk x0, #65535, lsl #16
-; CHECK-NEXT:    movk x0, #43690, lsl #32
+; CHECK-NEXT:    mov x0, #-43691 // =0xffffffffffff5555
+; CHECK-NEXT:    eor x0, x0, x0, lsl #32
 ; CHECK-NEXT:    ret
   ret i64 u0x0000aaaaffff5555
 }
@@ -587,9 +584,8 @@ define i64 @movn_0_eor() {
 define i64 @movn_1_eor() {
 ; CHECK-LABEL: movn_1_eor:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x0, #65535 // =0xffff
-; CHECK-NEXT:    movk x0, #21845, lsl #16
-; CHECK-NEXT:    movk x0, #43690, lsl #48
+; CHECK-NEXT:    mov x0, #-2863267841 // =0xffffffff5555ffff
+; CHECK-NEXT:    eor x0, x0, x0, lsl #32
 ; CHECK-NEXT:    ret
   ret i64 u0xaaaa00005555ffff
 }
@@ -597,10 +593,9 @@ define i64 @movn_1_eor() {
 define i64 @movn_movk_eor() {
 ; CHECK-LABEL: movn_movk_eor:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x0, #43690 // =0xaaaa
+; CHECK-NEXT:    mov x0, #-21846 // =0xffffffffffffaaaa
 ; CHECK-NEXT:    movk x0, #52428, lsl #16
-; CHECK-NEXT:    movk x0, #21845, lsl #32
-; CHECK-NEXT:    movk x0, #13107, lsl #48
+; CHECK-NEXT:    eor x0, x0, x0, lsl #32
 ; CHECK-NEXT:    ret
   ret i64 u0x33335555ccccaaaa
 }