[llvm] [AArch64] Improve expansion of immediates of the form (~w << 32 | w). (PR #162286)
Ricardo Jesus via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 21 09:37:53 PDT 2025
https://github.com/rj-jesus updated https://github.com/llvm/llvm-project/pull/162286
>From 3056f39874354cf285fd1a453daf9dec216b377e Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 24 Sep 2025 08:09:01 -0700
Subject: [PATCH 1/5] Add tests.
---
llvm/test/CodeGen/AArch64/arm64-movi.ll | 55 +++++++++++++++++++++++++
1 file changed, 55 insertions(+)
diff --git a/llvm/test/CodeGen/AArch64/arm64-movi.ll b/llvm/test/CodeGen/AArch64/arm64-movi.ll
index c9074c2adbe3c..cc1ba5c9c1536 100644
--- a/llvm/test/CodeGen/AArch64/arm64-movi.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-movi.ll
@@ -549,3 +549,58 @@ define i64 @orr_32_eor_64() nounwind {
; CHECK-NEXT: ret
ret i64 18446604367017541391
}
+
+;==--------------------------------------------------------------------------==
+; Tests for EOR / EON with MOVN.
+;==--------------------------------------------------------------------------==
+
+define i64 @movn_0_eon() {
+; CHECK-LABEL: movn_0_eon:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x0, #43690 // =0xaaaa
+; CHECK-NEXT: movk x0, #21845, lsl #32
+; CHECK-NEXT: movk x0, #65535, lsl #48
+; CHECK-NEXT: ret
+ ret i64 u0xffff55550000aaaa
+}
+
+define i64 @movn_1_eon() {
+; CHECK-LABEL: movn_1_eon:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x0, #2863267840 // =0xaaaa0000
+; CHECK-NEXT: movk x0, #65535, lsl #32
+; CHECK-NEXT: movk x0, #21845, lsl #48
+; CHECK-NEXT: ret
+ ret i64 u0x5555ffffaaaa0000
+}
+
+define i64 @movn_0_eor() {
+; CHECK-LABEL: movn_0_eor:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x0, #21845 // =0x5555
+; CHECK-NEXT: movk x0, #65535, lsl #16
+; CHECK-NEXT: movk x0, #43690, lsl #32
+; CHECK-NEXT: ret
+ ret i64 u0x0000aaaaffff5555
+}
+
+define i64 @movn_1_eor() {
+; CHECK-LABEL: movn_1_eor:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x0, #65535 // =0xffff
+; CHECK-NEXT: movk x0, #21845, lsl #16
+; CHECK-NEXT: movk x0, #43690, lsl #48
+; CHECK-NEXT: ret
+ ret i64 u0xaaaa00005555ffff
+}
+
+define i64 @movn_movk_eor() {
+; CHECK-LABEL: movn_movk_eor:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x0, #43690 // =0xaaaa
+; CHECK-NEXT: movk x0, #52428, lsl #16
+; CHECK-NEXT: movk x0, #21845, lsl #32
+; CHECK-NEXT: movk x0, #13107, lsl #48
+; CHECK-NEXT: ret
+ ret i64 u0x33335555ccccaaaa
+}
>From 151b440030f8001ef611eef6c0f7675990c0b5b8 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 24 Sep 2025 08:08:24 -0700
Subject: [PATCH 2/5] [AArch64] Improve expansion of immediates of the form (~w
<< 32 | w).
When one half of a 64-bit immediate corresponds to the negation of the
other half, we can use a sequence of MOVN, MOVK and EOR to expand the
bottom half of the immediate and replicate its negation to the top half.
In the general case, this saves us a MOVK compared to expanding the
immediate explicitly.
As a refinement, when the bottom half contains a 16-bit chunk of ones,
the intermediate MOVK can be omitted. Similarly, when the bottom half
contains a chunk of zeros, we can alternatively expand its negation and
use an EON to reconstruct the expected result. In either case, this still
saves us a MOVK compared to the default expansion.
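
For context, here is a small standalone C++ sketch of the general case described above (not part of the patch; the helper names isNegatedCopyForm and simulateMovnMovkEor are made up for illustration). It checks the (~w << 32 | w) form and simulates the MOVN+MOVK+EOR sequence to confirm it reproduces the immediate:

  #include <cassert>
  #include <cstdint>

  // True iff the negated upper half of Imm equals its lower half,
  // i.e. Imm has the form (~w << 32) | w.
  static bool isNegatedCopyForm(uint64_t Imm) {
    return (~Imm >> 32) == (Imm & 0xffffffffULL);
  }

  // Simulate MOVN (bits 0-15), MOVK (bits 16-31) and a final
  // "eor x0, x0, x0, lsl #32"; return true if the result equals Imm.
  static bool simulateMovnMovkEor(uint64_t Imm) {
    uint64_t W = Imm & 0xffffffffULL;               // lower half to materialise
    uint64_t X = ~((W & 0xffffULL) ^ 0xffffULL);    // MOVN: X[15:0] = W[15:0], rest ones
    X = (X & ~0xffff0000ULL) | (W & 0xffff0000ULL); // MOVK #W[31:16], lsl #16
    X ^= X << 32;                                   // EOR x0, x0, x0, lsl #32
    return X == Imm;
  }

  int main() {
    uint64_t Imm = 0x12345678edcba987ULL; // example used in the code comments below
    assert(isNegatedCopyForm(Imm) && simulateMovnMovkEor(Imm));
    return 0;
  }

The final EOR with the register shifted left by 32 both copies the lower half into the upper half and negates it, which is what makes the extra MOVKs of the default expansion unnecessary.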
---
llvm/lib/Target/AArch64/AArch64ExpandImm.cpp | 58 ++++++++++++++++++-
.../AArch64/AArch64ExpandPseudoInsts.cpp | 2 +
llvm/test/CodeGen/AArch64/arm64-movi.ll | 25 ++++----
3 files changed, 69 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
index 98016271a9d00..184c45448dc9a 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
@@ -239,6 +239,57 @@ static bool trySequenceOfOnes(uint64_t UImm,
return true;
}
+// Attempt to expand 64-bit immediate values whose negated upper half matches
+// the lower half (for example, 0x1234'5678'edcb'a987).
+// Immediates of this form can generally be expanded via a sequence of
+// MOVN+MOVK to expand the lower half, followed by an EOR to shift and negate
+// the result to the upper half, e.g.:
+// mov x0, #-22137 // =0xffffffffffffa987
+// movk x0, #60875, lsl #16 // =0xffffffffedcba987
+// eor x0, x0, x0, lsl #32 // =0xffffffffedcba987 ^ 0xedcba98700000000
+// =0x12345678edcba987.
+// When the lower half contains a 16-bit chunk of ones, such as
+// 0x0000'5678'ffff'a987, the intermediate MOVK is redundant.
+// Similarly, when it contains a 16-bit chunk of zeros, such as
+// 0xffff'5678'0000'a987, the expansion can instead be effected by expanding
+// the negation of the lower half and negating the result with an EON, e.g.:
+// mov x0, #-43400 // =0xffffffffffff5678
+// eon x0, x0, x0, lsl #32 // =0xffffffffffff5678 ^ ~0xffff567800000000
+// =0xffffffffffff5678 ^ 0x0000a987ffffffff
+// =0xffff56780000a987.
+// In any of these cases, the expansion with EOR/EON saves an instruction
+// compared to the default expansion based on MOV and MOVKs.
+static bool tryCopyWithNegation(uint64_t Imm,
+ SmallVectorImpl<ImmInsnModel> &Insn) {
+ // We need the negation of the upper half of Imm to match the lower half.
+ // Degenerate cases where Imm is a run of ones should be handled separately.
+ if ((~Imm >> 32) != (Imm & 0xffffffffULL) || llvm::isShiftedMask_64(Imm))
+ return false;
+
+ const unsigned Mask = 0xffff;
+ unsigned Opc = AArch64::EORXrs;
+
+ // If we have a chunk of all zeros in the lower half, we can save a MOVK by
+ // materialising the negated immediate and negating the result with an EON.
+ if ((Imm & Mask) == 0 || ((Imm >> 16) & Mask) == 0) {
+ Opc = AArch64::EONXrs;
+ Imm = ~Imm;
+ }
+
+ unsigned Imm0 = Imm & Mask;
+ unsigned Imm16 = (Imm >> 16) & Mask;
+ if (Imm0 != Mask) {
+ Insn.push_back({AArch64::MOVNXi, Imm0 ^ Mask, 0});
+ if (Imm16 != Mask)
+ Insn.push_back({AArch64::MOVKXi, Imm16, 16});
+ } else {
+ Insn.push_back({AArch64::MOVNXi, Imm16 ^ Mask, 16});
+ }
+
+ Insn.push_back({Opc, 0, 32});
+ return true;
+}
+
static uint64_t GetRunOfOnesStartingAt(uint64_t V, uint64_t StartPosition) {
uint64_t NumOnes = llvm::countr_one(V >> StartPosition);
@@ -617,7 +668,12 @@ void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize,
// FIXME: Add more two-instruction sequences.
// Three instruction sequences.
- //
+
+ // Attempt to use a sequence of MOVN+MOVK+EOR/EON (shifted register).
+ // The MOVK can be avoided if Imm contains a zero / one chunk.
+ if (tryCopyWithNegation(Imm, Insn))
+ return;
+
// Prefer MOVZ/MOVN followed by two MOVK; it's more readable, and possibly
// the fastest sequence with fast literal generation. (If neither MOVK is
// part of a fast literal generation pair, it could be slower than the
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 0f4bbfc3d610e..536260afb9482 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -169,6 +169,8 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
.addImm(I->Op2));
}
break;
+ case AArch64::EONXrs:
+ case AArch64::EORXrs:
case AArch64::ORRWrs:
case AArch64::ORRXrs: {
Register DstReg = MI.getOperand(0).getReg();
diff --git a/llvm/test/CodeGen/AArch64/arm64-movi.ll b/llvm/test/CodeGen/AArch64/arm64-movi.ll
index cc1ba5c9c1536..c918f209bfda7 100644
--- a/llvm/test/CodeGen/AArch64/arm64-movi.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-movi.ll
@@ -557,9 +557,8 @@ define i64 @orr_32_eor_64() nounwind {
define i64 @movn_0_eon() {
; CHECK-LABEL: movn_0_eon:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x0, #43690 // =0xaaaa
-; CHECK-NEXT: movk x0, #21845, lsl #32
-; CHECK-NEXT: movk x0, #65535, lsl #48
+; CHECK-NEXT: mov x0, #-43691 // =0xffffffffffff5555
+; CHECK-NEXT: eon x0, x0, x0, lsl #32
; CHECK-NEXT: ret
ret i64 u0xffff55550000aaaa
}
@@ -567,9 +566,8 @@ define i64 @movn_0_eon() {
define i64 @movn_1_eon() {
; CHECK-LABEL: movn_1_eon:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x0, #2863267840 // =0xaaaa0000
-; CHECK-NEXT: movk x0, #65535, lsl #32
-; CHECK-NEXT: movk x0, #21845, lsl #48
+; CHECK-NEXT: mov x0, #-2863267841 // =0xffffffff5555ffff
+; CHECK-NEXT: eon x0, x0, x0, lsl #32
; CHECK-NEXT: ret
ret i64 u0x5555ffffaaaa0000
}
@@ -577,9 +575,8 @@ define i64 @movn_1_eon() {
define i64 @movn_0_eor() {
; CHECK-LABEL: movn_0_eor:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x0, #21845 // =0x5555
-; CHECK-NEXT: movk x0, #65535, lsl #16
-; CHECK-NEXT: movk x0, #43690, lsl #32
+; CHECK-NEXT: mov x0, #-43691 // =0xffffffffffff5555
+; CHECK-NEXT: eor x0, x0, x0, lsl #32
; CHECK-NEXT: ret
ret i64 u0x0000aaaaffff5555
}
@@ -587,9 +584,8 @@ define i64 @movn_0_eor() {
define i64 @movn_1_eor() {
; CHECK-LABEL: movn_1_eor:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x0, #65535 // =0xffff
-; CHECK-NEXT: movk x0, #21845, lsl #16
-; CHECK-NEXT: movk x0, #43690, lsl #48
+; CHECK-NEXT: mov x0, #-2863267841 // =0xffffffff5555ffff
+; CHECK-NEXT: eor x0, x0, x0, lsl #32
; CHECK-NEXT: ret
ret i64 u0xaaaa00005555ffff
}
@@ -597,10 +593,9 @@ define i64 @movn_1_eor() {
define i64 @movn_movk_eor() {
; CHECK-LABEL: movn_movk_eor:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x0, #43690 // =0xaaaa
+; CHECK-NEXT: mov x0, #-21846 // =0xffffffffffffaaaa
; CHECK-NEXT: movk x0, #52428, lsl #16
-; CHECK-NEXT: movk x0, #21845, lsl #32
-; CHECK-NEXT: movk x0, #13107, lsl #48
+; CHECK-NEXT: eor x0, x0, x0, lsl #32
; CHECK-NEXT: ret
ret i64 u0x33335555ccccaaaa
}
>From fde8a5fea7f89c24986f56c6160dfc698a8e5e9e Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Thu, 9 Oct 2025 01:43:20 -0700
Subject: [PATCH 3/5] Add separate calls for two/three sequences.
---
llvm/lib/Target/AArch64/AArch64ExpandImm.cpp | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
index 184c45448dc9a..da2b343f26b7c 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
@@ -259,7 +259,7 @@ static bool trySequenceOfOnes(uint64_t UImm,
// =0xffff56780000a987.
// In any of these cases, the expansion with EOR/EON saves an instruction
// compared to the default expansion based on MOV and MOVKs.
-static bool tryCopyWithNegation(uint64_t Imm,
+static bool tryCopyWithNegation(uint64_t Imm, bool AllowThreeSequence,
SmallVectorImpl<ImmInsnModel> &Insn) {
// We need the negation of the upper half of Imm to match the lower half.
// Degenerate cases where Imm is a run of ones should be handled separately.
@@ -278,6 +278,8 @@ static bool tryCopyWithNegation(uint64_t Imm,
unsigned Imm0 = Imm & Mask;
unsigned Imm16 = (Imm >> 16) & Mask;
+ if (Imm0 != Mask && Imm16 != Mask && !AllowThreeSequence)
+ return false;
if (Imm0 != Mask) {
Insn.push_back({AArch64::MOVNXi, Imm0 ^ Mask, 0});
if (Imm16 != Mask)
@@ -665,15 +667,14 @@ void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize,
if (tryEorOfLogicalImmediates(UImm, Insn))
return;
+ // Attempt to use a sequence of MOVN+EOR/EON (shifted register).
+ if (tryCopyWithNegation(Imm, /*AllowThreeSequence=*/false, Insn))
+ return;
+
// FIXME: Add more two-instruction sequences.
// Three instruction sequences.
-
- // Attempt to use a sequence of MOVN+MOVK+EOR/EON (shifted register).
- // The MOVK can be avoided if Imm contains a zero / one chunk.
- if (tryCopyWithNegation(Imm, Insn))
- return;
-
+ //
// Prefer MOVZ/MOVN followed by two MOVK; it's more readable, and possibly
// the fastest sequence with fast literal generation. (If neither MOVK is
// part of a fast literal generation pair, it could be slower than the
@@ -697,6 +698,10 @@ void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize,
if (BitSize == 64 && trySequenceOfOnes(UImm, Insn))
return;
+ // Attempt to use a sequence of MOVN+MOVK+EOR (shifted register).
+ if (tryCopyWithNegation(Imm, /*AllowThreeSequence=*/true, Insn))
+ return;
+
// We found no possible two or three instruction sequence; use the general
// four-instruction sequence.
expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
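
As a minimal sketch of the gating this change introduces (the helper name numMovsForLowerHalf is illustrative, not taken from the patch): the sequence needs only a MOVN before the EOR/EON when one 16-bit chunk of the value being materialised is all ones, which is the two-instruction form now attempted alongside the other two-instruction sequences; otherwise a MOVK is also required and the sequence is deferred to the three-instruction stage via AllowThreeSequence.

  #include <cstdint>

  // Number of MOV-type instructions needed before the final EOR/EON to
  // materialise the 32-bit value C (the lower half of the immediate, or its
  // negation in the EON case): 1 if either 16-bit chunk is all ones (MOVN
  // alone suffices), otherwise 2 (MOVN followed by MOVK).
  static unsigned numMovsForLowerHalf(uint32_t C) {
    unsigned Imm0 = C & 0xffff;
    unsigned Imm16 = (C >> 16) & 0xffff;
    return (Imm0 == 0xffff || Imm16 == 0xffff) ? 1u : 2u;
  }

For example, numMovsForLowerHalf(0xffff5555) == 1 corresponds to the MOVN+EON pair in movn_0_eon above, while numMovsForLowerHalf(0xccccaaaa) == 2 corresponds to the MOVN+MOVK+EOR sequence in movn_movk_eor.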
>From 39752d3004af95b3ac4783d15e4c9821810e9f99 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Tue, 21 Oct 2025 07:11:36 -0700
Subject: [PATCH 4/5] Test other shift amounts.
---
llvm/test/CodeGen/AArch64/arm64-movi.ll | 144 ++++++++++++++++++++++--
1 file changed, 134 insertions(+), 10 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/arm64-movi.ll b/llvm/test/CodeGen/AArch64/arm64-movi.ll
index c918f209bfda7..e1c78ec8c843e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-movi.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-movi.ll
@@ -554,8 +554,18 @@ define i64 @orr_32_eor_64() nounwind {
; Tests for EOR / EON with MOVN.
;==--------------------------------------------------------------------------==
-define i64 @movn_0_eon() {
-; CHECK-LABEL: movn_0_eon:
+define i64 @movn_0_eon_lsl_17() {
+; CHECK-LABEL: movn_0_eon_lsl_17:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x0, #-4370 // =0xffffffffffffeeee
+; CHECK-NEXT: movk x0, #8738, lsl #16
+; CHECK-NEXT: movk x0, #65534, lsl #32
+; CHECK-NEXT: ret
+ ret i64 u0xfffffffe2222eeee
+}
+
+define i64 @movn_0_eon_lsl_32() {
+; CHECK-LABEL: movn_0_eon_lsl_32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-43691 // =0xffffffffffff5555
; CHECK-NEXT: eon x0, x0, x0, lsl #32
@@ -563,8 +573,28 @@ define i64 @movn_0_eon() {
ret i64 u0xffff55550000aaaa
}
-define i64 @movn_1_eon() {
-; CHECK-LABEL: movn_1_eon:
+define i64 @movn_0_eon_lsl_47() {
+; CHECK-LABEL: movn_0_eon_lsl_47:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x0, #61166 // =0xeeee
+; CHECK-NEXT: movk x0, #32768, lsl #32
+; CHECK-NEXT: movk x0, #34952, lsl #48
+; CHECK-NEXT: ret
+ ret i64 u0x888880000000eeee
+}
+
+define i64 @movn_1_eon_lsl_17() {
+; CHECK-LABEL: movn_1_eon_lsl_17:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x0, #286261248 // =0x11100000
+; CHECK-NEXT: movk x0, #8739, lsl #32
+; CHECK-NEXT: movk x0, #65534, lsl #48
+; CHECK-NEXT: ret
+ ret i64 u0xfffe222311100000
+}
+
+define i64 @movn_1_eon_lsl_32() {
+; CHECK-LABEL: movn_1_eon_lsl_32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-2863267841 // =0xffffffff5555ffff
; CHECK-NEXT: eon x0, x0, x0, lsl #32
@@ -572,8 +602,28 @@ define i64 @movn_1_eon() {
ret i64 u0x5555ffffaaaa0000
}
-define i64 @movn_0_eor() {
-; CHECK-LABEL: movn_0_eor:
+define i64 @movn_1_eon_lsl_46() {
+; CHECK-LABEL: movn_1_eon_lsl_46:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x0, #4008509440 // =0xeeed0000
+; CHECK-NEXT: movk x0, #49152, lsl #32
+; CHECK-NEXT: movk x0, #49151, lsl #48
+; CHECK-NEXT: ret
+ ret i64 u0xbfffc000eeed0000
+}
+
+define i64 @movn_0_eor_lsl_17() {
+; CHECK-LABEL: movn_0_eor_lsl_17:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x0, #4369 // =0x1111
+; CHECK-NEXT: movk x0, #56797, lsl #16
+; CHECK-NEXT: movk x0, #1, lsl #32
+; CHECK-NEXT: ret
+ ret i64 u0x00000001dddd1111
+}
+
+define i64 @movn_0_eor_lsl_32() {
+; CHECK-LABEL: movn_0_eor_lsl_32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-43691 // =0xffffffffffff5555
; CHECK-NEXT: eor x0, x0, x0, lsl #32
@@ -581,8 +631,28 @@ define i64 @movn_0_eor() {
ret i64 u0x0000aaaaffff5555
}
-define i64 @movn_1_eor() {
-; CHECK-LABEL: movn_1_eor:
+define i64 @movn_0_eor_lsl_47() {
+; CHECK-LABEL: movn_0_eor_lsl_47:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x0, #-61167 // =0xffffffffffff1111
+; CHECK-NEXT: movk x0, #32767, lsl #32
+; CHECK-NEXT: movk x0, #30583, lsl #48
+; CHECK-NEXT: ret
+ ret i64 u0x77777fffffff1111
+}
+
+define i64 @movn_1_eor_lsl_17() {
+; CHECK-LABEL: movn_1_eor_lsl_17:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x0, #-286261249 // =0xffffffffeeefffff
+; CHECK-NEXT: movk x0, #56796, lsl #32
+; CHECK-NEXT: movk x0, #1, lsl #48
+; CHECK-NEXT: ret
+ ret i64 u0x0001dddceeefffff
+}
+
+define i64 @movn_1_eor_lsl_32() {
+; CHECK-LABEL: movn_1_eor_lsl_32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-2863267841 // =0xffffffff5555ffff
; CHECK-NEXT: eor x0, x0, x0, lsl #32
@@ -590,8 +660,51 @@ define i64 @movn_1_eor() {
ret i64 u0xaaaa00005555ffff
}
-define i64 @movn_movk_eor() {
-; CHECK-LABEL: movn_movk_eor:
+define i64 @movn_1_eor_lsl_46() {
+; CHECK-LABEL: movn_1_eor_lsl_46:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x0, #-4008509441
+; CHECK-NEXT: movk x0, #16383, lsl #32
+; CHECK-NEXT: movk x0, #16384, lsl #48
+; CHECK-NEXT: ret
+ ret i64 u0x40003fff1112ffff
+}
+
+define i64 @movn_movk_eon_lsl_17() {
+; CHECK-LABEL: movn_movk_eon_lsl_17:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x0, #43399 // =0xa987
+; CHECK-NEXT: movk x0, #16699, lsl #16
+; CHECK-NEXT: movk x0, #9320, lsl #32
+; CHECK-NEXT: movk x0, #65534, lsl #48
+; CHECK-NEXT: ret
+ ret i64 u0xfffe2468413ba987
+}
+
+define i64 @movn_movk_eon_lsl_47() {
+; CHECK-LABEL: movn_movk_eon_lsl_47:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x0, #61166 // =0xeeee
+; CHECK-NEXT: movk x0, #21554, lsl #16
+; CHECK-NEXT: movk x0, #32768, lsl #32
+; CHECK-NEXT: movk x0, #34952, lsl #48
+; CHECK-NEXT: ret
+ ret i64 u0x888880005432eeee
+}
+
+define i64 @movn_movk_eor_lsl_17() {
+; CHECK-LABEL: movn_movk_eor_lsl_17:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x0, #22136 // =0x5678
+; CHECK-NEXT: movk x0, #48836, lsl #16
+; CHECK-NEXT: movk x0, #56215, lsl #32
+; CHECK-NEXT: movk x0, #1, lsl #48
+; CHECK-NEXT: ret
+ ret i64 u0x0001db97bec45678
+}
+
+define i64 @movn_movk_eor_lsl_32() {
+; CHECK-LABEL: movn_movk_eor_lsl_32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-21846 // =0xffffffffffffaaaa
; CHECK-NEXT: movk x0, #52428, lsl #16
@@ -599,3 +712,14 @@ define i64 @movn_movk_eor() {
; CHECK-NEXT: ret
ret i64 u0x33335555ccccaaaa
}
+
+define i64 @movn_movk_eor_lsl_47() {
+; CHECK-LABEL: movn_movk_eor_lsl_47:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x0, #4369 // =0x1111
+; CHECK-NEXT: movk x0, #43981, lsl #16
+; CHECK-NEXT: movk x0, #32767, lsl #32
+; CHECK-NEXT: movk x0, #30583, lsl #48
+; CHECK-NEXT: ret
+ ret i64 u0x77777fffabcd1111
+}
>From 0a2928939cd0c3193505cb9c9438ec4b56c7a506 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Tue, 14 Oct 2025 04:20:42 -0700
Subject: [PATCH 5/5] Support other shift amounts.
---
llvm/lib/Target/AArch64/AArch64ExpandImm.cpp | 66 +++++++++++--------
llvm/test/CodeGen/AArch64/arm64-movi.ll | 64 ++++++++----------
.../AArch64/srem-seteq-illegal-types.ll | 7 +-
3 files changed, 68 insertions(+), 69 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
index da2b343f26b7c..2530daf6bb9dc 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
@@ -239,15 +239,18 @@ static bool trySequenceOfOnes(uint64_t UImm,
return true;
}
-// Attempt to expand 64-bit immediate values whose negated upper half matches
-// the lower half (for example, 0x1234'5678'edcb'a987).
-// Immediates of this form can generally be expanded via a sequence of
-// MOVN+MOVK to expand the lower half, followed by an EOR to shift and negate
-// the result to the upper half, e.g.:
+// Attempt to expand 64-bit immediate values that consist of shifted negated
+// components such as 0x1234'5678'edcb'a987, where the upper half is the
+// negation of the lower half. Immediates of this form can generally be
+// expanded via a sequence of MOVN+MOVK to expand the lower half, followed by
+// an EOR or EON to shift and negate the result to the upper half, for example:
// mov x0, #-22137 // =0xffffffffffffa987
// movk x0, #60875, lsl #16 // =0xffffffffedcba987
// eor x0, x0, x0, lsl #32 // =0xffffffffedcba987 ^ 0xedcba98700000000
// =0x12345678edcba987.
+// The logic extends to other shift amounts in the range [17, 48) (outside that
+// range we get runs of ones/zeros that are optimised separately).
+//
// When the lower half contains a 16-bit chunk of ones, such as
// 0x0000'5678'ffff'a987, the intermediate MOVK is redundant.
// Similarly, when it contains a 16-bit chunk of zeros, such as
@@ -261,35 +264,44 @@ static bool trySequenceOfOnes(uint64_t UImm,
// compared to the default expansion based on MOV and MOVKs.
static bool tryCopyWithNegation(uint64_t Imm, bool AllowThreeSequence,
SmallVectorImpl<ImmInsnModel> &Insn) {
- // We need the negation of the upper half of Imm to match the lower half.
// Degenerate cases where Imm is a run of ones should be handled separately.
- if ((~Imm >> 32) != (Imm & 0xffffffffULL) || llvm::isShiftedMask_64(Imm))
+ if (!Imm || llvm::isShiftedMask_64(Imm))
return false;
const unsigned Mask = 0xffff;
- unsigned Opc = AArch64::EORXrs;
- // If we have a chunk of all zeros in the lower half, we can save a MOVK by
- // materialising the negated immediate and negating the result with an EON.
- if ((Imm & Mask) == 0 || ((Imm >> 16) & Mask) == 0) {
- Opc = AArch64::EONXrs;
- Imm = ~Imm;
- }
+ auto tryExpansion = [&](unsigned Opc, uint64_t C, unsigned N) {
+ assert((C >> 32) == 0xffffffffULL && "Invalid immediate");
+ const unsigned Imm0 = C & Mask;
+ const unsigned Imm16 = (C >> 16) & Mask;
+ if (Imm0 != Mask && Imm16 != Mask && !AllowThreeSequence)
+ return false;
+
+ if (Imm0 != Mask) {
+ Insn.push_back({AArch64::MOVNXi, Imm0 ^ Mask, 0});
+ if (Imm16 != Mask)
+ Insn.push_back({AArch64::MOVKXi, Imm16, 16});
+ } else {
+ Insn.push_back({AArch64::MOVNXi, Imm16 ^ Mask, 16});
+ }
- unsigned Imm0 = Imm & Mask;
- unsigned Imm16 = (Imm >> 16) & Mask;
- if (Imm0 != Mask && Imm16 != Mask && !AllowThreeSequence)
- return false;
- if (Imm0 != Mask) {
- Insn.push_back({AArch64::MOVNXi, Imm0 ^ Mask, 0});
- if (Imm16 != Mask)
- Insn.push_back({AArch64::MOVKXi, Imm16, 16});
- } else {
- Insn.push_back({AArch64::MOVNXi, Imm16 ^ Mask, 16});
+ Insn.push_back({Opc, 0, N});
+ return true;
+ };
+
+ for (unsigned N = 17; N < 48; ++N) {
+ // Attempt EOR.
+ uint64_t C = 0xffffffff00000000ULL | (Imm ^ (Imm << N));
+ if ((C ^ (C << N)) == Imm && tryExpansion(AArch64::EORXrs, C, N))
+ return true;
+
+ // Attempt EON.
+ C = 0xffffffff00000000ULL | (Imm ^ ~(~Imm << N));
+ if ((C ^ ~(C << N)) == Imm && tryExpansion(AArch64::EONXrs, C, N))
+ return true;
}
- Insn.push_back({Opc, 0, 32});
- return true;
+ return false;
}
static uint64_t GetRunOfOnesStartingAt(uint64_t V, uint64_t StartPosition) {
@@ -698,7 +710,7 @@ void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize,
if (BitSize == 64 && trySequenceOfOnes(UImm, Insn))
return;
- // Attempt to use a sequence of MOVN+MOVK+EOR (shifted register).
+ // Attempt to use a sequence of MOVN+MOVK+EOR/EON (shifted register).
if (tryCopyWithNegation(Imm, /*AllowThreeSequence=*/true, Insn))
return;
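
As a standalone illustration of the search added here (the names Candidate and findShiftedNegation are invented for the sketch; it mirrors, but is independent of, the loop in tryCopyWithNegation): for each shift N in [17, 48) it derives a candidate value C whose upper 32 bits are all ones and checks whether a shifted EOR or EON applied to C reproduces Imm.

  #include <cstdint>
  #include <optional>

  struct Candidate {
    bool UseEon;    // final instruction: EON (true) or EOR (false)
    uint64_t C;     // value materialised before the shifted EOR/EON
    unsigned Shift; // shift amount N
  };

  static std::optional<Candidate> findShiftedNegation(uint64_t Imm) {
    for (unsigned N = 17; N < 48; ++N) {
      // EOR case: want C ^ (C << N) == Imm.
      uint64_t C = 0xffffffff00000000ULL | (Imm ^ (Imm << N));
      if ((C ^ (C << N)) == Imm)
        return Candidate{/*UseEon=*/false, C, N};
      // EON case: want C ^ ~(C << N) == Imm.
      C = 0xffffffff00000000ULL | (Imm ^ ~(~Imm << N));
      if ((C ^ ~(C << N)) == Imm)
        return Candidate{/*UseEon=*/true, C, N};
    }
    return std::nullopt;
  }

For instance, findShiftedNegation(0xfffffffe2222eeee) yields C = 0xffffffffffff1111 with an EON and shift 17, matching the updated movn_0_eon_lsl_17 test in the arm64-movi.ll changes below.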
diff --git a/llvm/test/CodeGen/AArch64/arm64-movi.ll b/llvm/test/CodeGen/AArch64/arm64-movi.ll
index e1c78ec8c843e..c4d33faa4eda4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-movi.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-movi.ll
@@ -557,9 +557,8 @@ define i64 @orr_32_eor_64() nounwind {
define i64 @movn_0_eon_lsl_17() {
; CHECK-LABEL: movn_0_eon_lsl_17:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x0, #-4370 // =0xffffffffffffeeee
-; CHECK-NEXT: movk x0, #8738, lsl #16
-; CHECK-NEXT: movk x0, #65534, lsl #32
+; CHECK-NEXT: mov x0, #-61167 // =0xffffffffffff1111
+; CHECK-NEXT: eon x0, x0, x0, lsl #17
; CHECK-NEXT: ret
ret i64 u0xfffffffe2222eeee
}
@@ -576,9 +575,8 @@ define i64 @movn_0_eon_lsl_32() {
define i64 @movn_0_eon_lsl_47() {
; CHECK-LABEL: movn_0_eon_lsl_47:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x0, #61166 // =0xeeee
-; CHECK-NEXT: movk x0, #32768, lsl #32
-; CHECK-NEXT: movk x0, #34952, lsl #48
+; CHECK-NEXT: mov x0, #-61167 // =0xffffffffffff1111
+; CHECK-NEXT: eon x0, x0, x0, lsl #47
; CHECK-NEXT: ret
ret i64 u0x888880000000eeee
}
@@ -586,9 +584,8 @@ define i64 @movn_0_eon_lsl_47() {
define i64 @movn_1_eon_lsl_17() {
; CHECK-LABEL: movn_1_eon_lsl_17:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x0, #286261248 // =0x11100000
-; CHECK-NEXT: movk x0, #8739, lsl #32
-; CHECK-NEXT: movk x0, #65534, lsl #48
+; CHECK-NEXT: mov x0, #-4008574977 // =0xffffffff1111ffff
+; CHECK-NEXT: eon x0, x0, x0, lsl #17
; CHECK-NEXT: ret
ret i64 u0xfffe222311100000
}
@@ -605,9 +602,8 @@ define i64 @movn_1_eon_lsl_32() {
define i64 @movn_1_eon_lsl_46() {
; CHECK-LABEL: movn_1_eon_lsl_46:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x0, #4008509440 // =0xeeed0000
-; CHECK-NEXT: movk x0, #49152, lsl #32
-; CHECK-NEXT: movk x0, #49151, lsl #48
+; CHECK-NEXT: mov x0, #-4008509441 // =0xffffffff1112ffff
+; CHECK-NEXT: eon x0, x0, x0, lsl #46
; CHECK-NEXT: ret
ret i64 u0xbfffc000eeed0000
}
@@ -615,9 +611,8 @@ define i64 @movn_1_eon_lsl_46() {
define i64 @movn_0_eor_lsl_17() {
; CHECK-LABEL: movn_0_eor_lsl_17:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x0, #4369 // =0x1111
-; CHECK-NEXT: movk x0, #56797, lsl #16
-; CHECK-NEXT: movk x0, #1, lsl #32
+; CHECK-NEXT: mov x0, #-61167 // =0xffffffffffff1111
+; CHECK-NEXT: eor x0, x0, x0, lsl #17
; CHECK-NEXT: ret
ret i64 u0x00000001dddd1111
}
@@ -635,8 +630,7 @@ define i64 @movn_0_eor_lsl_47() {
; CHECK-LABEL: movn_0_eor_lsl_47:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x0, #-61167 // =0xffffffffffff1111
-; CHECK-NEXT: movk x0, #32767, lsl #32
-; CHECK-NEXT: movk x0, #30583, lsl #48
+; CHECK-NEXT: eor x0, x0, x0, lsl #47
; CHECK-NEXT: ret
ret i64 u0x77777fffffff1111
}
@@ -644,9 +638,8 @@ define i64 @movn_0_eor_lsl_47() {
define i64 @movn_1_eor_lsl_17() {
; CHECK-LABEL: movn_1_eor_lsl_17:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x0, #-286261249 // =0xffffffffeeefffff
-; CHECK-NEXT: movk x0, #56796, lsl #32
-; CHECK-NEXT: movk x0, #1, lsl #48
+; CHECK-NEXT: mov x0, #-4008574977 // =0xffffffff1111ffff
+; CHECK-NEXT: eor x0, x0, x0, lsl #17
; CHECK-NEXT: ret
ret i64 u0x0001dddceeefffff
}
@@ -663,9 +656,8 @@ define i64 @movn_1_eor_lsl_32() {
define i64 @movn_1_eor_lsl_46() {
; CHECK-LABEL: movn_1_eor_lsl_46:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x0, #-4008509441
-; CHECK-NEXT: movk x0, #16383, lsl #32
-; CHECK-NEXT: movk x0, #16384, lsl #48
+; CHECK-NEXT: mov x0, #-4008509441 // =0xffffffff1112ffff
+; CHECK-NEXT: eor x0, x0, x0, lsl #46
; CHECK-NEXT: ret
ret i64 u0x40003fff1112ffff
}
@@ -673,10 +665,9 @@ define i64 @movn_1_eor_lsl_46() {
define i64 @movn_movk_eon_lsl_17() {
; CHECK-LABEL: movn_movk_eon_lsl_17:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x0, #43399 // =0xa987
-; CHECK-NEXT: movk x0, #16699, lsl #16
-; CHECK-NEXT: movk x0, #9320, lsl #32
-; CHECK-NEXT: movk x0, #65534, lsl #48
+; CHECK-NEXT: mov x0, #-43400 // =0xffffffffffff5678
+; CHECK-NEXT: movk x0, #4660, lsl #16
+; CHECK-NEXT: eon x0, x0, x0, lsl #17
; CHECK-NEXT: ret
ret i64 u0xfffe2468413ba987
}
@@ -684,10 +675,9 @@ define i64 @movn_movk_eon_lsl_17() {
define i64 @movn_movk_eon_lsl_47() {
; CHECK-LABEL: movn_movk_eon_lsl_47:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x0, #61166 // =0xeeee
-; CHECK-NEXT: movk x0, #21554, lsl #16
-; CHECK-NEXT: movk x0, #32768, lsl #32
-; CHECK-NEXT: movk x0, #34952, lsl #48
+; CHECK-NEXT: mov x0, #-61167 // =0xffffffffffff1111
+; CHECK-NEXT: movk x0, #43981, lsl #16
+; CHECK-NEXT: eon x0, x0, x0, lsl #47
; CHECK-NEXT: ret
ret i64 u0x888880005432eeee
}
@@ -695,10 +685,9 @@ define i64 @movn_movk_eon_lsl_47() {
define i64 @movn_movk_eor_lsl_17() {
; CHECK-LABEL: movn_movk_eor_lsl_17:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x0, #22136 // =0x5678
-; CHECK-NEXT: movk x0, #48836, lsl #16
-; CHECK-NEXT: movk x0, #56215, lsl #32
-; CHECK-NEXT: movk x0, #1, lsl #48
+; CHECK-NEXT: mov x0, #-43400 // =0xffffffffffff5678
+; CHECK-NEXT: movk x0, #4660, lsl #16
+; CHECK-NEXT: eor x0, x0, x0, lsl #17
; CHECK-NEXT: ret
ret i64 u0x0001db97bec45678
}
@@ -716,10 +705,9 @@ define i64 @movn_movk_eor_lsl_32() {
define i64 @movn_movk_eor_lsl_47() {
; CHECK-LABEL: movn_movk_eor_lsl_47:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x0, #4369 // =0x1111
+; CHECK-NEXT: mov x0, #-61167 // =0xffffffffffff1111
; CHECK-NEXT: movk x0, #43981, lsl #16
-; CHECK-NEXT: movk x0, #32767, lsl #32
-; CHECK-NEXT: movk x0, #30583, lsl #48
+; CHECK-NEXT: eor x0, x0, x0, lsl #47
; CHECK-NEXT: ret
ret i64 u0x77777fffabcd1111
}
diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll
index 884d668157e5f..bd4cc62255439 100644
--- a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll
@@ -61,14 +61,13 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind {
; CHECK-NEXT: sbfx x8, x0, #0, #33
; CHECK-NEXT: sbfx x10, x1, #0, #33
; CHECK-NEXT: movk x9, #29127, lsl #16
-; CHECK-NEXT: mov x13, #7281 // =0x1c71
+; CHECK-NEXT: mov x13, #-7282 // =0xffffffffffffe38e
; CHECK-NEXT: sbfx x12, x2, #0, #33
; CHECK-NEXT: movk x9, #50972, lsl #32
-; CHECK-NEXT: movk x13, #29127, lsl #16
+; CHECK-NEXT: movk x13, #36408, lsl #16
; CHECK-NEXT: movk x9, #7281, lsl #48
-; CHECK-NEXT: movk x13, #50972, lsl #32
+; CHECK-NEXT: eon x13, x13, x13, lsl #33
; CHECK-NEXT: smulh x11, x8, x9
-; CHECK-NEXT: movk x13, #7281, lsl #48
; CHECK-NEXT: smulh x9, x10, x9
; CHECK-NEXT: smulh x13, x12, x13
; CHECK-NEXT: add x11, x11, x11, lsr #63