[llvm] [X86][APX] Compress non-redundant NDD ADD to LEA (PR #158254)
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 14 17:39:38 PDT 2025
https://github.com/phoebewang updated https://github.com/llvm/llvm-project/pull/158254
From c679d43b31850862bb8b2db16bd0a3135a2980d3 Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Fri, 12 Sep 2025 17:25:49 +0800
Subject: [PATCH] [X86][APX] Compress non-redundant NDD ADD to LEA
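
This change teaches X86CompressEVEX to compress a non-redundant NDD ADD
(ADD32rr_ND, ADD64rr_ND, ADD32ri_ND, ADD64ri32_ND) into a legacy LEA when
no extended GPR is used and EFLAGS is dead. The existing EVEX-to-legacy
compression does not apply here because the destination differs from the
first source, but LEA can express the same three-operand add in fewer bytes.

A before/after sketch of the effect, taken from the updated add.ll checks
below (byte counts derived from the listed encodings):

    addl %esi, %edi, %eax    # EVEX NDD add, 6 bytes: 62 f4 7c 18 01 f7
  becomes
    leal (%rdi,%rsi), %eax   # legacy LEA,   3 bytes: 8d 04 37

For the 32-bit forms, the sources are promoted to their 64-bit
super-registers to form the LEA address, as seen in the %edi/%esi ->
(%rdi,%rsi) rewrite above.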
---
llvm/lib/Target/X86/X86CompressEVEX.cpp | 48 +++++++++++++++----
llvm/test/CodeGen/X86/apx/add.ll | 28 +++++------
llvm/test/CodeGen/X86/apx/mul-i1024.ll | 6 +--
.../CodeGen/X86/apx/ndd-false-deps-asm.mir | 4 +-
llvm/test/CodeGen/X86/apx/shl.ll | 8 ++--
llvm/test/CodeGen/X86/apx/sub.ll | 6 +--
6 files changed, 61 insertions(+), 39 deletions(-)
diff --git a/llvm/lib/Target/X86/X86CompressEVEX.cpp b/llvm/lib/Target/X86/X86CompressEVEX.cpp
index 4ea30de78402f..c0c7f5adf06ef 100644
--- a/llvm/lib/Target/X86/X86CompressEVEX.cpp
+++ b/llvm/lib/Target/X86/X86CompressEVEX.cpp
@@ -174,7 +174,8 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
return true;
}
-static bool CompressEVEXImpl(MachineInstr &MI, const X86Subtarget &ST) {
+static bool CompressEVEXImpl(MachineInstr &MI, MachineBasicBlock &MBB,
+ const X86Subtarget &ST) {
uint64_t TSFlags = MI.getDesc().TSFlags;
// Check for EVEX instructions only.
@@ -239,14 +240,14 @@ static bool CompressEVEXImpl(MachineInstr &MI, const X86Subtarget &ST) {
return I->NewOpc;
};
- // Redundant NDD ops cannot be safely compressed if either:
- // - the legacy op would introduce a partial write that BreakFalseDeps
- // identified as a potential stall, or
- // - the op is writing to a subregister of a live register, i.e. the
- // full (zeroed) result is used.
- // Both cases are indicated by an implicit def of the superregister.
+ Register Dst = MI.getOperand(0).getReg();
if (IsRedundantNDD) {
- Register Dst = MI.getOperand(0).getReg();
+ // Redundant NDD ops cannot be safely compressed if either:
+ // - the legacy op would introduce a partial write that BreakFalseDeps
+ // identified as a potential stall, or
+ // - the op is writing to a subregister of a live register, i.e. the
+ // full (zeroed) result is used.
+ // Both cases are indicated by an implicit def of the superregister.
if (Dst &&
(X86::GR16RegClass.contains(Dst) || X86::GR8RegClass.contains(Dst))) {
Register Super = getX86SubSuperRegister(Dst, 64);
@@ -260,6 +261,33 @@ static bool CompressEVEXImpl(MachineInstr &MI, const X86Subtarget &ST) {
if (!X86EnableAPXForRelocation)
assert(!isAddMemInstrWithRelocation(MI) &&
"Unexpected NDD instruction with relocation!");
+ } else if (Opc == X86::ADD32ri_ND || Opc == X86::ADD64ri32_ND ||
+ Opc == X86::ADD32rr_ND || Opc == X86::ADD64rr_ND) {
+ // Non-redundant NDD ADD can be compressed to LEA when:
+ // - No EGPR register used and
+ // - EFLAGS is dead.
+ if (!usesExtendedRegister(MI) &&
+ MI.registerDefIsDead(X86::EFLAGS, /*TRI=*/nullptr)) {
+ Register Src1 = MI.getOperand(1).getReg();
+ const MachineOperand &Src2 = MI.getOperand(2);
+ bool Is32BitReg = Opc == X86::ADD32ri_ND || Opc == X86::ADD32rr_ND;
+ const MCInstrDesc &NewDesc =
+ ST.getInstrInfo()->get(Is32BitReg ? X86::LEA32r : X86::LEA64r);
+ if (Is32BitReg)
+ Src1 = getX86SubSuperRegister(Src1, 64);
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, MI.getDebugLoc(), NewDesc, Dst)
+ .addReg(Src1)
+ .addImm(1);
+ if (Opc == X86::ADD32ri_ND || Opc == X86::ADD64ri32_ND)
+ MIB.addReg(0).add(Src2);
+ else if (Is32BitReg)
+ MIB.addReg(getX86SubSuperRegister(Src2.getReg(), 64)).addImm(0);
+ else
+ MIB.add(Src2).addImm(0);
+ MIB.addReg(0);
+ MI.removeFromParent();
+ return true;
+ }
}
// NonNF -> NF only if it's not a compressible NDD instruction and eflags is
@@ -318,8 +346,8 @@ bool CompressEVEXPass::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF) {
// Traverse the basic block.
- for (MachineInstr &MI : MBB)
- Changed |= CompressEVEXImpl(MI, ST);
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB))
+ Changed |= CompressEVEXImpl(MI, MBB, ST);
}
LLVM_DEBUG(dbgs() << "End X86CompressEVEXPass\n";);
return Changed;
diff --git a/llvm/test/CodeGen/X86/apx/add.ll b/llvm/test/CodeGen/X86/apx/add.ll
index 86343811901a9..4ab0edfba7ce8 100644
--- a/llvm/test/CodeGen/X86/apx/add.ll
+++ b/llvm/test/CodeGen/X86/apx/add.ll
@@ -36,12 +36,12 @@ entry:
define i32 @add32rr(i32 noundef %a, i32 noundef %b) {
; CHECK-LABEL: add32rr:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addl %esi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xf7]
+; CHECK-NEXT: leal (%rdi,%rsi), %eax # encoding: [0x8d,0x04,0x37]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: add32rr:
; NF: # %bb.0: # %entry
-; NF-NEXT: {nf} addl %esi, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x01,0xf7]
+; NF-NEXT: leal (%rdi,%rsi), %eax # encoding: [0x8d,0x04,0x37]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%add = add i32 %a, %b
@@ -51,12 +51,12 @@ entry:
define i64 @add64rr(i64 noundef %a, i64 noundef %b) {
; CHECK-LABEL: add64rr:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addq %rsi, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x01,0xf7]
+; CHECK-NEXT: leaq (%rdi,%rsi), %rax # encoding: [0x48,0x8d,0x04,0x37]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: add64rr:
; NF: # %bb.0: # %entry
-; NF-NEXT: {nf} addq %rsi, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x01,0xf7]
+; NF-NEXT: leaq (%rdi,%rsi), %rax # encoding: [0x48,0x8d,0x04,0x37]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%add = add i64 %a, %b
@@ -145,12 +145,12 @@ entry:
define i32 @add32ri8(i32 noundef %a) {
; CHECK-LABEL: add32ri8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addl $123, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x83,0xc7,0x7b]
+; CHECK-NEXT: leal 123(%rdi), %eax # encoding: [0x8d,0x47,0x7b]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: add32ri8:
; NF: # %bb.0: # %entry
-; NF-NEXT: {nf} addl $123, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x83,0xc7,0x7b]
+; NF-NEXT: leal 123(%rdi), %eax # encoding: [0x8d,0x47,0x7b]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%add = add i32 %a, 123
@@ -160,12 +160,12 @@ entry:
define i64 @add64ri8(i64 noundef %a) {
; CHECK-LABEL: add64ri8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addq $123, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x83,0xc7,0x7b]
+; CHECK-NEXT: leaq 123(%rdi), %rax # encoding: [0x48,0x8d,0x47,0x7b]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: add64ri8:
; NF: # %bb.0: # %entry
-; NF-NEXT: {nf} addq $123, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x83,0xc7,0x7b]
+; NF-NEXT: leaq 123(%rdi), %rax # encoding: [0x48,0x8d,0x47,0x7b]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%add = add i64 %a, 123
@@ -207,14 +207,12 @@ entry:
define i32 @add32ri(i32 noundef %a) {
; CHECK-LABEL: add32ri:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addl $123456, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0xc7,0x40,0xe2,0x01,0x00]
-; CHECK-NEXT: # imm = 0x1E240
+; CHECK-NEXT: leal 123456(%rdi), %eax # encoding: [0x8d,0x87,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: add32ri:
; NF: # %bb.0: # %entry
-; NF-NEXT: {nf} addl $123456, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x81,0xc7,0x40,0xe2,0x01,0x00]
-; NF-NEXT: # imm = 0x1E240
+; NF-NEXT: leal 123456(%rdi), %eax # encoding: [0x8d,0x87,0x40,0xe2,0x01,0x00]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%add = add i32 %a, 123456
@@ -224,14 +222,12 @@ entry:
define i64 @add64ri(i64 noundef %a) {
; CHECK-LABEL: add64ri:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addq $123456, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x81,0xc7,0x40,0xe2,0x01,0x00]
-; CHECK-NEXT: # imm = 0x1E240
+; CHECK-NEXT: leaq 123456(%rdi), %rax # encoding: [0x48,0x8d,0x87,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: add64ri:
; NF: # %bb.0: # %entry
-; NF-NEXT: {nf} addq $123456, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x81,0xc7,0x40,0xe2,0x01,0x00]
-; NF-NEXT: # imm = 0x1E240
+; NF-NEXT: leaq 123456(%rdi), %rax # encoding: [0x48,0x8d,0x87,0x40,0xe2,0x01,0x00]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%add = add i64 %a, 123456
diff --git a/llvm/test/CodeGen/X86/apx/mul-i1024.ll b/llvm/test/CodeGen/X86/apx/mul-i1024.ll
index a29a92176f432..0bb3b179cc305 100644
--- a/llvm/test/CodeGen/X86/apx/mul-i1024.ll
+++ b/llvm/test/CodeGen/X86/apx/mul-i1024.ll
@@ -1613,7 +1613,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: imulq %r23, %rdi
; EGPR-NDD-NEXT: addq %rdi, %rdx
; EGPR-NDD-NEXT: imulq 120(%r22), %r24, %rax
-; EGPR-NDD-NEXT: addq %rax, %rdx, %r9
+; EGPR-NDD-NEXT: leaq (%rdx,%rax), %r9
; EGPR-NDD-NEXT: movq 96(%r22), %r20
; EGPR-NDD-NEXT: movq 104(%r22), %rdi
; EGPR-NDD-NEXT: imulq %rdi, %r26, %r10
@@ -1756,7 +1756,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: addq %rsi, %rdx
; EGPR-NDD-NEXT: movq 104(%r15), %r8
; EGPR-NDD-NEXT: imulq %r10, %r8, %rax
-; EGPR-NDD-NEXT: addq %rax, %rdx, %rsi
+; EGPR-NDD-NEXT: leaq (%rdx,%rax), %rsi
; EGPR-NDD-NEXT: movq 112(%r15), %rax
; EGPR-NDD-NEXT: imulq %r23, %rax, %r9
; EGPR-NDD-NEXT: mulq %r16
@@ -1793,7 +1793,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: movq %rax, %r9
; EGPR-NDD-NEXT: addq %r8, %rdx
; EGPR-NDD-NEXT: imulq %r16, %r25, %rax
-; EGPR-NDD-NEXT: addq %rax, %rdx, %r8
+; EGPR-NDD-NEXT: leaq (%rdx,%rax), %r8
; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r23 # 8-byte Reload
; EGPR-NDD-NEXT: imulq %r23, %r24, %r16
; EGPR-NDD-NEXT: movq %r24, %rax
diff --git a/llvm/test/CodeGen/X86/apx/ndd-false-deps-asm.mir b/llvm/test/CodeGen/X86/apx/ndd-false-deps-asm.mir
index 5be5ca8d71947..bfc0120765e53 100644
--- a/llvm/test/CodeGen/X86/apx/ndd-false-deps-asm.mir
+++ b/llvm/test/CodeGen/X86/apx/ndd-false-deps-asm.mir
@@ -15,14 +15,14 @@
define signext i16 @partial_write(ptr %p, i32 %a, i32 %b, i16 signext %x, i16 signext %y) #0 {
; RCDEFAULT-LABEL: partial_write:
; RCDEFAULT: # %bb.0: # %entry
- ; RCDEFAULT-NEXT: addl %esi, %edx, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xf2]
+ ; RCDEFAULT-NEXT: leal (%rdx,%rsi), %eax # encoding: [0x8d,0x04,0x32]
; RCDEFAULT-NEXT: movl %eax, (%rdi) # encoding: [0x89,0x07]
; RCDEFAULT-NEXT: addw %cx, %ax, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x01,0xc8]
; RCDEFAULT-NEXT: retq # encoding: [0xc3]
;
; RC1-LABEL: partial_write:
; RC1: # %bb.0: # %entry
- ; RC1-NEXT: addl %esi, %edx, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xf2]
+ ; RC1-NEXT: leal (%rdx,%rsi), %eax # encoding: [0x8d,0x04,0x32]
; RC1-NEXT: movl %eax, (%rdi) # encoding: [0x89,0x07]
; RC1-NEXT: addw %cx, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x01,0xc8]
; RC1-NEXT: retq # encoding: [0xc3]
diff --git a/llvm/test/CodeGen/X86/apx/shl.ll b/llvm/test/CodeGen/X86/apx/shl.ll
index 896cd55bc7452..9c6229a483c73 100644
--- a/llvm/test/CodeGen/X86/apx/shl.ll
+++ b/llvm/test/CodeGen/X86/apx/shl.ll
@@ -396,12 +396,12 @@ entry:
define i32 @shl32r1(i32 noundef %a) {
; CHECK-LABEL: shl32r1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addl %edi, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x01,0xff]
+; CHECK-NEXT: leal (%rdi,%rdi), %eax # encoding: [0x8d,0x04,0x3f]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: shl32r1:
; NF: # %bb.0: # %entry
-; NF-NEXT: {nf} addl %edi, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x01,0xff]
+; NF-NEXT: leal (%rdi,%rdi), %eax # encoding: [0x8d,0x04,0x3f]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%shl = shl i32 %a, 1
@@ -411,12 +411,12 @@ entry:
define i64 @shl64r1(i64 noundef %a) {
; CHECK-LABEL: shl64r1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addq %rdi, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x01,0xff]
+; CHECK-NEXT: leaq (%rdi,%rdi), %rax # encoding: [0x48,0x8d,0x04,0x3f]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: shl64r1:
; NF: # %bb.0: # %entry
-; NF-NEXT: {nf} addq %rdi, %rdi, %rax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0xfc,0x1c,0x01,0xff]
+; NF-NEXT: leaq (%rdi,%rdi), %rax # encoding: [0x48,0x8d,0x04,0x3f]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%shl = shl i64 %a, 1
diff --git a/llvm/test/CodeGen/X86/apx/sub.ll b/llvm/test/CodeGen/X86/apx/sub.ll
index d7914577634e7..75ee8cf31dee5 100644
--- a/llvm/test/CodeGen/X86/apx/sub.ll
+++ b/llvm/test/CodeGen/X86/apx/sub.ll
@@ -207,14 +207,12 @@ entry:
define i32 @sub32ri(i32 noundef %a) {
; CHECK-LABEL: sub32ri:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addl $-123456, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x81,0xc7,0xc0,0x1d,0xfe,0xff]
-; CHECK-NEXT: # imm = 0xFFFE1DC0
+; CHECK-NEXT: leal -123456(%rdi), %eax # encoding: [0x8d,0x87,0xc0,0x1d,0xfe,0xff]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: sub32ri:
; NF: # %bb.0: # %entry
-; NF-NEXT: {nf} addl $-123456, %edi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x81,0xc7,0xc0,0x1d,0xfe,0xff]
-; NF-NEXT: # imm = 0xFFFE1DC0
+; NF-NEXT: leal -123456(%rdi), %eax # encoding: [0x8d,0x87,0xc0,0x1d,0xfe,0xff]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%sub = sub i32 %a, 123456