[llvm] [X86] remove unnecessary movs when %rdx is an input to mulx (PR #184462)
Aiden Grossman via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 4 06:49:37 PST 2026
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/184462
>From 6b18ed078ed7a9a1bdf54d751ac1afa34859c557 Mon Sep 17 00:00:00 2001
From: Takashiidobe <idobetakashi at gmail.com>
Date: Tue, 3 Mar 2026 17:49:52 -0500
Subject: [PATCH] [X86] remove unnecessary movs when %rdx is an argument to
mulx
---
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 18 +++++
.../CodeGen/X86/mulx64-no-implicit-copy.ll | 69 +++++++++++++++++++
2 files changed, 87 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/mulx64-no-implicit-copy.ll
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 101ea3e231a5c..cc846e0d1492e 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5882,6 +5882,24 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
std::swap(N0, N1);
}
+ // For MULX, the implicit source must be in RDX (LoReg). If N1 is
+ // already a CopyFromReg of LoReg and N0 is not, flip so that N0
+ // (which feeds the CopyToReg below) is the operand already in LoReg,
+ // avoiding an unnecessary register-to-register copy before the multiply.
+ if (UseMULX && !foldedLoad) {
+ MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
+ auto GetPhysReg = [&](SDValue V) -> Register {
+ if (V.getOpcode() != ISD::CopyFromReg)
+ return Register();
+ Register Reg = cast<RegisterSDNode>(V.getOperand(1))->getReg();
+ if (Reg.isVirtual())
+ return MRI.getLiveInPhysReg(Reg);
+ return Reg;
+ };
+ if (GetPhysReg(N1) == LoReg && GetPhysReg(N0) != LoReg)
+ std::swap(N0, N1);
+ }
+
SDValue InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
N0, SDValue()).getValue(1);
SDValue ResHi, ResLo;
diff --git a/llvm/test/CodeGen/X86/mulx64-no-implicit-copy.ll b/llvm/test/CodeGen/X86/mulx64-no-implicit-copy.ll
new file mode 100644
index 0000000000000..60f8309440f29
--- /dev/null
+++ b/llvm/test/CodeGen/X86/mulx64-no-implicit-copy.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+bmi2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=core-avx2 | FileCheck %s
+;
+; When a MULX operand already lives in RDX (the implicit source register) at
+; function entry, no register-copy preamble should be emitted before the MULX.
+; In the SysV x86-64 ABI the 3rd integer argument arrives in %rdx.
+
+; Pure 64x64->128 multiply: arg 'a' is the 3rd argument, arriving in %rdx.
+; The mul i128 node is emitted with operands in (b, a) order (as Clang does).
+define void @mul64_u128_a_in_rdx(ptr %hi, ptr %lo, i64 %a, i64 %b) {
+; CHECK-LABEL: mul64_u128_a_in_rdx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mulxq %rcx, %rcx, %rax
+; CHECK-NEXT: movq %rcx, (%rsi)
+; CHECK-NEXT: movq %rax, (%rdi)
+; CHECK-NEXT: retq
+ %za = zext i64 %a to i128
+ %zb = zext i64 %b to i128
+ %r = mul nuw i128 %zb, %za
+ %lo_val = trunc i128 %r to i64
+ store i64 %lo_val, ptr %lo
+ %hi_shr = lshr i128 %r, 64
+ %hi_val = trunc nuw i128 %hi_shr to i64
+ store i64 %hi_val, ptr %hi
+ ret void
+}
+
+; Same multiply with operands in natural (a, b) order.
+define void @mul64_u128_a_in_rdx_natural_order(ptr %hi, ptr %lo, i64 %a, i64 %b) {
+; CHECK-LABEL: mul64_u128_a_in_rdx_natural_order:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mulxq %rcx, %rcx, %rax
+; CHECK-NEXT: movq %rcx, (%rsi)
+; CHECK-NEXT: movq %rax, (%rdi)
+; CHECK-NEXT: retq
+ %za = zext i64 %a to i128
+ %zb = zext i64 %b to i128
+ %r = mul nuw i128 %za, %zb
+ %lo_val = trunc i128 %r to i64
+ store i64 %lo_val, ptr %lo
+ %hi_shr = lshr i128 %r, 64
+ %hi_val = trunc nuw i128 %hi_shr to i64
+ store i64 %hi_val, ptr %hi
+ ret void
+}
+
+; Multiply-add: hi:lo = a*b + c. 'a' is the 3rd arg in %rdx, 'c' is in %r8.
+define void @muladd64_a_in_rdx(ptr %hi, ptr %lo, i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: muladd64_a_in_rdx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mulxq %rcx, %rax, %rcx
+; CHECK-NEXT: addq %r8, %rax
+; CHECK-NEXT: adcq $0, %rcx
+; CHECK-NEXT: movq %rax, (%rsi)
+; CHECK-NEXT: movq %rcx, (%rdi)
+; CHECK-NEXT: retq
+ %za = zext i64 %a to i128
+ %zb = zext i64 %b to i128
+ %r = mul nuw i128 %zb, %za
+ %zc = zext i64 %c to i128
+ %r2 = add nuw i128 %r, %zc
+ %lo_val = trunc i128 %r2 to i64
+ store i64 %lo_val, ptr %lo
+ %hi_shr = lshr i128 %r2, 64
+ %hi_val = trunc nuw i128 %hi_shr to i64
+ store i64 %hi_val, ptr %hi
+ ret void
+}
More information about the llvm-commits
mailing list