[llvm] [X86] Put R20/R21/R28/R29 later in GR64 list (PR #120510)
Feng Zou via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 19 01:49:55 PST 2024
https://github.com/fzou1 updated https://github.com/llvm/llvm-project/pull/120510
>From 21c1c05bc179759f8febd001f53495e20dc3f60b Mon Sep 17 00:00:00 2001
From: Feng Zou <feng.zou at intel.com>
Date: Tue, 17 Dec 2024 10:43:31 +0800
Subject: [PATCH 1/2] [X86] Put R20/R21/R28/R29 later in GR64 list
These registers require extra bytes to encode in certain memory forms,
so putting them later in the list reduces code size when EGPR is
enabled. Also align the GR8, GR16 and GR32 lists to the same order.
Example:
movq (%r20), %r11 # encoding: [0xd5,0x1c,0x8b,0x1c,0x24]
movq (%r22), %r11 # encoding: [0xd5,0x1c,0x8b,0x1e]
---
llvm/lib/Target/X86/X86RegisterInfo.td | 22 +-
llvm/test/CodeGen/X86/apx/mul-i1024.ll | 1436 ++++++++++++------------
2 files changed, 729 insertions(+), 729 deletions(-)
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td
index f93f920b6aeca3..d218ad0aefc8c5 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -550,9 +550,9 @@ def SSP : X86Reg<"ssp", 0>;
// cannot be encoded.
def GR8 : RegisterClass<"X86", [i8], 8,
(add AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
- R8B, R9B, R10B, R11B, R16B, R17B, R18B, R19B, R20B,
- R21B, R22B, R23B, R24B, R25B, R26B, R27B, R28B, R29B,
- R30B, R31B, R14B, R15B, R12B, R13B)> {
+ R8B, R9B, R10B, R11B, R16B, R17B, R18B, R19B, R22B,
+ R23B, R24B, R25B, R26B, R27B, R30B, R31B, R14B,
+ R15B, R12B, R13B, R20B, R21B, R28B, R29B)> {
let AltOrders = [(sub GR8, AH, BH, CH, DH)];
let AltOrderSelect = [{
return MF.getSubtarget<X86Subtarget>().is64Bit();
@@ -567,9 +567,9 @@ def GRH8 : RegisterClass<"X86", [i8], 8,
R26BH, R27BH, R28BH, R29BH, R30BH, R31BH)>;
def GR16 : RegisterClass<"X86", [i16], 16,
(add AX, CX, DX, SI, DI, BX, BP, SP, R8W, R9W, R10W,
- R11W, R16W, R17W, R18W, R19W, R20W, R21W, R22W, R23W,
- R24W, R25W, R26W, R27W, R28W, R29W, R30W, R31W, R14W,
- R15W, R12W, R13W)>;
+ R11W, R16W, R17W, R18W, R19W, R22W, R23W, R24W,
+ R25W, R26W, R27W, R30W, R31W, R14W, R15W, R12W,
+ R13W, R20W, R21W, R28W, R29W)>;
let isAllocatable = 0 in
def GRH16 : RegisterClass<"X86", [i16], 16,
@@ -579,9 +579,9 @@ def GRH16 : RegisterClass<"X86", [i16], 16,
R25WH, R26WH, R27WH, R28WH, R29WH, R30WH, R31WH)>;
def GR32 : RegisterClass<"X86", [i32], 32,
(add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP, R8D, R9D,
- R10D, R11D, R16D, R17D, R18D, R19D, R20D, R21D, R22D,
- R23D, R24D, R25D, R26D, R27D, R28D, R29D, R30D, R31D,
- R14D, R15D, R12D, R13D)>;
+ R10D, R11D, R16D, R17D, R18D, R19D, R22D, R23D,
+ R24D, R25D, R26D, R27D, R30D, R31D, R14D, R15D,
+ R12D, R13D, R20D, R21D, R28D, R29D)>;
// GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since
// RIP isn't really a register and it can't be used anywhere except in an
@@ -590,8 +590,8 @@ def GR32 : RegisterClass<"X86", [i32], 32,
// tests because of the inclusion of RIP in this register class.
def GR64 : RegisterClass<"X86", [i64], 64,
(add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, R16, R17,
- R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29,
- R30, R31, RBX, R14, R15, R12, R13, RBP, RSP, RIP)>;
+ R18, R19, R22, R23, R24, R25, R26, R27, R30, R31, RBX,
+ R14, R15, R12, R13, R20, R21, R28, R29, RBP, RSP, RIP)>;
// GR64PLTSafe - 64-bit GPRs without R10, R11, RSP and RIP. Could be used when
// emitting code for intrinsics, which use implict input registers.
diff --git a/llvm/test/CodeGen/X86/apx/mul-i1024.ll b/llvm/test/CodeGen/X86/apx/mul-i1024.ll
index a4d15a1b21d6b4..a29a92176f4323 100644
--- a/llvm/test/CodeGen/X86/apx/mul-i1024.ll
+++ b/llvm/test/CodeGen/X86/apx/mul-i1024.ll
@@ -13,104 +13,104 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: pushq %rbx
; EGPR-NEXT: subq $104, %rsp
; EGPR-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NEXT: movq %rdi, %r24
+; EGPR-NEXT: movq %rdi, %r26
; EGPR-NEXT: movq (%rdi), %r13
; EGPR-NEXT: movq 8(%rdi), %r18
-; EGPR-NEXT: movq 24(%rdi), %r29
+; EGPR-NEXT: movq 24(%rdi), %r21
; EGPR-NEXT: movq 16(%rdi), %r17
; EGPR-NEXT: movq 40(%rdi), %rdi
-; EGPR-NEXT: movq 32(%r24), %r10
-; EGPR-NEXT: movq 56(%r24), %r15
-; EGPR-NEXT: movq 48(%r24), %r12
+; EGPR-NEXT: movq 32(%r26), %r10
+; EGPR-NEXT: movq 56(%r26), %r15
+; EGPR-NEXT: movq 48(%r26), %r12
; EGPR-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NEXT: movq 24(%rsi), %r23
+; EGPR-NEXT: movq 24(%rsi), %r25
; EGPR-NEXT: movq 16(%rsi), %r11
-; EGPR-NEXT: movq (%rsi), %r27
+; EGPR-NEXT: movq (%rsi), %r31
; EGPR-NEXT: movq 8(%rsi), %r14
; EGPR-NEXT: movq %r12, %rax
-; EGPR-NEXT: mulq %r27
+; EGPR-NEXT: mulq %r31
; EGPR-NEXT: movq %rdx, %r8
; EGPR-NEXT: movq %rax, %r19
; EGPR-NEXT: movq %r15, %rax
-; EGPR-NEXT: mulq %r27
+; EGPR-NEXT: mulq %r31
; EGPR-NEXT: movq %rdx, %r9
; EGPR-NEXT: movq %rax, %r16
; EGPR-NEXT: addq %r8, %r16
; EGPR-NEXT: adcq $0, %r9
; EGPR-NEXT: movq %r12, %rax
; EGPR-NEXT: mulq %r14
-; EGPR-NEXT: movq %rdx, %r20
+; EGPR-NEXT: movq %rdx, %r22
; EGPR-NEXT: movq %rax, %r8
; EGPR-NEXT: addq %r16, %r8
-; EGPR-NEXT: adcq %r9, %r20
+; EGPR-NEXT: adcq %r9, %r22
; EGPR-NEXT: setb %al
; EGPR-NEXT: movzbl %al, %ecx
; EGPR-NEXT: movq %r15, %rax
; EGPR-NEXT: mulq %r14
; EGPR-NEXT: movq %rdx, %r9
; EGPR-NEXT: movq %rax, %r16
-; EGPR-NEXT: addq %r20, %r16
+; EGPR-NEXT: addq %r22, %r16
; EGPR-NEXT: adcq %rcx, %r9
; EGPR-NEXT: movq %r10, %rax
-; EGPR-NEXT: mulq %r27
-; EGPR-NEXT: movq %rdx, %r20
-; EGPR-NEXT: movq %rax, %r25
+; EGPR-NEXT: mulq %r31
+; EGPR-NEXT: movq %rdx, %r22
+; EGPR-NEXT: movq %rax, %r27
; EGPR-NEXT: movq %rdi, %rax
-; EGPR-NEXT: mulq %r27
-; EGPR-NEXT: movq %rdx, %r21
-; EGPR-NEXT: movq %rax, %r22
-; EGPR-NEXT: addq %r20, %r22
-; EGPR-NEXT: adcq $0, %r21
+; EGPR-NEXT: mulq %r31
+; EGPR-NEXT: movq %rdx, %r23
+; EGPR-NEXT: movq %rax, %r24
+; EGPR-NEXT: addq %r22, %r24
+; EGPR-NEXT: adcq $0, %r23
; EGPR-NEXT: movq %r10, %rax
; EGPR-NEXT: mulq %r14
-; EGPR-NEXT: movq %rdx, %r20
-; EGPR-NEXT: movq %rax, %r28
-; EGPR-NEXT: addq %r22, %r28
-; EGPR-NEXT: adcq %r21, %r20
+; EGPR-NEXT: movq %rdx, %r22
+; EGPR-NEXT: movq %rax, %r20
+; EGPR-NEXT: addq %r24, %r20
+; EGPR-NEXT: adcq %r23, %r22
; EGPR-NEXT: setb %al
; EGPR-NEXT: movzbl %al, %ecx
; EGPR-NEXT: movq %rdi, %rax
; EGPR-NEXT: mulq %r14
-; EGPR-NEXT: movq %rdx, %r21
-; EGPR-NEXT: movq %rax, %r22
-; EGPR-NEXT: addq %r20, %r22
-; EGPR-NEXT: adcq %rcx, %r21
-; EGPR-NEXT: addq %r19, %r22
-; EGPR-NEXT: adcq %r8, %r21
+; EGPR-NEXT: movq %rdx, %r23
+; EGPR-NEXT: movq %rax, %r24
+; EGPR-NEXT: addq %r22, %r24
+; EGPR-NEXT: adcq %rcx, %r23
+; EGPR-NEXT: addq %r19, %r24
+; EGPR-NEXT: adcq %r8, %r23
; EGPR-NEXT: adcq $0, %r16
; EGPR-NEXT: adcq $0, %r9
; EGPR-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NEXT: movq %r10, %rax
; EGPR-NEXT: mulq %r11
; EGPR-NEXT: movq %rdx, %r8
-; EGPR-NEXT: movq %rax, %r30
+; EGPR-NEXT: movq %rax, %r28
; EGPR-NEXT: movq %rdi, %rax
; EGPR-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NEXT: mulq %r11
; EGPR-NEXT: movq %rdx, %r19
-; EGPR-NEXT: movq %rax, %r20
-; EGPR-NEXT: addq %r8, %r20
+; EGPR-NEXT: movq %rax, %r22
+; EGPR-NEXT: addq %r8, %r22
; EGPR-NEXT: adcq $0, %r19
; EGPR-NEXT: movq %r10, %rax
-; EGPR-NEXT: mulq %r23
+; EGPR-NEXT: mulq %r25
; EGPR-NEXT: movq %rdx, %rbx
-; EGPR-NEXT: movq %rax, %r31
-; EGPR-NEXT: addq %r20, %r31
+; EGPR-NEXT: movq %rax, %r29
+; EGPR-NEXT: addq %r22, %r29
; EGPR-NEXT: adcq %r19, %rbx
; EGPR-NEXT: setb %al
; EGPR-NEXT: movzbl %al, %ecx
; EGPR-NEXT: movq %rdi, %rax
-; EGPR-NEXT: mulq %r23
-; EGPR-NEXT: movq %rdx, %r26
+; EGPR-NEXT: mulq %r25
+; EGPR-NEXT: movq %rdx, %r30
; EGPR-NEXT: movq %rax, %r8
; EGPR-NEXT: addq %rbx, %r8
-; EGPR-NEXT: adcq %rcx, %r26
-; EGPR-NEXT: addq %r22, %r30
-; EGPR-NEXT: adcq %r21, %r31
+; EGPR-NEXT: adcq %rcx, %r30
+; EGPR-NEXT: addq %r24, %r28
+; EGPR-NEXT: adcq %r23, %r29
; EGPR-NEXT: adcq $0, %r8
-; EGPR-NEXT: adcq $0, %r26
+; EGPR-NEXT: adcq $0, %r30
; EGPR-NEXT: addq %r16, %r8
-; EGPR-NEXT: adcq %r9, %r26
+; EGPR-NEXT: adcq %r9, %r30
; EGPR-NEXT: setb %al
; EGPR-NEXT: movzbl %al, %ecx
; EGPR-NEXT: movq %r12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -122,34 +122,34 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: movq %r15, %rax
; EGPR-NEXT: mulq %r11
; EGPR-NEXT: movq %rdx, %r16
-; EGPR-NEXT: movq %rax, %r21
-; EGPR-NEXT: addq %r9, %r21
+; EGPR-NEXT: movq %rax, %r23
+; EGPR-NEXT: addq %r9, %r23
; EGPR-NEXT: adcq $0, %r16
; EGPR-NEXT: movq %r12, %rax
-; EGPR-NEXT: mulq %r23
+; EGPR-NEXT: mulq %r25
; EGPR-NEXT: movq %rdx, %r9
; EGPR-NEXT: movq %rax, %rdi
-; EGPR-NEXT: addq %r21, %rdi
+; EGPR-NEXT: addq %r23, %rdi
; EGPR-NEXT: adcq %r16, %r9
; EGPR-NEXT: setb %al
; EGPR-NEXT: movzbl %al, %r10d
; EGPR-NEXT: movq %r15, %rax
-; EGPR-NEXT: mulq %r23
-; EGPR-NEXT: movq %rdx, %r21
-; EGPR-NEXT: movq %rax, %r22
-; EGPR-NEXT: addq %r9, %r22
-; EGPR-NEXT: adcq %r10, %r21
+; EGPR-NEXT: mulq %r25
+; EGPR-NEXT: movq %rdx, %r23
+; EGPR-NEXT: movq %rax, %r24
+; EGPR-NEXT: addq %r9, %r24
+; EGPR-NEXT: adcq %r10, %r23
; EGPR-NEXT: addq %r8, %rsi
; EGPR-NEXT: movq %rsi, %r19
-; EGPR-NEXT: adcq %r26, %rdi
-; EGPR-NEXT: adcq %rcx, %r22
-; EGPR-NEXT: adcq $0, %r21
+; EGPR-NEXT: adcq %r30, %rdi
+; EGPR-NEXT: adcq %rcx, %r24
+; EGPR-NEXT: adcq $0, %r23
; EGPR-NEXT: movq %r17, %rax
-; EGPR-NEXT: mulq %r27
+; EGPR-NEXT: mulq %r31
; EGPR-NEXT: movq %rdx, %r8
; EGPR-NEXT: movq %rax, %rbx
-; EGPR-NEXT: movq %r29, %rax
-; EGPR-NEXT: mulq %r27
+; EGPR-NEXT: movq %r21, %rax
+; EGPR-NEXT: mulq %r31
; EGPR-NEXT: movq %rdx, %r9
; EGPR-NEXT: movq %rax, %r16
; EGPR-NEXT: addq %r8, %r16
@@ -157,12 +157,12 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: movq %r17, %rax
; EGPR-NEXT: mulq %r14
; EGPR-NEXT: movq %rdx, %r8
-; EGPR-NEXT: movq %rax, %r26
-; EGPR-NEXT: addq %r16, %r26
+; EGPR-NEXT: movq %rax, %r30
+; EGPR-NEXT: addq %r16, %r30
; EGPR-NEXT: adcq %r9, %r8
; EGPR-NEXT: setb %al
; EGPR-NEXT: movzbl %al, %ecx
-; EGPR-NEXT: movq %r29, %rax
+; EGPR-NEXT: movq %r21, %rax
; EGPR-NEXT: mulq %r14
; EGPR-NEXT: movq %r14, %rsi
; EGPR-NEXT: movq %rdx, %r9
@@ -170,11 +170,11 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: addq %r8, %r16
; EGPR-NEXT: adcq %rcx, %r9
; EGPR-NEXT: movq %r13, %rax
-; EGPR-NEXT: mulq %r27
+; EGPR-NEXT: mulq %r31
; EGPR-NEXT: movq %rdx, %r8
; EGPR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NEXT: movq %r18, %rax
-; EGPR-NEXT: mulq %r27
+; EGPR-NEXT: mulq %r31
; EGPR-NEXT: movq %rdx, %r14
; EGPR-NEXT: movq %rax, %r15
; EGPR-NEXT: addq %r8, %r15
@@ -195,40 +195,40 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: movzbl %cl, %eax
; EGPR-NEXT: adcq %rax, %r8
; EGPR-NEXT: addq %rbx, %r15
-; EGPR-NEXT: adcq %r26, %r8
+; EGPR-NEXT: adcq %r30, %r8
; EGPR-NEXT: adcq $0, %r16
; EGPR-NEXT: adcq $0, %r9
; EGPR-NEXT: movq %r13, %rax
; EGPR-NEXT: mulq %r11
-; EGPR-NEXT: movq %rdx, %r26
+; EGPR-NEXT: movq %rdx, %r30
; EGPR-NEXT: movq %rax, %rsi
; EGPR-NEXT: movq %r18, %rax
; EGPR-NEXT: mulq %r11
; EGPR-NEXT: movq %rdx, %rbx
; EGPR-NEXT: movq %rax, %r14
-; EGPR-NEXT: addq %r26, %r14
+; EGPR-NEXT: addq %r30, %r14
; EGPR-NEXT: adcq $0, %rbx
; EGPR-NEXT: movq %r13, %rax
-; EGPR-NEXT: mulq %r23
+; EGPR-NEXT: mulq %r25
; EGPR-NEXT: movq %rdx, %r12
; EGPR-NEXT: addq %r14, %rax
; EGPR-NEXT: movq %rax, %r10
; EGPR-NEXT: adcq %rbx, %r12
; EGPR-NEXT: setb %cl
; EGPR-NEXT: movq %r18, %rax
-; EGPR-NEXT: mulq %r23
+; EGPR-NEXT: mulq %r25
; EGPR-NEXT: movq %rdx, %r14
-; EGPR-NEXT: movq %rax, %r26
-; EGPR-NEXT: addq %r12, %r26
+; EGPR-NEXT: movq %rax, %r30
+; EGPR-NEXT: addq %r12, %r30
; EGPR-NEXT: movzbl %cl, %eax
; EGPR-NEXT: adcq %rax, %r14
; EGPR-NEXT: addq %r15, %rsi
; EGPR-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NEXT: adcq %r8, %r10
; EGPR-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NEXT: adcq $0, %r26
+; EGPR-NEXT: adcq $0, %r30
; EGPR-NEXT: adcq $0, %r14
-; EGPR-NEXT: addq %r16, %r26
+; EGPR-NEXT: addq %r16, %r30
; EGPR-NEXT: adcq %r9, %r14
; EGPR-NEXT: setb %cl
; EGPR-NEXT: movq %r17, %rax
@@ -236,48 +236,48 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: mulq %r11
; EGPR-NEXT: movq %rdx, %r8
; EGPR-NEXT: movq %rax, %rbx
-; EGPR-NEXT: movq %r29, %rax
+; EGPR-NEXT: movq %r21, %rax
; EGPR-NEXT: mulq %r11
; EGPR-NEXT: movq %rdx, %r9
; EGPR-NEXT: movq %rax, %r16
; EGPR-NEXT: addq %r8, %r16
; EGPR-NEXT: adcq $0, %r9
; EGPR-NEXT: movq %r17, %rax
-; EGPR-NEXT: mulq %r23
+; EGPR-NEXT: mulq %r25
; EGPR-NEXT: movq %rdx, %r8
; EGPR-NEXT: movq %rax, %r15
; EGPR-NEXT: addq %r16, %r15
; EGPR-NEXT: adcq %r9, %r8
; EGPR-NEXT: setb %r9b
-; EGPR-NEXT: movq %r29, %rax
-; EGPR-NEXT: mulq %r23
+; EGPR-NEXT: movq %r21, %rax
+; EGPR-NEXT: mulq %r25
; EGPR-NEXT: movq %rdx, %r12
; EGPR-NEXT: movq %rax, %rbp
; EGPR-NEXT: addq %r8, %rbp
; EGPR-NEXT: movzbl %r9b, %eax
; EGPR-NEXT: adcq %rax, %r12
-; EGPR-NEXT: addq %r26, %rbx
+; EGPR-NEXT: addq %r30, %rbx
; EGPR-NEXT: adcq %r14, %r15
; EGPR-NEXT: movzbl %cl, %eax
; EGPR-NEXT: adcq %rax, %rbp
; EGPR-NEXT: adcq $0, %r12
-; EGPR-NEXT: addq %r25, %rbx
+; EGPR-NEXT: addq %r27, %rbx
; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
-; EGPR-NEXT: movq 32(%rsi), %r25
-; EGPR-NEXT: adcq %r28, %r15
-; EGPR-NEXT: adcq %r30, %rbp
-; EGPR-NEXT: adcq %r31, %r12
+; EGPR-NEXT: movq 32(%rsi), %r27
+; EGPR-NEXT: adcq %r20, %r15
+; EGPR-NEXT: adcq %r28, %rbp
+; EGPR-NEXT: adcq %r29, %r12
; EGPR-NEXT: adcq $0, %r19
; EGPR-NEXT: movq %r19, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NEXT: adcq $0, %rdi
-; EGPR-NEXT: adcq $0, %r22
-; EGPR-NEXT: adcq $0, %r21
+; EGPR-NEXT: adcq $0, %r24
+; EGPR-NEXT: adcq $0, %r23
; EGPR-NEXT: movq %r17, %rax
-; EGPR-NEXT: mulq %r25
+; EGPR-NEXT: mulq %r27
; EGPR-NEXT: movq %rdx, %r8
-; EGPR-NEXT: movq %rax, %r28
-; EGPR-NEXT: movq %r29, %rax
-; EGPR-NEXT: mulq %r25
+; EGPR-NEXT: movq %rax, %r20
+; EGPR-NEXT: movq %r21, %rax
+; EGPR-NEXT: mulq %r27
; EGPR-NEXT: movq %rdx, %r9
; EGPR-NEXT: movq %rax, %r16
; EGPR-NEXT: addq %r8, %r16
@@ -286,11 +286,11 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: movq %r17, %rax
; EGPR-NEXT: mulq %rcx
; EGPR-NEXT: movq %rdx, %r8
-; EGPR-NEXT: movq %rax, %r26
-; EGPR-NEXT: addq %r16, %r26
+; EGPR-NEXT: movq %rax, %r30
+; EGPR-NEXT: addq %r16, %r30
; EGPR-NEXT: adcq %r9, %r8
; EGPR-NEXT: setb %r10b
-; EGPR-NEXT: movq %r29, %rax
+; EGPR-NEXT: movq %r21, %rax
; EGPR-NEXT: mulq %rcx
; EGPR-NEXT: movq %rdx, %r9
; EGPR-NEXT: movq %rax, %r16
@@ -298,138 +298,138 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: movzbl %r10b, %eax
; EGPR-NEXT: adcq %rax, %r9
; EGPR-NEXT: movq %r13, %rax
-; EGPR-NEXT: mulq %r25
+; EGPR-NEXT: mulq %r27
; EGPR-NEXT: movq %rdx, %r8
; EGPR-NEXT: movq %rax, %r19
; EGPR-NEXT: movq %r18, %rax
-; EGPR-NEXT: mulq %r25
-; EGPR-NEXT: movq %rdx, %r30
-; EGPR-NEXT: movq %rax, %r31
-; EGPR-NEXT: addq %r8, %r31
-; EGPR-NEXT: adcq $0, %r30
+; EGPR-NEXT: mulq %r27
+; EGPR-NEXT: movq %rdx, %r28
+; EGPR-NEXT: movq %rax, %r29
+; EGPR-NEXT: addq %r8, %r29
+; EGPR-NEXT: adcq $0, %r28
; EGPR-NEXT: movq %r13, %rax
; EGPR-NEXT: mulq %rcx
; EGPR-NEXT: movq %rdx, %r8
-; EGPR-NEXT: movq %rax, %r20
-; EGPR-NEXT: addq %r31, %r20
-; EGPR-NEXT: adcq %r30, %r8
+; EGPR-NEXT: movq %rax, %r22
+; EGPR-NEXT: addq %r29, %r22
+; EGPR-NEXT: adcq %r28, %r8
; EGPR-NEXT: setb %r10b
; EGPR-NEXT: movq %r18, %rax
; EGPR-NEXT: mulq %rcx
-; EGPR-NEXT: movq %rdx, %r30
-; EGPR-NEXT: movq %rax, %r31
-; EGPR-NEXT: addq %r8, %r31
+; EGPR-NEXT: movq %rdx, %r28
+; EGPR-NEXT: movq %rax, %r29
+; EGPR-NEXT: addq %r8, %r29
; EGPR-NEXT: movzbl %r10b, %eax
-; EGPR-NEXT: adcq %rax, %r30
-; EGPR-NEXT: addq %r28, %r31
-; EGPR-NEXT: adcq %r26, %r30
+; EGPR-NEXT: adcq %rax, %r28
+; EGPR-NEXT: addq %r20, %r29
+; EGPR-NEXT: adcq %r30, %r28
; EGPR-NEXT: adcq $0, %r16
; EGPR-NEXT: adcq $0, %r9
-; EGPR-NEXT: movq 48(%rsi), %r28
+; EGPR-NEXT: movq 48(%rsi), %r20
; EGPR-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NEXT: movq %r13, %rax
-; EGPR-NEXT: mulq %r28
+; EGPR-NEXT: mulq %r20
; EGPR-NEXT: movq %rdx, %r8
; EGPR-NEXT: movq %rax, %r11
; EGPR-NEXT: movq %r18, %rax
; EGPR-NEXT: movq %r18, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NEXT: mulq %r28
-; EGPR-NEXT: movq %rdx, %r26
+; EGPR-NEXT: mulq %r20
+; EGPR-NEXT: movq %rdx, %r30
; EGPR-NEXT: movq %rax, %r14
; EGPR-NEXT: addq %r8, %r14
-; EGPR-NEXT: adcq $0, %r26
+; EGPR-NEXT: adcq $0, %r30
; EGPR-NEXT: movq 56(%rsi), %r10
; EGPR-NEXT: movq %r13, %rax
; EGPR-NEXT: mulq %r10
; EGPR-NEXT: movq %rdx, %r13
; EGPR-NEXT: addq %r14, %rax
; EGPR-NEXT: movq %rax, %r14
-; EGPR-NEXT: adcq %r26, %r13
+; EGPR-NEXT: adcq %r30, %r13
; EGPR-NEXT: setb %sil
; EGPR-NEXT: movq %r18, %rax
; EGPR-NEXT: mulq %r10
-; EGPR-NEXT: movq %rdx, %r26
+; EGPR-NEXT: movq %rdx, %r30
; EGPR-NEXT: movq %rax, %r8
; EGPR-NEXT: addq %r13, %r8
; EGPR-NEXT: movzbl %sil, %eax
-; EGPR-NEXT: adcq %rax, %r26
-; EGPR-NEXT: addq %r31, %r11
-; EGPR-NEXT: adcq %r30, %r14
+; EGPR-NEXT: adcq %rax, %r30
+; EGPR-NEXT: addq %r29, %r11
+; EGPR-NEXT: adcq %r28, %r14
; EGPR-NEXT: adcq $0, %r8
-; EGPR-NEXT: adcq $0, %r26
+; EGPR-NEXT: adcq $0, %r30
; EGPR-NEXT: addq %r16, %r8
-; EGPR-NEXT: adcq %r9, %r26
+; EGPR-NEXT: adcq %r9, %r30
; EGPR-NEXT: setb %r18b
; EGPR-NEXT: movq %r17, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NEXT: movq %r17, %rax
-; EGPR-NEXT: mulq %r28
+; EGPR-NEXT: mulq %r20
; EGPR-NEXT: movq %rdx, %r9
-; EGPR-NEXT: movq %rax, %r30
-; EGPR-NEXT: movq %r29, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NEXT: movq %r29, %rax
-; EGPR-NEXT: mulq %r28
+; EGPR-NEXT: movq %rax, %r28
+; EGPR-NEXT: movq %r21, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NEXT: movq %r21, %rax
+; EGPR-NEXT: mulq %r20
; EGPR-NEXT: movq %rdx, %r16
-; EGPR-NEXT: movq %rax, %r31
-; EGPR-NEXT: addq %r9, %r31
+; EGPR-NEXT: movq %rax, %r29
+; EGPR-NEXT: addq %r9, %r29
; EGPR-NEXT: adcq $0, %r16
; EGPR-NEXT: movq %r17, %rax
; EGPR-NEXT: mulq %r10
; EGPR-NEXT: movq %rdx, %r9
; EGPR-NEXT: movq %rax, %r17
-; EGPR-NEXT: addq %r31, %r17
+; EGPR-NEXT: addq %r29, %r17
; EGPR-NEXT: adcq %r16, %r9
; EGPR-NEXT: setb %r16b
-; EGPR-NEXT: movq %r29, %rax
+; EGPR-NEXT: movq %r21, %rax
; EGPR-NEXT: mulq %r10
; EGPR-NEXT: movq %rdx, %r13
-; EGPR-NEXT: movq %rax, %r31
-; EGPR-NEXT: addq %r9, %r31
+; EGPR-NEXT: movq %rax, %r29
+; EGPR-NEXT: addq %r9, %r29
; EGPR-NEXT: movzbl %r16b, %eax
; EGPR-NEXT: adcq %rax, %r13
-; EGPR-NEXT: addq %r8, %r30
-; EGPR-NEXT: adcq %r26, %r17
+; EGPR-NEXT: addq %r8, %r28
+; EGPR-NEXT: adcq %r30, %r17
; EGPR-NEXT: movzbl %r18b, %eax
-; EGPR-NEXT: adcq %rax, %r31
+; EGPR-NEXT: adcq %rax, %r29
; EGPR-NEXT: adcq $0, %r13
; EGPR-NEXT: addq %rbx, %r19
; EGPR-NEXT: movq %r19, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NEXT: adcq %r15, %r20
-; EGPR-NEXT: movq %r20, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NEXT: adcq %r15, %r22
+; EGPR-NEXT: movq %r22, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NEXT: adcq %rbp, %r11
; EGPR-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NEXT: adcq %r12, %r14
; EGPR-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NEXT: adcq $0, %r30
+; EGPR-NEXT: adcq $0, %r28
; EGPR-NEXT: adcq $0, %r17
-; EGPR-NEXT: adcq $0, %r31
+; EGPR-NEXT: adcq $0, %r29
; EGPR-NEXT: adcq $0, %r13
-; EGPR-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r30 # 8-byte Folded Reload
+; EGPR-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r28 # 8-byte Folded Reload
; EGPR-NEXT: adcq %rdi, %r17
-; EGPR-NEXT: adcq %r22, %r31
-; EGPR-NEXT: adcq %r21, %r13
+; EGPR-NEXT: adcq %r24, %r29
+; EGPR-NEXT: adcq %r23, %r13
; EGPR-NEXT: setb %r15b
; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
; EGPR-NEXT: movq %rsi, %rax
-; EGPR-NEXT: mulq %r25
+; EGPR-NEXT: mulq %r27
; EGPR-NEXT: movq %rdx, %r8
; EGPR-NEXT: movq %rax, %r19
-; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r21 # 8-byte Reload
-; EGPR-NEXT: movq %r21, %rax
-; EGPR-NEXT: mulq %r25
+; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r23 # 8-byte Reload
+; EGPR-NEXT: movq %r23, %rax
+; EGPR-NEXT: mulq %r27
; EGPR-NEXT: movq %rdx, %r9
; EGPR-NEXT: movq %rax, %r16
; EGPR-NEXT: addq %r8, %r16
; EGPR-NEXT: adcq $0, %r9
; EGPR-NEXT: movq %rsi, %rax
-; EGPR-NEXT: movq %rsi, %r29
+; EGPR-NEXT: movq %rsi, %r21
; EGPR-NEXT: mulq %rcx
; EGPR-NEXT: movq %rdx, %r8
-; EGPR-NEXT: movq %rax, %r20
-; EGPR-NEXT: addq %r16, %r20
+; EGPR-NEXT: movq %rax, %r22
+; EGPR-NEXT: addq %r16, %r22
; EGPR-NEXT: adcq %r9, %r8
; EGPR-NEXT: setb %r18b
-; EGPR-NEXT: movq %r21, %rax
-; EGPR-NEXT: movq %r21, %r14
+; EGPR-NEXT: movq %r23, %rax
+; EGPR-NEXT: movq %r23, %r14
; EGPR-NEXT: mulq %rcx
; EGPR-NEXT: movq %rdx, %r9
; EGPR-NEXT: movq %rax, %r16
@@ -438,77 +438,77 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: adcq %rax, %r9
; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload
; EGPR-NEXT: movq %rbx, %rax
-; EGPR-NEXT: mulq %r25
+; EGPR-NEXT: mulq %r27
; EGPR-NEXT: movq %rdx, %r8
; EGPR-NEXT: movq %rax, %rdi
; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
; EGPR-NEXT: movq %rsi, %rax
-; EGPR-NEXT: mulq %r25
-; EGPR-NEXT: movq %rdx, %r21
-; EGPR-NEXT: movq %rax, %r22
-; EGPR-NEXT: addq %r8, %r22
-; EGPR-NEXT: adcq $0, %r21
+; EGPR-NEXT: mulq %r27
+; EGPR-NEXT: movq %rdx, %r23
+; EGPR-NEXT: movq %rax, %r24
+; EGPR-NEXT: addq %r8, %r24
+; EGPR-NEXT: adcq $0, %r23
; EGPR-NEXT: movq %rbx, %rax
; EGPR-NEXT: mulq %rcx
; EGPR-NEXT: movq %rdx, %r8
-; EGPR-NEXT: addq %r22, %rax
+; EGPR-NEXT: addq %r24, %rax
; EGPR-NEXT: movq %rax, %r11
-; EGPR-NEXT: adcq %r21, %r8
+; EGPR-NEXT: adcq %r23, %r8
; EGPR-NEXT: setb %r18b
; EGPR-NEXT: movq %rsi, %rax
-; EGPR-NEXT: movq %rsi, %r21
+; EGPR-NEXT: movq %rsi, %r23
; EGPR-NEXT: mulq %rcx
-; EGPR-NEXT: movq %rdx, %r22
-; EGPR-NEXT: movq %rax, %r26
-; EGPR-NEXT: addq %r8, %r26
+; EGPR-NEXT: movq %rdx, %r24
+; EGPR-NEXT: movq %rax, %r30
+; EGPR-NEXT: addq %r8, %r30
; EGPR-NEXT: movzbl %r18b, %eax
-; EGPR-NEXT: adcq %rax, %r22
-; EGPR-NEXT: addq %r19, %r26
-; EGPR-NEXT: adcq %r20, %r22
+; EGPR-NEXT: adcq %rax, %r24
+; EGPR-NEXT: addq %r19, %r30
+; EGPR-NEXT: adcq %r22, %r24
; EGPR-NEXT: adcq $0, %r16
; EGPR-NEXT: adcq $0, %r9
; EGPR-NEXT: movq %rbx, %rax
-; EGPR-NEXT: mulq %r28
+; EGPR-NEXT: mulq %r20
; EGPR-NEXT: movq %rdx, %r8
; EGPR-NEXT: movq %rax, %rsi
-; EGPR-NEXT: movq %r21, %rax
-; EGPR-NEXT: mulq %r28
+; EGPR-NEXT: movq %r23, %rax
+; EGPR-NEXT: mulq %r20
; EGPR-NEXT: movq %rdx, %r19
-; EGPR-NEXT: movq %rax, %r20
-; EGPR-NEXT: addq %r8, %r20
+; EGPR-NEXT: movq %rax, %r22
+; EGPR-NEXT: addq %r8, %r22
; EGPR-NEXT: adcq $0, %r19
; EGPR-NEXT: movq %rbx, %rax
; EGPR-NEXT: mulq %r10
; EGPR-NEXT: movq %rdx, %rbx
-; EGPR-NEXT: addq %r20, %rax
-; EGPR-NEXT: movq %rax, %r20
+; EGPR-NEXT: addq %r22, %rax
+; EGPR-NEXT: movq %rax, %r22
; EGPR-NEXT: adcq %r19, %rbx
; EGPR-NEXT: setb %r18b
-; EGPR-NEXT: movq %r21, %rax
+; EGPR-NEXT: movq %r23, %rax
; EGPR-NEXT: mulq %r10
-; EGPR-NEXT: movq %rdx, %r21
+; EGPR-NEXT: movq %rdx, %r23
; EGPR-NEXT: movq %rax, %r8
; EGPR-NEXT: addq %rbx, %r8
; EGPR-NEXT: movzbl %r18b, %eax
-; EGPR-NEXT: adcq %rax, %r21
-; EGPR-NEXT: addq %r26, %rsi
-; EGPR-NEXT: adcq %r22, %r20
+; EGPR-NEXT: adcq %rax, %r23
+; EGPR-NEXT: addq %r30, %rsi
+; EGPR-NEXT: adcq %r24, %r22
; EGPR-NEXT: adcq $0, %r8
-; EGPR-NEXT: adcq $0, %r21
+; EGPR-NEXT: adcq $0, %r23
; EGPR-NEXT: addq %r16, %r8
-; EGPR-NEXT: adcq %r9, %r21
+; EGPR-NEXT: adcq %r9, %r23
; EGPR-NEXT: setb %r18b
-; EGPR-NEXT: movq %r29, %rax
-; EGPR-NEXT: mulq %r28
+; EGPR-NEXT: movq %r21, %rax
+; EGPR-NEXT: mulq %r20
; EGPR-NEXT: movq %rdx, %r9
-; EGPR-NEXT: movq %rax, %r22
+; EGPR-NEXT: movq %rax, %r24
; EGPR-NEXT: movq %r14, %rax
-; EGPR-NEXT: mulq %r28
+; EGPR-NEXT: mulq %r20
; EGPR-NEXT: movq %rdx, %r16
; EGPR-NEXT: movq %rax, %r19
; EGPR-NEXT: addq %r9, %r19
; EGPR-NEXT: adcq $0, %r16
-; EGPR-NEXT: movq %r29, %rax
+; EGPR-NEXT: movq %r21, %rax
; EGPR-NEXT: mulq %r10
; EGPR-NEXT: movq %rdx, %r9
; EGPR-NEXT: addq %r19, %rax
@@ -522,121 +522,121 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: addq %r9, %r12
; EGPR-NEXT: movzbl %r16b, %eax
; EGPR-NEXT: adcq %rax, %rbp
-; EGPR-NEXT: addq %r8, %r22
-; EGPR-NEXT: adcq %r21, %r19
+; EGPR-NEXT: addq %r8, %r24
+; EGPR-NEXT: adcq %r23, %r19
; EGPR-NEXT: movzbl %r18b, %eax
; EGPR-NEXT: adcq %rax, %r12
; EGPR-NEXT: adcq $0, %rbp
-; EGPR-NEXT: addq %r30, %rdi
+; EGPR-NEXT: addq %r28, %rdi
; EGPR-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NEXT: adcq %r17, %r11
; EGPR-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NEXT: adcq %r31, %rsi
+; EGPR-NEXT: adcq %r29, %rsi
; EGPR-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NEXT: adcq %r13, %r20
-; EGPR-NEXT: movq %r20, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NEXT: adcq %r13, %r22
+; EGPR-NEXT: movq %r22, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NEXT: movzbl %r15b, %eax
-; EGPR-NEXT: adcq %rax, %r22
-; EGPR-NEXT: movq %r22, (%rsp) # 8-byte Spill
+; EGPR-NEXT: adcq %rax, %r24
+; EGPR-NEXT: movq %r24, (%rsp) # 8-byte Spill
; EGPR-NEXT: adcq $0, %r19
; EGPR-NEXT: movq %r19, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NEXT: adcq $0, %r12
; EGPR-NEXT: adcq $0, %rbp
-; EGPR-NEXT: movq 64(%r24), %r21
+; EGPR-NEXT: movq 64(%r26), %r23
; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
; EGPR-NEXT: movq %rdi, %rax
-; EGPR-NEXT: mulq %r21
+; EGPR-NEXT: mulq %r23
; EGPR-NEXT: movq %rdx, %r8
-; EGPR-NEXT: movq %rax, %r22
-; EGPR-NEXT: movq %r23, %rax
-; EGPR-NEXT: mulq %r21
+; EGPR-NEXT: movq %rax, %r24
+; EGPR-NEXT: movq %r25, %rax
+; EGPR-NEXT: mulq %r23
; EGPR-NEXT: movq %rdx, %r9
; EGPR-NEXT: movq %rax, %r16
; EGPR-NEXT: addq %r8, %r16
; EGPR-NEXT: adcq $0, %r9
-; EGPR-NEXT: movq 72(%r24), %r30
+; EGPR-NEXT: movq 72(%r26), %r28
; EGPR-NEXT: movq %rdi, %rax
-; EGPR-NEXT: mulq %r30
+; EGPR-NEXT: mulq %r28
; EGPR-NEXT: movq %rdx, %r8
-; EGPR-NEXT: movq %rax, %r26
-; EGPR-NEXT: addq %r16, %r26
+; EGPR-NEXT: movq %rax, %r30
+; EGPR-NEXT: addq %r16, %r30
; EGPR-NEXT: adcq %r9, %r8
; EGPR-NEXT: setb %r18b
-; EGPR-NEXT: movq %r23, %rax
-; EGPR-NEXT: mulq %r30
+; EGPR-NEXT: movq %r25, %rax
+; EGPR-NEXT: mulq %r28
; EGPR-NEXT: movq %rdx, %r9
; EGPR-NEXT: movq %rax, %r16
; EGPR-NEXT: addq %r8, %r16
; EGPR-NEXT: movzbl %r18b, %eax
; EGPR-NEXT: adcq %rax, %r9
-; EGPR-NEXT: movq %r27, %rax
-; EGPR-NEXT: mulq %r21
+; EGPR-NEXT: movq %r31, %rax
+; EGPR-NEXT: mulq %r23
; EGPR-NEXT: movq %rdx, %r8
; EGPR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
; EGPR-NEXT: movq %r11, %rax
-; EGPR-NEXT: mulq %r21
-; EGPR-NEXT: movq %rdx, %r31
+; EGPR-NEXT: mulq %r23
+; EGPR-NEXT: movq %rdx, %r29
; EGPR-NEXT: movq %rax, %rbx
; EGPR-NEXT: addq %r8, %rbx
-; EGPR-NEXT: adcq $0, %r31
-; EGPR-NEXT: movq %r27, %rax
-; EGPR-NEXT: mulq %r30
+; EGPR-NEXT: adcq $0, %r29
+; EGPR-NEXT: movq %r31, %rax
+; EGPR-NEXT: mulq %r28
; EGPR-NEXT: movq %rdx, %r8
; EGPR-NEXT: addq %rbx, %rax
; EGPR-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NEXT: adcq %r31, %r8
+; EGPR-NEXT: adcq %r29, %r8
; EGPR-NEXT: setb %r18b
; EGPR-NEXT: movq %r11, %rax
-; EGPR-NEXT: mulq %r30
-; EGPR-NEXT: movq %rdx, %r31
+; EGPR-NEXT: mulq %r28
+; EGPR-NEXT: movq %rdx, %r29
; EGPR-NEXT: movq %rax, %rbx
; EGPR-NEXT: addq %r8, %rbx
; EGPR-NEXT: movzbl %r18b, %eax
-; EGPR-NEXT: adcq %rax, %r31
-; EGPR-NEXT: addq %r22, %rbx
-; EGPR-NEXT: adcq %r26, %r31
+; EGPR-NEXT: adcq %rax, %r29
+; EGPR-NEXT: addq %r24, %rbx
+; EGPR-NEXT: adcq %r30, %r29
; EGPR-NEXT: adcq $0, %r16
; EGPR-NEXT: adcq $0, %r9
-; EGPR-NEXT: movq 80(%r24), %r13
-; EGPR-NEXT: movq %r27, %rax
+; EGPR-NEXT: movq 80(%r26), %r13
+; EGPR-NEXT: movq %r31, %rax
; EGPR-NEXT: mulq %r13
; EGPR-NEXT: movq %rdx, %r8
; EGPR-NEXT: movq %rax, %rsi
; EGPR-NEXT: movq %r11, %rax
; EGPR-NEXT: mulq %r13
-; EGPR-NEXT: movq %rdx, %r26
+; EGPR-NEXT: movq %rdx, %r30
; EGPR-NEXT: movq %rax, %r14
; EGPR-NEXT: addq %r8, %r14
-; EGPR-NEXT: adcq $0, %r26
-; EGPR-NEXT: movq 88(%r24), %r18
-; EGPR-NEXT: movq %r27, %rax
+; EGPR-NEXT: adcq $0, %r30
+; EGPR-NEXT: movq 88(%r26), %r18
+; EGPR-NEXT: movq %r31, %rax
; EGPR-NEXT: mulq %r18
; EGPR-NEXT: movq %rdx, %r15
-; EGPR-NEXT: movq %rax, %r22
-; EGPR-NEXT: addq %r14, %r22
-; EGPR-NEXT: adcq %r26, %r15
+; EGPR-NEXT: movq %rax, %r24
+; EGPR-NEXT: addq %r14, %r24
+; EGPR-NEXT: adcq %r30, %r15
; EGPR-NEXT: setb %r14b
; EGPR-NEXT: movq %r11, %rax
; EGPR-NEXT: mulq %r18
-; EGPR-NEXT: movq %rdx, %r26
+; EGPR-NEXT: movq %rdx, %r30
; EGPR-NEXT: movq %rax, %r8
; EGPR-NEXT: addq %r15, %r8
; EGPR-NEXT: movzbl %r14b, %eax
-; EGPR-NEXT: adcq %rax, %r26
+; EGPR-NEXT: adcq %rax, %r30
; EGPR-NEXT: addq %rbx, %rsi
; EGPR-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NEXT: adcq %r31, %r22
+; EGPR-NEXT: adcq %r29, %r24
; EGPR-NEXT: adcq $0, %r8
-; EGPR-NEXT: adcq $0, %r26
+; EGPR-NEXT: adcq $0, %r30
; EGPR-NEXT: addq %r16, %r8
-; EGPR-NEXT: adcq %r9, %r26
-; EGPR-NEXT: setb %r31b
+; EGPR-NEXT: adcq %r9, %r30
+; EGPR-NEXT: setb %r29b
; EGPR-NEXT: movq %rdi, %rax
; EGPR-NEXT: mulq %r13
; EGPR-NEXT: movq %rdx, %r9
; EGPR-NEXT: movq %rax, %rsi
-; EGPR-NEXT: movq %r23, %rax
+; EGPR-NEXT: movq %r25, %rax
; EGPR-NEXT: mulq %r13
; EGPR-NEXT: movq %rdx, %r16
; EGPR-NEXT: movq %rax, %r14
@@ -649,7 +649,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: addq %r14, %rbx
; EGPR-NEXT: adcq %r16, %r9
; EGPR-NEXT: setb %r16b
-; EGPR-NEXT: movq %r23, %rax
+; EGPR-NEXT: movq %r25, %rax
; EGPR-NEXT: mulq %r18
; EGPR-NEXT: movq %rdx, %r14
; EGPR-NEXT: movq %rax, %r15
@@ -657,116 +657,116 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: movzbl %r16b, %eax
; EGPR-NEXT: adcq %rax, %r14
; EGPR-NEXT: addq %r8, %rsi
-; EGPR-NEXT: adcq %r26, %rbx
-; EGPR-NEXT: movzbl %r31b, %eax
+; EGPR-NEXT: adcq %r30, %rbx
+; EGPR-NEXT: movzbl %r29b, %eax
; EGPR-NEXT: adcq %rax, %r15
; EGPR-NEXT: adcq $0, %r14
-; EGPR-NEXT: imulq %r25, %r18
-; EGPR-NEXT: movq %r25, %rax
+; EGPR-NEXT: imulq %r27, %r18
+; EGPR-NEXT: movq %r27, %rax
; EGPR-NEXT: mulq %r13
; EGPR-NEXT: movq %rax, %r8
; EGPR-NEXT: addq %r18, %rdx
; EGPR-NEXT: imulq %rcx, %r13
; EGPR-NEXT: addq %rdx, %r13
-; EGPR-NEXT: movq %r28, %r9
-; EGPR-NEXT: imulq %r30, %r9
-; EGPR-NEXT: movq %r28, %rax
-; EGPR-NEXT: mulq %r21
-; EGPR-NEXT: movq %rax, %r26
+; EGPR-NEXT: movq %r20, %r9
+; EGPR-NEXT: imulq %r28, %r9
+; EGPR-NEXT: movq %r20, %rax
+; EGPR-NEXT: mulq %r23
+; EGPR-NEXT: movq %rax, %r30
; EGPR-NEXT: addq %r9, %rdx
-; EGPR-NEXT: imulq %r21, %r10
+; EGPR-NEXT: imulq %r23, %r10
; EGPR-NEXT: addq %rdx, %r10
-; EGPR-NEXT: addq %r8, %r26
+; EGPR-NEXT: addq %r8, %r30
; EGPR-NEXT: adcq %r13, %r10
-; EGPR-NEXT: movq %r21, %rax
-; EGPR-NEXT: mulq %r25
+; EGPR-NEXT: movq %r23, %rax
+; EGPR-NEXT: mulq %r27
; EGPR-NEXT: movq %rdx, %r8
; EGPR-NEXT: movq %rax, %r9
-; EGPR-NEXT: movq %r30, %rax
-; EGPR-NEXT: mulq %r25
-; EGPR-NEXT: movq %rdx, %r25
-; EGPR-NEXT: movq %rax, %r28
-; EGPR-NEXT: addq %r8, %r28
-; EGPR-NEXT: adcq $0, %r25
-; EGPR-NEXT: movq %r21, %rax
+; EGPR-NEXT: movq %r28, %rax
+; EGPR-NEXT: mulq %r27
+; EGPR-NEXT: movq %rdx, %r27
+; EGPR-NEXT: movq %rax, %r20
+; EGPR-NEXT: addq %r8, %r20
+; EGPR-NEXT: adcq $0, %r27
+; EGPR-NEXT: movq %r23, %rax
; EGPR-NEXT: mulq %rcx
; EGPR-NEXT: movq %rdx, %r8
; EGPR-NEXT: movq %rax, %r16
-; EGPR-NEXT: addq %r28, %r16
-; EGPR-NEXT: adcq %r25, %r8
+; EGPR-NEXT: addq %r20, %r16
+; EGPR-NEXT: adcq %r27, %r8
; EGPR-NEXT: setb %r18b
-; EGPR-NEXT: movq %r30, %rax
+; EGPR-NEXT: movq %r28, %rax
; EGPR-NEXT: mulq %rcx
-; EGPR-NEXT: movq %rdx, %r21
-; EGPR-NEXT: movq %rax, %r28
-; EGPR-NEXT: addq %r8, %r28
+; EGPR-NEXT: movq %rdx, %r23
+; EGPR-NEXT: movq %rax, %r20
+; EGPR-NEXT: addq %r8, %r20
; EGPR-NEXT: movzbl %r18b, %eax
-; EGPR-NEXT: adcq %rax, %r21
-; EGPR-NEXT: addq %r26, %r28
-; EGPR-NEXT: adcq %r10, %r21
-; EGPR-NEXT: movq 112(%r24), %rcx
-; EGPR-NEXT: movq %r27, %rax
+; EGPR-NEXT: adcq %rax, %r23
+; EGPR-NEXT: addq %r30, %r20
+; EGPR-NEXT: adcq %r10, %r23
+; EGPR-NEXT: movq 112(%r26), %rcx
+; EGPR-NEXT: movq %r31, %rax
; EGPR-NEXT: mulq %rcx
; EGPR-NEXT: movq %rax, %r8
; EGPR-NEXT: imulq %r11, %rcx
; EGPR-NEXT: addq %rdx, %rcx
-; EGPR-NEXT: movq 120(%r24), %rax
-; EGPR-NEXT: imulq %r27, %rax
+; EGPR-NEXT: movq 120(%r26), %rax
+; EGPR-NEXT: imulq %r31, %rax
; EGPR-NEXT: addq %rax, %rcx
-; EGPR-NEXT: movq 96(%r24), %r25
-; EGPR-NEXT: movq 104(%r24), %r26
+; EGPR-NEXT: movq 96(%r26), %r27
+; EGPR-NEXT: movq 104(%r26), %r30
; EGPR-NEXT: movq %rdi, %rax
-; EGPR-NEXT: imulq %r26, %rdi
-; EGPR-NEXT: mulq %r25
-; EGPR-NEXT: movq %rax, %r29
-; EGPR-NEXT: addq %rdi, %rdx
-; EGPR-NEXT: imulq %r25, %r23
-; EGPR-NEXT: addq %rdx, %r23
-; EGPR-NEXT: addq %r8, %r29
-; EGPR-NEXT: adcq %rcx, %r23
-; EGPR-NEXT: movq %r25, %rax
+; EGPR-NEXT: imulq %r30, %rdi
; EGPR-NEXT: mulq %r27
+; EGPR-NEXT: movq %rax, %r21
+; EGPR-NEXT: addq %rdi, %rdx
+; EGPR-NEXT: imulq %r27, %r25
+; EGPR-NEXT: addq %rdx, %r25
+; EGPR-NEXT: addq %r8, %r21
+; EGPR-NEXT: adcq %rcx, %r25
+; EGPR-NEXT: movq %r27, %rax
+; EGPR-NEXT: mulq %r31
; EGPR-NEXT: movq %rdx, %r8
-; EGPR-NEXT: movq %rax, %r20
-; EGPR-NEXT: movq %r26, %rax
-; EGPR-NEXT: mulq %r27
-; EGPR-NEXT: movq %rdx, %r27
-; EGPR-NEXT: movq %rax, %r30
-; EGPR-NEXT: addq %r8, %r30
-; EGPR-NEXT: adcq $0, %r27
-; EGPR-NEXT: movq %r25, %rax
+; EGPR-NEXT: movq %rax, %r22
+; EGPR-NEXT: movq %r30, %rax
+; EGPR-NEXT: mulq %r31
+; EGPR-NEXT: movq %rdx, %r31
+; EGPR-NEXT: movq %rax, %r28
+; EGPR-NEXT: addq %r8, %r28
+; EGPR-NEXT: adcq $0, %r31
+; EGPR-NEXT: movq %r27, %rax
; EGPR-NEXT: mulq %r11
; EGPR-NEXT: movq %rdx, %r8
-; EGPR-NEXT: movq %rax, %r25
-; EGPR-NEXT: addq %r30, %r25
-; EGPR-NEXT: adcq %r27, %r8
+; EGPR-NEXT: movq %rax, %r27
+; EGPR-NEXT: addq %r28, %r27
+; EGPR-NEXT: adcq %r31, %r8
; EGPR-NEXT: setb %cl
-; EGPR-NEXT: movq %r26, %rax
+; EGPR-NEXT: movq %r30, %rax
; EGPR-NEXT: mulq %r11
-; EGPR-NEXT: movq %rdx, %r24
-; EGPR-NEXT: movq %rax, %r27
-; EGPR-NEXT: addq %r8, %r27
+; EGPR-NEXT: movq %rdx, %r26
+; EGPR-NEXT: movq %rax, %r31
+; EGPR-NEXT: addq %r8, %r31
; EGPR-NEXT: movzbl %cl, %eax
-; EGPR-NEXT: adcq %rax, %r24
-; EGPR-NEXT: addq %r29, %r27
-; EGPR-NEXT: adcq %r23, %r24
-; EGPR-NEXT: addq %r9, %r20
-; EGPR-NEXT: adcq %r16, %r25
-; EGPR-NEXT: adcq %r28, %r27
-; EGPR-NEXT: adcq %r21, %r24
-; EGPR-NEXT: addq %rsi, %r20
-; EGPR-NEXT: adcq %rbx, %r25
-; EGPR-NEXT: adcq %r15, %r27
-; EGPR-NEXT: adcq %r14, %r24
+; EGPR-NEXT: adcq %rax, %r26
+; EGPR-NEXT: addq %r21, %r31
+; EGPR-NEXT: adcq %r25, %r26
+; EGPR-NEXT: addq %r9, %r22
+; EGPR-NEXT: adcq %r16, %r27
+; EGPR-NEXT: adcq %r20, %r31
+; EGPR-NEXT: adcq %r23, %r26
+; EGPR-NEXT: addq %rsi, %r22
+; EGPR-NEXT: adcq %rbx, %r27
+; EGPR-NEXT: adcq %r15, %r31
+; EGPR-NEXT: adcq %r14, %r26
; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
; EGPR-NEXT: movq 80(%r11), %rbx
; EGPR-NEXT: movq %rbx, %rax
; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r19 # 8-byte Reload
; EGPR-NEXT: mulq %r19
-; EGPR-NEXT: movq %rax, %r21
+; EGPR-NEXT: movq %rax, %r23
; EGPR-NEXT: movq %rdx, %r8
-; EGPR-NEXT: movq 88(%r11), %r28
-; EGPR-NEXT: movq %r28, %rax
+; EGPR-NEXT: movq 88(%r11), %r20
+; EGPR-NEXT: movq %r20, %rax
; EGPR-NEXT: mulq %r19
; EGPR-NEXT: movq %rdx, %r9
; EGPR-NEXT: movq %rax, %r16
@@ -776,11 +776,11 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r17 # 8-byte Reload
; EGPR-NEXT: mulq %r17
; EGPR-NEXT: movq %rdx, %r8
-; EGPR-NEXT: movq %rax, %r26
-; EGPR-NEXT: addq %r16, %r26
+; EGPR-NEXT: movq %rax, %r30
+; EGPR-NEXT: addq %r16, %r30
; EGPR-NEXT: adcq %r9, %r8
; EGPR-NEXT: setb %cl
-; EGPR-NEXT: movq %r28, %rax
+; EGPR-NEXT: movq %r20, %rax
; EGPR-NEXT: mulq %r17
; EGPR-NEXT: movq %rdx, %r9
; EGPR-NEXT: movq %rax, %r16
@@ -790,71 +790,71 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: movq 64(%r11), %r15
; EGPR-NEXT: movq %r15, %rax
; EGPR-NEXT: mulq %r19
-; EGPR-NEXT: movq %rax, %r23
+; EGPR-NEXT: movq %rax, %r25
; EGPR-NEXT: movq %rdx, %r8
; EGPR-NEXT: movq 72(%r11), %r14
; EGPR-NEXT: movq %r14, %rax
; EGPR-NEXT: mulq %r19
-; EGPR-NEXT: movq %rdx, %r30
-; EGPR-NEXT: movq %rax, %r31
-; EGPR-NEXT: addq %r8, %r31
-; EGPR-NEXT: adcq $0, %r30
+; EGPR-NEXT: movq %rdx, %r28
+; EGPR-NEXT: movq %rax, %r29
+; EGPR-NEXT: addq %r8, %r29
+; EGPR-NEXT: adcq $0, %r28
; EGPR-NEXT: movq %r15, %rax
; EGPR-NEXT: mulq %r17
; EGPR-NEXT: movq %rdx, %r8
-; EGPR-NEXT: movq %rax, %r29
-; EGPR-NEXT: addq %r31, %r29
-; EGPR-NEXT: adcq %r30, %r8
+; EGPR-NEXT: movq %rax, %r21
+; EGPR-NEXT: addq %r29, %r21
+; EGPR-NEXT: adcq %r28, %r8
; EGPR-NEXT: setb %cl
; EGPR-NEXT: movq %r14, %rax
; EGPR-NEXT: mulq %r17
-; EGPR-NEXT: movq %rdx, %r31
+; EGPR-NEXT: movq %rdx, %r29
; EGPR-NEXT: movq %rax, %r13
; EGPR-NEXT: addq %r8, %r13
; EGPR-NEXT: movzbl %cl, %eax
-; EGPR-NEXT: adcq %rax, %r31
-; EGPR-NEXT: addq %r21, %r13
-; EGPR-NEXT: adcq %r26, %r31
+; EGPR-NEXT: adcq %rax, %r29
+; EGPR-NEXT: addq %r23, %r13
+; EGPR-NEXT: adcq %r30, %r29
; EGPR-NEXT: adcq $0, %r16
; EGPR-NEXT: adcq $0, %r9
; EGPR-NEXT: movq %r15, %rax
; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
; EGPR-NEXT: mulq %rdi
; EGPR-NEXT: movq %rdx, %r8
-; EGPR-NEXT: movq %rax, %r30
+; EGPR-NEXT: movq %rax, %r28
; EGPR-NEXT: movq %r14, %rax
; EGPR-NEXT: mulq %rdi
-; EGPR-NEXT: movq %rdx, %r26
+; EGPR-NEXT: movq %rdx, %r30
; EGPR-NEXT: movq %rax, %rcx
; EGPR-NEXT: addq %r8, %rcx
-; EGPR-NEXT: adcq $0, %r26
+; EGPR-NEXT: adcq $0, %r30
; EGPR-NEXT: movq %r15, %rax
; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r18 # 8-byte Reload
; EGPR-NEXT: mulq %r18
; EGPR-NEXT: movq %rdx, %r10
-; EGPR-NEXT: movq %rax, %r21
-; EGPR-NEXT: addq %rcx, %r21
-; EGPR-NEXT: adcq %r26, %r10
+; EGPR-NEXT: movq %rax, %r23
+; EGPR-NEXT: addq %rcx, %r23
+; EGPR-NEXT: adcq %r30, %r10
; EGPR-NEXT: setb %cl
; EGPR-NEXT: movq %r14, %rax
; EGPR-NEXT: mulq %r18
-; EGPR-NEXT: movq %rdx, %r26
+; EGPR-NEXT: movq %rdx, %r30
; EGPR-NEXT: movq %rax, %r8
; EGPR-NEXT: addq %r10, %r8
; EGPR-NEXT: movzbl %cl, %eax
-; EGPR-NEXT: adcq %rax, %r26
-; EGPR-NEXT: addq %r13, %r30
-; EGPR-NEXT: adcq %r31, %r21
+; EGPR-NEXT: adcq %rax, %r30
+; EGPR-NEXT: addq %r13, %r28
+; EGPR-NEXT: adcq %r29, %r23
; EGPR-NEXT: adcq $0, %r8
-; EGPR-NEXT: adcq $0, %r26
+; EGPR-NEXT: adcq $0, %r30
; EGPR-NEXT: addq %r16, %r8
-; EGPR-NEXT: adcq %r9, %r26
+; EGPR-NEXT: adcq %r9, %r30
; EGPR-NEXT: setb %sil
; EGPR-NEXT: movq %rbx, %rax
; EGPR-NEXT: mulq %rdi
; EGPR-NEXT: movq %rdx, %rcx
-; EGPR-NEXT: movq %rax, %r31
-; EGPR-NEXT: movq %r28, %rax
+; EGPR-NEXT: movq %rax, %r29
+; EGPR-NEXT: movq %r20, %rax
; EGPR-NEXT: mulq %rdi
; EGPR-NEXT: movq %rdx, %r9
; EGPR-NEXT: movq %rax, %r10
@@ -867,15 +867,15 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: addq %r10, %r13
; EGPR-NEXT: adcq %r9, %rcx
; EGPR-NEXT: setb %r10b
-; EGPR-NEXT: movq %r28, %rax
+; EGPR-NEXT: movq %r20, %rax
; EGPR-NEXT: mulq %r18
; EGPR-NEXT: movq %rdx, %r16
; EGPR-NEXT: movq %rax, %r9
; EGPR-NEXT: addq %rcx, %r9
; EGPR-NEXT: movzbl %r10b, %eax
; EGPR-NEXT: adcq %rax, %r16
-; EGPR-NEXT: addq %r8, %r31
-; EGPR-NEXT: adcq %r26, %r13
+; EGPR-NEXT: addq %r8, %r29
+; EGPR-NEXT: adcq %r30, %r13
; EGPR-NEXT: movzbl %sil, %eax
; EGPR-NEXT: adcq %rax, %r9
; EGPR-NEXT: adcq $0, %r16
@@ -885,9 +885,9 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: mulq %rdi
; EGPR-NEXT: movq %rax, %r8
; EGPR-NEXT: addq %r18, %rdx
-; EGPR-NEXT: movq 104(%r11), %r26
+; EGPR-NEXT: movq 104(%r11), %r30
; EGPR-NEXT: movq %rdi, %rax
-; EGPR-NEXT: imulq %r26, %rax
+; EGPR-NEXT: imulq %r30, %rax
; EGPR-NEXT: addq %rdx, %rax
; EGPR-NEXT: movq %rax, %r10
; EGPR-NEXT: movq 112(%r11), %rax
@@ -912,14 +912,14 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: addq %r8, %r10
; EGPR-NEXT: adcq $0, %rcx
; EGPR-NEXT: movq %r19, %rax
-; EGPR-NEXT: mulq %r26
+; EGPR-NEXT: mulq %r30
; EGPR-NEXT: movq %rdx, %r8
; EGPR-NEXT: movq %rax, %r11
; EGPR-NEXT: addq %r10, %r11
; EGPR-NEXT: adcq %rcx, %r8
; EGPR-NEXT: setb %cl
; EGPR-NEXT: movq %r17, %rax
-; EGPR-NEXT: mulq %r26
+; EGPR-NEXT: mulq %r30
; EGPR-NEXT: movq %rdx, %r10
; EGPR-NEXT: movq %rax, %r17
; EGPR-NEXT: addq %r8, %r17
@@ -944,12 +944,12 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: movq %rbx, %rax
; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Reload
; EGPR-NEXT: mulq %r8
-; EGPR-NEXT: movq %rax, %r26
+; EGPR-NEXT: movq %rax, %r30
; EGPR-NEXT: addq %rdi, %rdx
-; EGPR-NEXT: imulq %r8, %r28
-; EGPR-NEXT: addq %rdx, %r28
-; EGPR-NEXT: addq %rcx, %r26
-; EGPR-NEXT: adcq %r18, %r28
+; EGPR-NEXT: imulq %r8, %r20
+; EGPR-NEXT: addq %rdx, %r20
+; EGPR-NEXT: addq %rcx, %r30
+; EGPR-NEXT: adcq %r18, %r20
; EGPR-NEXT: movq %r8, %rax
; EGPR-NEXT: movq %r8, %rdi
; EGPR-NEXT: mulq %r15
@@ -973,28 +973,28 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: addq %rcx, %rax
; EGPR-NEXT: movzbl %dil, %ecx
; EGPR-NEXT: adcq %rcx, %rdx
-; EGPR-NEXT: addq %r26, %rax
-; EGPR-NEXT: adcq %r28, %rdx
+; EGPR-NEXT: addq %r30, %rax
+; EGPR-NEXT: adcq %r20, %rdx
; EGPR-NEXT: addq %rsi, %r8
; EGPR-NEXT: adcq %r11, %r18
; EGPR-NEXT: adcq %r17, %rax
; EGPR-NEXT: adcq %r10, %rdx
-; EGPR-NEXT: addq %r31, %r8
+; EGPR-NEXT: addq %r29, %r8
; EGPR-NEXT: adcq %r13, %r18
; EGPR-NEXT: adcq %r9, %rax
; EGPR-NEXT: adcq %r16, %rdx
-; EGPR-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r23 # 8-byte Folded Reload
-; EGPR-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r29 # 8-byte Folded Reload
-; EGPR-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r30 # 8-byte Folded Reload
-; EGPR-NEXT: adcq %r22, %r21
-; EGPR-NEXT: adcq %r20, %r8
-; EGPR-NEXT: adcq %r25, %r18
-; EGPR-NEXT: adcq %r27, %rax
-; EGPR-NEXT: adcq %r24, %rdx
-; EGPR-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r23 # 8-byte Folded Reload
-; EGPR-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r29 # 8-byte Folded Reload
-; EGPR-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r30 # 8-byte Folded Reload
+; EGPR-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r25 # 8-byte Folded Reload
+; EGPR-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r21 # 8-byte Folded Reload
+; EGPR-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r28 # 8-byte Folded Reload
+; EGPR-NEXT: adcq %r24, %r23
+; EGPR-NEXT: adcq %r22, %r8
+; EGPR-NEXT: adcq %r27, %r18
+; EGPR-NEXT: adcq %r31, %rax
+; EGPR-NEXT: adcq %r26, %rdx
+; EGPR-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r25 # 8-byte Folded Reload
; EGPR-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r21 # 8-byte Folded Reload
+; EGPR-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r28 # 8-byte Folded Reload
+; EGPR-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r23 # 8-byte Folded Reload
; EGPR-NEXT: adcq (%rsp), %r8 # 8-byte Folded Reload
; EGPR-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r18 # 8-byte Folded Reload
; EGPR-NEXT: adcq %r12, %rax
@@ -1016,10 +1016,10 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NEXT: movq %rsi, 48(%rcx)
; EGPR-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
; EGPR-NEXT: movq %rsi, 56(%rcx)
-; EGPR-NEXT: movq %r23, 64(%rcx)
-; EGPR-NEXT: movq %r29, 72(%rcx)
-; EGPR-NEXT: movq %r30, 80(%rcx)
-; EGPR-NEXT: movq %r21, 88(%rcx)
+; EGPR-NEXT: movq %r25, 64(%rcx)
+; EGPR-NEXT: movq %r21, 72(%rcx)
+; EGPR-NEXT: movq %r28, 80(%rcx)
+; EGPR-NEXT: movq %r23, 88(%rcx)
; EGPR-NEXT: movq %r8, 96(%rcx)
; EGPR-NEXT: movq %r18, 104(%rcx)
; EGPR-NEXT: movq %rax, 112(%rcx)
@@ -1044,67 +1044,67 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: subq $96, %rsp
; EGPR-NDD-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: movq %rsi, %r15
-; EGPR-NDD-NEXT: movq %rdi, %r20
+; EGPR-NDD-NEXT: movq %rdi, %r22
; EGPR-NDD-NEXT: movq (%rdi), %r17
; EGPR-NDD-NEXT: movq 8(%rdi), %r11
; EGPR-NDD-NEXT: movq 24(%rdi), %r9
; EGPR-NDD-NEXT: movq 16(%rdi), %r10
; EGPR-NDD-NEXT: movq 40(%rdi), %rdi
-; EGPR-NDD-NEXT: movq 32(%r20), %r16
-; EGPR-NDD-NEXT: movq 56(%r20), %r18
-; EGPR-NDD-NEXT: movq 48(%r20), %r23
+; EGPR-NDD-NEXT: movq 32(%r22), %r16
+; EGPR-NDD-NEXT: movq 56(%r22), %r18
+; EGPR-NDD-NEXT: movq 48(%r22), %r25
; EGPR-NDD-NEXT: movq 24(%rsi), %r14
-; EGPR-NDD-NEXT: movq 16(%rsi), %r24
-; EGPR-NDD-NEXT: movq (%rsi), %r22
-; EGPR-NDD-NEXT: movq 8(%rsi), %r21
-; EGPR-NDD-NEXT: movq %r23, %rax
-; EGPR-NDD-NEXT: mulq %r22
-; EGPR-NDD-NEXT: movq %rdx, %r25
+; EGPR-NDD-NEXT: movq 16(%rsi), %r26
+; EGPR-NDD-NEXT: movq (%rsi), %r24
+; EGPR-NDD-NEXT: movq 8(%rsi), %r23
+; EGPR-NDD-NEXT: movq %r25, %rax
+; EGPR-NDD-NEXT: mulq %r24
+; EGPR-NDD-NEXT: movq %rdx, %r27
; EGPR-NDD-NEXT: movq %rax, %r19
; EGPR-NDD-NEXT: movq %r18, %rax
-; EGPR-NDD-NEXT: mulq %r22
-; EGPR-NDD-NEXT: addq %rax, %r25
+; EGPR-NDD-NEXT: mulq %r24
+; EGPR-NDD-NEXT: addq %rax, %r27
; EGPR-NDD-NEXT: adcq $0, %rdx, %rcx
-; EGPR-NDD-NEXT: movq %r23, %rax
-; EGPR-NDD-NEXT: mulq %r21
-; EGPR-NDD-NEXT: addq %r25, %rax, %rsi
+; EGPR-NDD-NEXT: movq %r25, %rax
+; EGPR-NDD-NEXT: mulq %r23
+; EGPR-NDD-NEXT: addq %r27, %rax, %rsi
; EGPR-NDD-NEXT: adcq %rdx, %rcx
; EGPR-NDD-NEXT: setb %al
; EGPR-NDD-NEXT: movzbl %al, %r8d
; EGPR-NDD-NEXT: movq %r18, %rax
-; EGPR-NDD-NEXT: mulq %r21
-; EGPR-NDD-NEXT: addq %rcx, %rax, %r27
+; EGPR-NDD-NEXT: mulq %r23
+; EGPR-NDD-NEXT: addq %rcx, %rax, %r31
; EGPR-NDD-NEXT: adcq %rdx, %r8
; EGPR-NDD-NEXT: movq %r16, %rax
-; EGPR-NDD-NEXT: mulq %r22
-; EGPR-NDD-NEXT: movq %rdx, %r26
-; EGPR-NDD-NEXT: movq %rax, %r25
+; EGPR-NDD-NEXT: mulq %r24
+; EGPR-NDD-NEXT: movq %rdx, %r30
+; EGPR-NDD-NEXT: movq %rax, %r27
; EGPR-NDD-NEXT: movq %rdi, %rax
-; EGPR-NDD-NEXT: mulq %r22
-; EGPR-NDD-NEXT: addq %r26, %rax, %rcx
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r26
+; EGPR-NDD-NEXT: mulq %r24
+; EGPR-NDD-NEXT: addq %r30, %rax, %rcx
+; EGPR-NDD-NEXT: adcq $0, %rdx, %r30
; EGPR-NDD-NEXT: movq %r16, %rax
-; EGPR-NDD-NEXT: mulq %r21
+; EGPR-NDD-NEXT: mulq %r23
; EGPR-NDD-NEXT: addq %rax, %rcx
-; EGPR-NDD-NEXT: adcq %rdx, %r26
+; EGPR-NDD-NEXT: adcq %rdx, %r30
; EGPR-NDD-NEXT: setb %al
-; EGPR-NDD-NEXT: movzbl %al, %r28d
+; EGPR-NDD-NEXT: movzbl %al, %r20d
; EGPR-NDD-NEXT: movq %rdi, %rax
-; EGPR-NDD-NEXT: mulq %r21
-; EGPR-NDD-NEXT: addq %r26, %rax
-; EGPR-NDD-NEXT: adcq %r28, %rdx
-; EGPR-NDD-NEXT: addq %rax, %r19, %r28
-; EGPR-NDD-NEXT: adcq %rdx, %rsi, %r29
-; EGPR-NDD-NEXT: adcq $0, %r27
+; EGPR-NDD-NEXT: mulq %r23
+; EGPR-NDD-NEXT: addq %r30, %rax
+; EGPR-NDD-NEXT: adcq %r20, %rdx
+; EGPR-NDD-NEXT: addq %rax, %r19, %r20
+; EGPR-NDD-NEXT: adcq %rdx, %rsi, %r21
+; EGPR-NDD-NEXT: adcq $0, %r31
; EGPR-NDD-NEXT: adcq $0, %r8
; EGPR-NDD-NEXT: movq %r16, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: movq %r16, %rax
-; EGPR-NDD-NEXT: mulq %r24
+; EGPR-NDD-NEXT: mulq %r26
; EGPR-NDD-NEXT: movq %rdx, %r19
-; EGPR-NDD-NEXT: movq %rax, %r26
+; EGPR-NDD-NEXT: movq %rax, %r30
; EGPR-NDD-NEXT: movq %rdi, %rax
; EGPR-NDD-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: mulq %r24
+; EGPR-NDD-NEXT: mulq %r26
; EGPR-NDD-NEXT: addq %rax, %r19
; EGPR-NDD-NEXT: adcq $0, %rdx, %rsi
; EGPR-NDD-NEXT: movq %r16, %rax
@@ -1112,95 +1112,95 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: addq %rax, %r19
; EGPR-NDD-NEXT: adcq %rdx, %rsi
; EGPR-NDD-NEXT: setb %al
-; EGPR-NDD-NEXT: movzbl %al, %r30d
+; EGPR-NDD-NEXT: movzbl %al, %r28d
; EGPR-NDD-NEXT: movq %rdi, %rax
; EGPR-NDD-NEXT: mulq %r14
; EGPR-NDD-NEXT: addq %rsi, %rax
-; EGPR-NDD-NEXT: adcq %r30, %rdx
-; EGPR-NDD-NEXT: addq %r28, %r26, %rsi
-; EGPR-NDD-NEXT: adcq %r29, %r19, %r28
+; EGPR-NDD-NEXT: adcq %r28, %rdx
+; EGPR-NDD-NEXT: addq %r20, %r30, %rsi
+; EGPR-NDD-NEXT: adcq %r21, %r19, %r20
; EGPR-NDD-NEXT: adcq $0, %rax
; EGPR-NDD-NEXT: adcq $0, %rdx
-; EGPR-NDD-NEXT: addq %rax, %r27
+; EGPR-NDD-NEXT: addq %rax, %r31
; EGPR-NDD-NEXT: adcq %rdx, %r8
; EGPR-NDD-NEXT: setb %al
-; EGPR-NDD-NEXT: movzbl %al, %r31d
-; EGPR-NDD-NEXT: movq %r23, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: movq %r23, %rax
-; EGPR-NDD-NEXT: mulq %r24
+; EGPR-NDD-NEXT: movzbl %al, %r29d
+; EGPR-NDD-NEXT: movq %r25, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: movq %r25, %rax
+; EGPR-NDD-NEXT: mulq %r26
; EGPR-NDD-NEXT: movq %rdx, %r19
-; EGPR-NDD-NEXT: movq %rax, %r26
+; EGPR-NDD-NEXT: movq %rax, %r30
; EGPR-NDD-NEXT: movq %r18, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: movq %r18, %rax
-; EGPR-NDD-NEXT: mulq %r24
+; EGPR-NDD-NEXT: mulq %r26
; EGPR-NDD-NEXT: addq %rax, %r19
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r29
-; EGPR-NDD-NEXT: movq %r23, %rax
+; EGPR-NDD-NEXT: adcq $0, %rdx, %r21
+; EGPR-NDD-NEXT: movq %r25, %rax
; EGPR-NDD-NEXT: mulq %r14
; EGPR-NDD-NEXT: addq %rax, %r19
-; EGPR-NDD-NEXT: adcq %rdx, %r29
+; EGPR-NDD-NEXT: adcq %rdx, %r21
; EGPR-NDD-NEXT: setb %al
-; EGPR-NDD-NEXT: movzbl %al, %r30d
+; EGPR-NDD-NEXT: movzbl %al, %r28d
; EGPR-NDD-NEXT: movq %r18, %rax
; EGPR-NDD-NEXT: mulq %r14
-; EGPR-NDD-NEXT: addq %r29, %rax
-; EGPR-NDD-NEXT: adcq %r30, %rdx
-; EGPR-NDD-NEXT: addq %r27, %r26, %r29
-; EGPR-NDD-NEXT: adcq %r8, %r19, %r30
-; EGPR-NDD-NEXT: adcq %rax, %r31
+; EGPR-NDD-NEXT: addq %r21, %rax
+; EGPR-NDD-NEXT: adcq %r28, %rdx
+; EGPR-NDD-NEXT: addq %r31, %r30, %r21
+; EGPR-NDD-NEXT: adcq %r8, %r19, %r28
+; EGPR-NDD-NEXT: adcq %rax, %r29
; EGPR-NDD-NEXT: adcq $0, %rdx, %rdi
; EGPR-NDD-NEXT: movq %r10, %rax
-; EGPR-NDD-NEXT: mulq %r22
+; EGPR-NDD-NEXT: mulq %r24
; EGPR-NDD-NEXT: movq %rdx, %r19
-; EGPR-NDD-NEXT: movq %rax, %r26
+; EGPR-NDD-NEXT: movq %rax, %r30
; EGPR-NDD-NEXT: movq %r9, %rax
-; EGPR-NDD-NEXT: mulq %r22
+; EGPR-NDD-NEXT: mulq %r24
; EGPR-NDD-NEXT: addq %rax, %r19
; EGPR-NDD-NEXT: adcq $0, %rdx, %r8
; EGPR-NDD-NEXT: movq %r10, %rax
-; EGPR-NDD-NEXT: mulq %r21
+; EGPR-NDD-NEXT: mulq %r23
; EGPR-NDD-NEXT: addq %rax, %r19
; EGPR-NDD-NEXT: adcq %rdx, %r8
; EGPR-NDD-NEXT: setb %al
-; EGPR-NDD-NEXT: movzbl %al, %r27d
+; EGPR-NDD-NEXT: movzbl %al, %r31d
; EGPR-NDD-NEXT: movq %r9, %rax
-; EGPR-NDD-NEXT: mulq %r21
+; EGPR-NDD-NEXT: mulq %r23
; EGPR-NDD-NEXT: addq %rax, %r8
-; EGPR-NDD-NEXT: adcq %r27, %rdx, %rbx
+; EGPR-NDD-NEXT: adcq %r31, %rdx, %rbx
; EGPR-NDD-NEXT: movq %r17, %rax
-; EGPR-NDD-NEXT: mulq %r22
-; EGPR-NDD-NEXT: movq %rdx, %r27
+; EGPR-NDD-NEXT: mulq %r24
+; EGPR-NDD-NEXT: movq %rdx, %r31
; EGPR-NDD-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: movq %r11, %rax
-; EGPR-NDD-NEXT: mulq %r22
-; EGPR-NDD-NEXT: addq %rax, %r27
+; EGPR-NDD-NEXT: mulq %r24
+; EGPR-NDD-NEXT: addq %rax, %r31
; EGPR-NDD-NEXT: adcq $0, %rdx, %r12
; EGPR-NDD-NEXT: movq %r17, %rax
-; EGPR-NDD-NEXT: mulq %r21
-; EGPR-NDD-NEXT: addq %r27, %rax
+; EGPR-NDD-NEXT: mulq %r23
+; EGPR-NDD-NEXT: addq %r31, %rax
; EGPR-NDD-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: adcq %rdx, %r12
-; EGPR-NDD-NEXT: setb %r27b
+; EGPR-NDD-NEXT: setb %r31b
; EGPR-NDD-NEXT: movq %r11, %rax
-; EGPR-NDD-NEXT: mulq %r21
+; EGPR-NDD-NEXT: mulq %r23
; EGPR-NDD-NEXT: addq %r12, %rax
-; EGPR-NDD-NEXT: movzbl %r27b, %r27d
-; EGPR-NDD-NEXT: adcq %r27, %rdx
-; EGPR-NDD-NEXT: addq %rax, %r26, %r12
+; EGPR-NDD-NEXT: movzbl %r31b, %r31d
+; EGPR-NDD-NEXT: adcq %r31, %rdx
+; EGPR-NDD-NEXT: addq %rax, %r30, %r12
; EGPR-NDD-NEXT: adcq %rdx, %r19
; EGPR-NDD-NEXT: adcq $0, %r8
; EGPR-NDD-NEXT: adcq $0, %rbx
; EGPR-NDD-NEXT: movq %r17, %rax
-; EGPR-NDD-NEXT: mulq %r24
-; EGPR-NDD-NEXT: movq %rdx, %r26
-; EGPR-NDD-NEXT: movq %rax, %r27
+; EGPR-NDD-NEXT: mulq %r26
+; EGPR-NDD-NEXT: movq %rdx, %r30
+; EGPR-NDD-NEXT: movq %rax, %r31
; EGPR-NDD-NEXT: movq %r11, %rax
-; EGPR-NDD-NEXT: mulq %r24
-; EGPR-NDD-NEXT: addq %rax, %r26
+; EGPR-NDD-NEXT: mulq %r26
+; EGPR-NDD-NEXT: addq %rax, %r30
; EGPR-NDD-NEXT: adcq $0, %rdx, %r13
; EGPR-NDD-NEXT: movq %r17, %rax
; EGPR-NDD-NEXT: mulq %r14
-; EGPR-NDD-NEXT: addq %rax, %r26
+; EGPR-NDD-NEXT: addq %rax, %r30
; EGPR-NDD-NEXT: adcq %rdx, %r13
; EGPR-NDD-NEXT: setb %bpl
; EGPR-NDD-NEXT: movq %r11, %rax
@@ -1208,9 +1208,9 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: addq %r13, %rax
; EGPR-NDD-NEXT: movzbl %bpl, %r13d
; EGPR-NDD-NEXT: adcq %r13, %rdx
-; EGPR-NDD-NEXT: addq %r12, %r27
-; EGPR-NDD-NEXT: movq %r27, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq %r26, %r19
+; EGPR-NDD-NEXT: addq %r12, %r31
+; EGPR-NDD-NEXT: movq %r31, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq %r30, %r19
; EGPR-NDD-NEXT: movq %r19, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: adcq $0, %rax
; EGPR-NDD-NEXT: adcq $0, %rdx
@@ -1219,16 +1219,16 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: setb %r19b
; EGPR-NDD-NEXT: movq %r10, %r16
; EGPR-NDD-NEXT: movq %r10, %rax
-; EGPR-NDD-NEXT: mulq %r24
-; EGPR-NDD-NEXT: movq %rdx, %r26
-; EGPR-NDD-NEXT: movq %rax, %r27
+; EGPR-NDD-NEXT: mulq %r26
+; EGPR-NDD-NEXT: movq %rdx, %r30
+; EGPR-NDD-NEXT: movq %rax, %r31
; EGPR-NDD-NEXT: movq %r9, %rax
-; EGPR-NDD-NEXT: mulq %r24
-; EGPR-NDD-NEXT: addq %rax, %r26
+; EGPR-NDD-NEXT: mulq %r26
+; EGPR-NDD-NEXT: addq %rax, %r30
; EGPR-NDD-NEXT: adcq $0, %rdx, %r12
; EGPR-NDD-NEXT: movq %r10, %rax
; EGPR-NDD-NEXT: mulq %r14
-; EGPR-NDD-NEXT: addq %rax, %r26
+; EGPR-NDD-NEXT: addq %rax, %r30
; EGPR-NDD-NEXT: adcq %rdx, %r12
; EGPR-NDD-NEXT: setb %bpl
; EGPR-NDD-NEXT: movq %r9, %rax
@@ -1236,35 +1236,35 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: addq %r12, %rax
; EGPR-NDD-NEXT: movzbl %bpl, %r12d
; EGPR-NDD-NEXT: adcq %r12, %rdx
-; EGPR-NDD-NEXT: addq %r27, %r8
-; EGPR-NDD-NEXT: adcq %r26, %rbx
+; EGPR-NDD-NEXT: addq %r31, %r8
+; EGPR-NDD-NEXT: adcq %r30, %rbx
; EGPR-NDD-NEXT: movzbl %r19b, %r19d
; EGPR-NDD-NEXT: adcq %r19, %rax
; EGPR-NDD-NEXT: adcq $0, %rdx
-; EGPR-NDD-NEXT: addq %r8, %r25, %r12
-; EGPR-NDD-NEXT: movq 32(%r15), %r26
+; EGPR-NDD-NEXT: addq %r8, %r27, %r12
+; EGPR-NDD-NEXT: movq 32(%r15), %r30
; EGPR-NDD-NEXT: adcq %rbx, %rcx, %r13
; EGPR-NDD-NEXT: adcq %rax, %rsi, %rbp
-; EGPR-NDD-NEXT: adcq %rdx, %r28, %rbx
+; EGPR-NDD-NEXT: adcq %rdx, %r20, %rbx
+; EGPR-NDD-NEXT: adcq $0, %r21
+; EGPR-NDD-NEXT: movq %r21, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq $0, %r28
; EGPR-NDD-NEXT: adcq $0, %r29
-; EGPR-NDD-NEXT: movq %r29, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq $0, %r30
-; EGPR-NDD-NEXT: adcq $0, %r31
; EGPR-NDD-NEXT: adcq $0, %rdi
; EGPR-NDD-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: movq %r10, %rax
-; EGPR-NDD-NEXT: mulq %r26
-; EGPR-NDD-NEXT: movq %rdx, %r25
-; EGPR-NDD-NEXT: movq %rax, %r27
+; EGPR-NDD-NEXT: mulq %r30
+; EGPR-NDD-NEXT: movq %rdx, %r27
+; EGPR-NDD-NEXT: movq %rax, %r31
; EGPR-NDD-NEXT: movq %r9, %r19
; EGPR-NDD-NEXT: movq %r9, %rax
-; EGPR-NDD-NEXT: mulq %r26
-; EGPR-NDD-NEXT: addq %rax, %r25
+; EGPR-NDD-NEXT: mulq %r30
+; EGPR-NDD-NEXT: addq %rax, %r27
; EGPR-NDD-NEXT: adcq $0, %rdx, %rcx
; EGPR-NDD-NEXT: movq 40(%r15), %r18
; EGPR-NDD-NEXT: movq %r10, %rax
; EGPR-NDD-NEXT: mulq %r18
-; EGPR-NDD-NEXT: addq %r25, %rax, %r29
+; EGPR-NDD-NEXT: addq %r27, %rax, %r21
; EGPR-NDD-NEXT: adcq %rdx, %rcx
; EGPR-NDD-NEXT: setb %r8b
; EGPR-NDD-NEXT: movq %r9, %rax
@@ -1273,26 +1273,26 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: movzbl %r8b, %eax
; EGPR-NDD-NEXT: adcq %rax, %rdx, %rsi
; EGPR-NDD-NEXT: movq %r17, %rax
-; EGPR-NDD-NEXT: mulq %r26
-; EGPR-NDD-NEXT: movq %rdx, %r28
-; EGPR-NDD-NEXT: movq %rax, %r25
+; EGPR-NDD-NEXT: mulq %r30
+; EGPR-NDD-NEXT: movq %rdx, %r20
+; EGPR-NDD-NEXT: movq %rax, %r27
; EGPR-NDD-NEXT: movq %r11, %r10
; EGPR-NDD-NEXT: movq %r11, %rax
-; EGPR-NDD-NEXT: mulq %r26
-; EGPR-NDD-NEXT: addq %r28, %rax, %r8
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r28
+; EGPR-NDD-NEXT: mulq %r30
+; EGPR-NDD-NEXT: addq %r20, %rax, %r8
+; EGPR-NDD-NEXT: adcq $0, %rdx, %r20
; EGPR-NDD-NEXT: movq %r17, %rax
; EGPR-NDD-NEXT: mulq %r18
-; EGPR-NDD-NEXT: addq %r8, %rax, %r23
-; EGPR-NDD-NEXT: adcq %rdx, %r28
+; EGPR-NDD-NEXT: addq %r8, %rax, %r25
+; EGPR-NDD-NEXT: adcq %rdx, %r20
; EGPR-NDD-NEXT: setb %cl
; EGPR-NDD-NEXT: movq %r11, %rax
; EGPR-NDD-NEXT: mulq %r18
-; EGPR-NDD-NEXT: addq %r28, %rax
+; EGPR-NDD-NEXT: addq %r20, %rax
; EGPR-NDD-NEXT: movzbl %cl, %ecx
; EGPR-NDD-NEXT: adcq %rdx, %rcx
-; EGPR-NDD-NEXT: addq %rax, %r27
-; EGPR-NDD-NEXT: adcq %rcx, %r29, %r8
+; EGPR-NDD-NEXT: addq %rax, %r31
+; EGPR-NDD-NEXT: adcq %rcx, %r21, %r8
; EGPR-NDD-NEXT: adcq $0, %rdi
; EGPR-NDD-NEXT: adcq $0, %rsi, %r9
; EGPR-NDD-NEXT: movq 48(%r15), %r11
@@ -1300,17 +1300,17 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: movq %r17, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: movq %r17, %rax
; EGPR-NDD-NEXT: mulq %r11
-; EGPR-NDD-NEXT: movq %rdx, %r28
-; EGPR-NDD-NEXT: movq %rax, %r29
+; EGPR-NDD-NEXT: movq %rdx, %r20
+; EGPR-NDD-NEXT: movq %rax, %r21
; EGPR-NDD-NEXT: movq %r10, %rax
; EGPR-NDD-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: mulq %r11
-; EGPR-NDD-NEXT: addq %rax, %r28
+; EGPR-NDD-NEXT: addq %rax, %r20
; EGPR-NDD-NEXT: adcq $0, %rdx, %rcx
; EGPR-NDD-NEXT: movq 56(%r15), %r17
; EGPR-NDD-NEXT: movq %rsi, %rax
; EGPR-NDD-NEXT: mulq %r17
-; EGPR-NDD-NEXT: addq %rax, %r28
+; EGPR-NDD-NEXT: addq %rax, %r20
; EGPR-NDD-NEXT: adcq %rdx, %rcx
; EGPR-NDD-NEXT: setb %sil
; EGPR-NDD-NEXT: movq %r10, %rax
@@ -1318,8 +1318,8 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: addq %rcx, %rax
; EGPR-NDD-NEXT: movzbl %sil, %ecx
; EGPR-NDD-NEXT: adcq %rdx, %rcx
-; EGPR-NDD-NEXT: addq %r29, %r27
-; EGPR-NDD-NEXT: adcq %r8, %r28, %r10
+; EGPR-NDD-NEXT: addq %r21, %r31
+; EGPR-NDD-NEXT: adcq %r8, %r20, %r10
; EGPR-NDD-NEXT: adcq $0, %rax
; EGPR-NDD-NEXT: adcq $0, %rcx
; EGPR-NDD-NEXT: addq %rax, %rdi
@@ -1328,16 +1328,16 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: movq %r16, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: movq %r16, %rax
; EGPR-NDD-NEXT: mulq %r11
-; EGPR-NDD-NEXT: movq %rdx, %r28
-; EGPR-NDD-NEXT: movq %rax, %r29
+; EGPR-NDD-NEXT: movq %rdx, %r20
+; EGPR-NDD-NEXT: movq %rax, %r21
; EGPR-NDD-NEXT: movq %r19, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: movq %r19, %rax
; EGPR-NDD-NEXT: mulq %r11
-; EGPR-NDD-NEXT: addq %rax, %r28
+; EGPR-NDD-NEXT: addq %rax, %r20
; EGPR-NDD-NEXT: adcq $0, %rdx, %r9
; EGPR-NDD-NEXT: movq %r16, %rax
; EGPR-NDD-NEXT: mulq %r17
-; EGPR-NDD-NEXT: addq %rax, %r28
+; EGPR-NDD-NEXT: addq %rax, %r20
; EGPR-NDD-NEXT: adcq %rdx, %r9
; EGPR-NDD-NEXT: setb %cl
; EGPR-NDD-NEXT: movq %r19, %rax
@@ -1345,17 +1345,17 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: addq %r9, %rax
; EGPR-NDD-NEXT: movzbl %cl, %ecx
; EGPR-NDD-NEXT: adcq %rdx, %rcx
-; EGPR-NDD-NEXT: addq %r29, %rdi
-; EGPR-NDD-NEXT: adcq %r28, %r8
+; EGPR-NDD-NEXT: addq %r21, %rdi
+; EGPR-NDD-NEXT: adcq %r20, %r8
; EGPR-NDD-NEXT: movzbl %sil, %edx
; EGPR-NDD-NEXT: adcq %rdx, %rax
; EGPR-NDD-NEXT: adcq $0, %rcx
-; EGPR-NDD-NEXT: addq %r12, %r25
-; EGPR-NDD-NEXT: movq %r25, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq %r13, %r23, %r19
-; EGPR-NDD-NEXT: movq %r19, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq %rbp, %r27
+; EGPR-NDD-NEXT: addq %r12, %r27
; EGPR-NDD-NEXT: movq %r27, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq %r13, %r25, %r19
+; EGPR-NDD-NEXT: movq %r19, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq %rbp, %r31
+; EGPR-NDD-NEXT: movq %r31, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: adcq %rbx, %r10
; EGPR-NDD-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: adcq $0, %rdi
@@ -1363,23 +1363,23 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: adcq $0, %rax
; EGPR-NDD-NEXT: adcq $0, %rcx
; EGPR-NDD-NEXT: addq %rdi, {{[-0-9]+}}(%r{{[sb]}}p), %r19 # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq %r8, %r30
-; EGPR-NDD-NEXT: adcq %rax, %r31
+; EGPR-NDD-NEXT: adcq %r8, %r28
+; EGPR-NDD-NEXT: adcq %rax, %r29
; EGPR-NDD-NEXT: adcq %rcx, {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
; EGPR-NDD-NEXT: setb %r8b
; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
; EGPR-NDD-NEXT: movq %r13, %rax
-; EGPR-NDD-NEXT: mulq %r26
-; EGPR-NDD-NEXT: movq %rdx, %r25
-; EGPR-NDD-NEXT: movq %rax, %r28
+; EGPR-NDD-NEXT: mulq %r30
+; EGPR-NDD-NEXT: movq %rdx, %r27
+; EGPR-NDD-NEXT: movq %rax, %r20
; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
; EGPR-NDD-NEXT: movq %r10, %rax
-; EGPR-NDD-NEXT: mulq %r26
-; EGPR-NDD-NEXT: addq %rax, %r25
+; EGPR-NDD-NEXT: mulq %r30
+; EGPR-NDD-NEXT: addq %rax, %r27
; EGPR-NDD-NEXT: adcq $0, %rdx, %rsi
; EGPR-NDD-NEXT: movq %r13, %rax
; EGPR-NDD-NEXT: mulq %r18
-; EGPR-NDD-NEXT: addq %r25, %rax, %rdi
+; EGPR-NDD-NEXT: addq %r27, %rax, %rdi
; EGPR-NDD-NEXT: adcq %rdx, %rsi
; EGPR-NDD-NEXT: setb %r9b
; EGPR-NDD-NEXT: movq %r10, %rax
@@ -1388,66 +1388,66 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: addq %rax, %rsi
; EGPR-NDD-NEXT: movzbl %r9b, %eax
; EGPR-NDD-NEXT: adcq %rax, %rdx, %r9
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r23 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r23, %rax
-; EGPR-NDD-NEXT: mulq %r26
-; EGPR-NDD-NEXT: movq %rdx, %r29
-; EGPR-NDD-NEXT: movq %rax, %r25
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r25 # 8-byte Reload
+; EGPR-NDD-NEXT: movq %r25, %rax
+; EGPR-NDD-NEXT: mulq %r30
+; EGPR-NDD-NEXT: movq %rdx, %r21
+; EGPR-NDD-NEXT: movq %rax, %r27
; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
; EGPR-NDD-NEXT: movq %r12, %rax
-; EGPR-NDD-NEXT: mulq %r26
-; EGPR-NDD-NEXT: addq %rax, %r29
+; EGPR-NDD-NEXT: mulq %r30
+; EGPR-NDD-NEXT: addq %rax, %r21
; EGPR-NDD-NEXT: adcq $0, %rdx, %r10
-; EGPR-NDD-NEXT: movq %r23, %rax
+; EGPR-NDD-NEXT: movq %r25, %rax
; EGPR-NDD-NEXT: mulq %r18
-; EGPR-NDD-NEXT: addq %r29, %rax, %rbx
+; EGPR-NDD-NEXT: addq %r21, %rax, %rbx
; EGPR-NDD-NEXT: adcq %rdx, %r10
-; EGPR-NDD-NEXT: setb %r27b
+; EGPR-NDD-NEXT: setb %r31b
; EGPR-NDD-NEXT: movq %r12, %rax
; EGPR-NDD-NEXT: mulq %r18
; EGPR-NDD-NEXT: addq %r10, %rax
-; EGPR-NDD-NEXT: movzbl %r27b, %r10d
+; EGPR-NDD-NEXT: movzbl %r31b, %r10d
; EGPR-NDD-NEXT: adcq %r10, %rdx
-; EGPR-NDD-NEXT: addq %rax, %r28, %r10
+; EGPR-NDD-NEXT: addq %rax, %r20, %r10
; EGPR-NDD-NEXT: adcq %rdx, %rdi
; EGPR-NDD-NEXT: adcq $0, %rsi
; EGPR-NDD-NEXT: adcq $0, %r9
-; EGPR-NDD-NEXT: movq %r23, %rax
+; EGPR-NDD-NEXT: movq %r25, %rax
; EGPR-NDD-NEXT: mulq %r11
-; EGPR-NDD-NEXT: movq %rdx, %r28
-; EGPR-NDD-NEXT: movq %rax, %r29
+; EGPR-NDD-NEXT: movq %rdx, %r20
+; EGPR-NDD-NEXT: movq %rax, %r21
; EGPR-NDD-NEXT: movq %r12, %rax
; EGPR-NDD-NEXT: mulq %r11
-; EGPR-NDD-NEXT: addq %rax, %r28
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r27
-; EGPR-NDD-NEXT: movq %r23, %rax
+; EGPR-NDD-NEXT: addq %rax, %r20
+; EGPR-NDD-NEXT: adcq $0, %rdx, %r31
+; EGPR-NDD-NEXT: movq %r25, %rax
; EGPR-NDD-NEXT: mulq %r17
-; EGPR-NDD-NEXT: addq %rax, %r28
-; EGPR-NDD-NEXT: adcq %rdx, %r27
+; EGPR-NDD-NEXT: addq %rax, %r20
+; EGPR-NDD-NEXT: adcq %rdx, %r31
; EGPR-NDD-NEXT: setb %bpl
; EGPR-NDD-NEXT: movq %r12, %rax
; EGPR-NDD-NEXT: mulq %r17
-; EGPR-NDD-NEXT: addq %r27, %rax
-; EGPR-NDD-NEXT: movzbl %bpl, %r27d
-; EGPR-NDD-NEXT: adcq %r27, %rdx
-; EGPR-NDD-NEXT: addq %r29, %r10
-; EGPR-NDD-NEXT: adcq %r28, %rdi
+; EGPR-NDD-NEXT: addq %r31, %rax
+; EGPR-NDD-NEXT: movzbl %bpl, %r31d
+; EGPR-NDD-NEXT: adcq %r31, %rdx
+; EGPR-NDD-NEXT: addq %r21, %r10
+; EGPR-NDD-NEXT: adcq %r20, %rdi
; EGPR-NDD-NEXT: adcq $0, %rax
; EGPR-NDD-NEXT: adcq $0, %rdx
; EGPR-NDD-NEXT: addq %rax, %rsi
; EGPR-NDD-NEXT: adcq %rdx, %r9
-; EGPR-NDD-NEXT: setb %r27b
+; EGPR-NDD-NEXT: setb %r31b
; EGPR-NDD-NEXT: movq %r13, %rax
; EGPR-NDD-NEXT: mulq %r11
-; EGPR-NDD-NEXT: movq %rdx, %r28
-; EGPR-NDD-NEXT: movq %rax, %r29
+; EGPR-NDD-NEXT: movq %rdx, %r20
+; EGPR-NDD-NEXT: movq %rax, %r21
; EGPR-NDD-NEXT: movq %r16, %rax
; EGPR-NDD-NEXT: mulq %r11
-; EGPR-NDD-NEXT: addq %rax, %r28
+; EGPR-NDD-NEXT: addq %rax, %r20
; EGPR-NDD-NEXT: adcq $0, %rdx, %r12
; EGPR-NDD-NEXT: movq %r13, %rax
; EGPR-NDD-NEXT: mulq %r17
-; EGPR-NDD-NEXT: addq %rax, %r28
+; EGPR-NDD-NEXT: addq %rax, %r20
; EGPR-NDD-NEXT: adcq %rdx, %r12
; EGPR-NDD-NEXT: setb %bpl
; EGPR-NDD-NEXT: movq %r16, %rax
@@ -1455,16 +1455,16 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: addq %r12, %rax
; EGPR-NDD-NEXT: movzbl %bpl, %r12d
; EGPR-NDD-NEXT: adcq %r12, %rdx
-; EGPR-NDD-NEXT: addq %r29, %rsi
-; EGPR-NDD-NEXT: adcq %r28, %r9
-; EGPR-NDD-NEXT: movzbl %r27b, %r27d
-; EGPR-NDD-NEXT: adcq %r27, %rax
+; EGPR-NDD-NEXT: addq %r21, %rsi
+; EGPR-NDD-NEXT: adcq %r20, %r9
+; EGPR-NDD-NEXT: movzbl %r31b, %r31d
+; EGPR-NDD-NEXT: adcq %r31, %rax
; EGPR-NDD-NEXT: adcq $0, %rdx
-; EGPR-NDD-NEXT: addq %r25, %r19
+; EGPR-NDD-NEXT: addq %r27, %r19
; EGPR-NDD-NEXT: movq %r19, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq %rbx, %r30
-; EGPR-NDD-NEXT: movq %r30, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq %r31, %r10
+; EGPR-NDD-NEXT: adcq %rbx, %r28
+; EGPR-NDD-NEXT: movq %r28, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq %r29, %r10
; EGPR-NDD-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: adcq %rdi, %rcx
; EGPR-NDD-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -1477,88 +1477,88 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: adcq $0, %rdx
; EGPR-NDD-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: movq 64(%r20), %r28
-; EGPR-NDD-NEXT: movq %r24, %rax
-; EGPR-NDD-NEXT: mulq %r28
-; EGPR-NDD-NEXT: movq %rdx, %r25
-; EGPR-NDD-NEXT: movq %rax, %r30
+; EGPR-NDD-NEXT: movq 64(%r22), %r20
+; EGPR-NDD-NEXT: movq %r26, %rax
+; EGPR-NDD-NEXT: mulq %r20
+; EGPR-NDD-NEXT: movq %rdx, %r27
+; EGPR-NDD-NEXT: movq %rax, %r28
; EGPR-NDD-NEXT: movq %r14, %rax
-; EGPR-NDD-NEXT: mulq %r28
-; EGPR-NDD-NEXT: addq %rax, %r25
+; EGPR-NDD-NEXT: mulq %r20
+; EGPR-NDD-NEXT: addq %rax, %r27
; EGPR-NDD-NEXT: adcq $0, %rdx, %rcx
-; EGPR-NDD-NEXT: movq 72(%r20), %r29
-; EGPR-NDD-NEXT: movq %r24, %rax
-; EGPR-NDD-NEXT: mulq %r29
-; EGPR-NDD-NEXT: addq %rax, %r25
+; EGPR-NDD-NEXT: movq 72(%r22), %r21
+; EGPR-NDD-NEXT: movq %r26, %rax
+; EGPR-NDD-NEXT: mulq %r21
+; EGPR-NDD-NEXT: addq %rax, %r27
; EGPR-NDD-NEXT: adcq %rdx, %rcx
; EGPR-NDD-NEXT: setb %sil
; EGPR-NDD-NEXT: movq %r14, %rax
-; EGPR-NDD-NEXT: mulq %r29
+; EGPR-NDD-NEXT: mulq %r21
; EGPR-NDD-NEXT: addq %rax, %rcx
; EGPR-NDD-NEXT: movzbl %sil, %eax
; EGPR-NDD-NEXT: adcq %rax, %rdx, %rsi
-; EGPR-NDD-NEXT: movq %r22, %rax
-; EGPR-NDD-NEXT: mulq %r28
-; EGPR-NDD-NEXT: movq %rdx, %r31
+; EGPR-NDD-NEXT: movq %r24, %rax
+; EGPR-NDD-NEXT: mulq %r20
+; EGPR-NDD-NEXT: movq %rdx, %r29
; EGPR-NDD-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: movq %r21, %rax
-; EGPR-NDD-NEXT: mulq %r28
-; EGPR-NDD-NEXT: addq %rax, %r31
+; EGPR-NDD-NEXT: movq %r23, %rax
+; EGPR-NDD-NEXT: mulq %r20
+; EGPR-NDD-NEXT: addq %rax, %r29
; EGPR-NDD-NEXT: adcq $0, %rdx, %rdi
-; EGPR-NDD-NEXT: movq %r22, %rax
-; EGPR-NDD-NEXT: mulq %r29
-; EGPR-NDD-NEXT: addq %r31, %rax
+; EGPR-NDD-NEXT: movq %r24, %rax
+; EGPR-NDD-NEXT: mulq %r21
+; EGPR-NDD-NEXT: addq %r29, %rax
; EGPR-NDD-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: adcq %rdx, %rdi
; EGPR-NDD-NEXT: setb %r8b
-; EGPR-NDD-NEXT: movq %r21, %rax
-; EGPR-NDD-NEXT: mulq %r29
+; EGPR-NDD-NEXT: movq %r23, %rax
+; EGPR-NDD-NEXT: mulq %r21
; EGPR-NDD-NEXT: addq %rdi, %rax
; EGPR-NDD-NEXT: movzbl %r8b, %edi
; EGPR-NDD-NEXT: adcq %rdi, %rdx
-; EGPR-NDD-NEXT: addq %rax, %r30, %rdi
-; EGPR-NDD-NEXT: adcq %rdx, %r25
+; EGPR-NDD-NEXT: addq %rax, %r28, %rdi
+; EGPR-NDD-NEXT: adcq %rdx, %r27
; EGPR-NDD-NEXT: adcq $0, %rcx
; EGPR-NDD-NEXT: adcq $0, %rsi
-; EGPR-NDD-NEXT: movq 80(%r20), %r8
-; EGPR-NDD-NEXT: movq %r22, %rax
+; EGPR-NDD-NEXT: movq 80(%r22), %r8
+; EGPR-NDD-NEXT: movq %r24, %rax
; EGPR-NDD-NEXT: mulq %r8
-; EGPR-NDD-NEXT: movq %rdx, %r30
-; EGPR-NDD-NEXT: movq %rax, %r31
-; EGPR-NDD-NEXT: movq %r21, %rax
+; EGPR-NDD-NEXT: movq %rdx, %r28
+; EGPR-NDD-NEXT: movq %rax, %r29
+; EGPR-NDD-NEXT: movq %r23, %rax
; EGPR-NDD-NEXT: mulq %r8
-; EGPR-NDD-NEXT: addq %rax, %r30
+; EGPR-NDD-NEXT: addq %rax, %r28
; EGPR-NDD-NEXT: adcq $0, %rdx, %r9
-; EGPR-NDD-NEXT: movq 88(%r20), %rbx
-; EGPR-NDD-NEXT: movq %r22, %rax
+; EGPR-NDD-NEXT: movq 88(%r22), %rbx
+; EGPR-NDD-NEXT: movq %r24, %rax
; EGPR-NDD-NEXT: mulq %rbx
-; EGPR-NDD-NEXT: addq %rax, %r30
+; EGPR-NDD-NEXT: addq %rax, %r28
; EGPR-NDD-NEXT: adcq %rdx, %r9
; EGPR-NDD-NEXT: setb %r10b
-; EGPR-NDD-NEXT: movq %r21, %rax
+; EGPR-NDD-NEXT: movq %r23, %rax
; EGPR-NDD-NEXT: mulq %rbx
; EGPR-NDD-NEXT: addq %r9, %rax
; EGPR-NDD-NEXT: movzbl %r10b, %r9d
; EGPR-NDD-NEXT: adcq %r9, %rdx
-; EGPR-NDD-NEXT: addq %r31, %rdi
+; EGPR-NDD-NEXT: addq %r29, %rdi
; EGPR-NDD-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq %r25, %r30, %rbp
+; EGPR-NDD-NEXT: adcq %r27, %r28, %rbp
; EGPR-NDD-NEXT: adcq $0, %rax
; EGPR-NDD-NEXT: adcq $0, %rdx
; EGPR-NDD-NEXT: addq %rax, %rcx
; EGPR-NDD-NEXT: adcq %rdx, %rsi
; EGPR-NDD-NEXT: setb %dil
-; EGPR-NDD-NEXT: movq %r24, %rax
+; EGPR-NDD-NEXT: movq %r26, %rax
; EGPR-NDD-NEXT: mulq %r8
-; EGPR-NDD-NEXT: movq %rdx, %r30
-; EGPR-NDD-NEXT: movq %rax, %r31
+; EGPR-NDD-NEXT: movq %rdx, %r28
+; EGPR-NDD-NEXT: movq %rax, %r29
; EGPR-NDD-NEXT: movq %r14, %rax
; EGPR-NDD-NEXT: mulq %r8
-; EGPR-NDD-NEXT: addq %rax, %r30
+; EGPR-NDD-NEXT: addq %rax, %r28
; EGPR-NDD-NEXT: adcq $0, %rdx, %r9
-; EGPR-NDD-NEXT: movq %r24, %rax
+; EGPR-NDD-NEXT: movq %r26, %rax
; EGPR-NDD-NEXT: mulq %rbx
-; EGPR-NDD-NEXT: addq %rax, %r30
+; EGPR-NDD-NEXT: addq %rax, %r28
; EGPR-NDD-NEXT: adcq %rdx, %r9
; EGPR-NDD-NEXT: setb %r10b
; EGPR-NDD-NEXT: movq %r14, %rax
@@ -1566,191 +1566,191 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: addq %r9, %rax
; EGPR-NDD-NEXT: movzbl %r10b, %r9d
; EGPR-NDD-NEXT: adcq %r9, %rdx
-; EGPR-NDD-NEXT: addq %rcx, %r31, %r25
-; EGPR-NDD-NEXT: adcq %rsi, %r30, %r12
+; EGPR-NDD-NEXT: addq %rcx, %r29, %r27
+; EGPR-NDD-NEXT: adcq %rsi, %r28, %r12
; EGPR-NDD-NEXT: movzbl %dil, %r19d
; EGPR-NDD-NEXT: adcq %rax, %r19
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r31
-; EGPR-NDD-NEXT: imulq %r26, %rbx
-; EGPR-NDD-NEXT: movq %r26, %rax
+; EGPR-NDD-NEXT: adcq $0, %rdx, %r29
+; EGPR-NDD-NEXT: imulq %r30, %rbx
+; EGPR-NDD-NEXT: movq %r30, %rax
; EGPR-NDD-NEXT: mulq %r8
-; EGPR-NDD-NEXT: movq %rax, %r30
+; EGPR-NDD-NEXT: movq %rax, %r28
; EGPR-NDD-NEXT: addq %rbx, %rdx
; EGPR-NDD-NEXT: imulq %r18, %r8
; EGPR-NDD-NEXT: addq %rdx, %r8
-; EGPR-NDD-NEXT: imulq %r29, %r11, %rcx
+; EGPR-NDD-NEXT: imulq %r21, %r11, %rcx
; EGPR-NDD-NEXT: movq %r11, %rax
-; EGPR-NDD-NEXT: mulq %r28
+; EGPR-NDD-NEXT: mulq %r20
; EGPR-NDD-NEXT: addq %rdx, %rcx
-; EGPR-NDD-NEXT: imulq %r28, %r17, %r16
+; EGPR-NDD-NEXT: imulq %r20, %r17, %r16
; EGPR-NDD-NEXT: addq %r16, %rcx
-; EGPR-NDD-NEXT: addq %r30, %rax, %rsi
+; EGPR-NDD-NEXT: addq %r28, %rax, %rsi
; EGPR-NDD-NEXT: adcq %rcx, %r8
-; EGPR-NDD-NEXT: movq %r28, %rax
-; EGPR-NDD-NEXT: mulq %r26
-; EGPR-NDD-NEXT: movq %rdx, %r30
-; EGPR-NDD-NEXT: movq %rax, %r27
-; EGPR-NDD-NEXT: movq %r29, %rax
-; EGPR-NDD-NEXT: mulq %r26
-; EGPR-NDD-NEXT: addq %r30, %rax, %rcx
+; EGPR-NDD-NEXT: movq %r20, %rax
+; EGPR-NDD-NEXT: mulq %r30
+; EGPR-NDD-NEXT: movq %rdx, %r28
+; EGPR-NDD-NEXT: movq %rax, %r31
+; EGPR-NDD-NEXT: movq %r21, %rax
+; EGPR-NDD-NEXT: mulq %r30
+; EGPR-NDD-NEXT: addq %r28, %rax, %rcx
; EGPR-NDD-NEXT: adcq $0, %rdx, %rdi
-; EGPR-NDD-NEXT: movq %r28, %rax
+; EGPR-NDD-NEXT: movq %r20, %rax
; EGPR-NDD-NEXT: mulq %r18
; EGPR-NDD-NEXT: addq %rax, %rcx
; EGPR-NDD-NEXT: adcq %rdx, %rdi
; EGPR-NDD-NEXT: setb %r9b
-; EGPR-NDD-NEXT: movq %r29, %rax
+; EGPR-NDD-NEXT: movq %r21, %rax
; EGPR-NDD-NEXT: mulq %r18
; EGPR-NDD-NEXT: addq %rdi, %rax
; EGPR-NDD-NEXT: movzbl %r9b, %edi
; EGPR-NDD-NEXT: adcq %rdi, %rdx
; EGPR-NDD-NEXT: addq %rax, %rsi
; EGPR-NDD-NEXT: adcq %rdx, %r8
-; EGPR-NDD-NEXT: movq 112(%r20), %rdi
-; EGPR-NDD-NEXT: movq %r22, %rax
+; EGPR-NDD-NEXT: movq 112(%r22), %rdi
+; EGPR-NDD-NEXT: movq %r24, %rax
; EGPR-NDD-NEXT: mulq %rdi
-; EGPR-NDD-NEXT: movq %rax, %r26
-; EGPR-NDD-NEXT: imulq %r21, %rdi
+; EGPR-NDD-NEXT: movq %rax, %r30
+; EGPR-NDD-NEXT: imulq %r23, %rdi
; EGPR-NDD-NEXT: addq %rdi, %rdx
-; EGPR-NDD-NEXT: imulq 120(%r20), %r22, %rax
+; EGPR-NDD-NEXT: imulq 120(%r22), %r24, %rax
; EGPR-NDD-NEXT: addq %rax, %rdx, %r9
-; EGPR-NDD-NEXT: movq 96(%r20), %r28
-; EGPR-NDD-NEXT: movq 104(%r20), %rdi
-; EGPR-NDD-NEXT: imulq %rdi, %r24, %r10
-; EGPR-NDD-NEXT: movq %r24, %rax
-; EGPR-NDD-NEXT: mulq %r28
+; EGPR-NDD-NEXT: movq 96(%r22), %r20
+; EGPR-NDD-NEXT: movq 104(%r22), %rdi
+; EGPR-NDD-NEXT: imulq %rdi, %r26, %r10
+; EGPR-NDD-NEXT: movq %r26, %rax
+; EGPR-NDD-NEXT: mulq %r20
; EGPR-NDD-NEXT: addq %r10, %rdx
-; EGPR-NDD-NEXT: imulq %r28, %r14, %r23
-; EGPR-NDD-NEXT: addq %r23, %rdx
-; EGPR-NDD-NEXT: addq %rax, %r26
+; EGPR-NDD-NEXT: imulq %r20, %r14, %r25
+; EGPR-NDD-NEXT: addq %r25, %rdx
+; EGPR-NDD-NEXT: addq %rax, %r30
; EGPR-NDD-NEXT: adcq %rdx, %r9
-; EGPR-NDD-NEXT: movq %r28, %rax
-; EGPR-NDD-NEXT: mulq %r22
-; EGPR-NDD-NEXT: movq %rdx, %r23
-; EGPR-NDD-NEXT: movq %rax, %r24
+; EGPR-NDD-NEXT: movq %r20, %rax
+; EGPR-NDD-NEXT: mulq %r24
+; EGPR-NDD-NEXT: movq %rdx, %r25
+; EGPR-NDD-NEXT: movq %rax, %r26
; EGPR-NDD-NEXT: movq %rdi, %rax
-; EGPR-NDD-NEXT: mulq %r22
-; EGPR-NDD-NEXT: addq %rax, %r23
+; EGPR-NDD-NEXT: mulq %r24
+; EGPR-NDD-NEXT: addq %rax, %r25
; EGPR-NDD-NEXT: adcq $0, %rdx, %r10
-; EGPR-NDD-NEXT: movq %r28, %rax
-; EGPR-NDD-NEXT: mulq %r21
-; EGPR-NDD-NEXT: addq %rax, %r23
+; EGPR-NDD-NEXT: movq %r20, %rax
+; EGPR-NDD-NEXT: mulq %r23
+; EGPR-NDD-NEXT: addq %rax, %r25
; EGPR-NDD-NEXT: adcq %rdx, %r10
; EGPR-NDD-NEXT: setb %r11b
; EGPR-NDD-NEXT: movq %rdi, %rax
-; EGPR-NDD-NEXT: mulq %r21
+; EGPR-NDD-NEXT: mulq %r23
; EGPR-NDD-NEXT: addq %r10, %rax
; EGPR-NDD-NEXT: movzbl %r11b, %edi
; EGPR-NDD-NEXT: adcq %rdi, %rdx
-; EGPR-NDD-NEXT: addq %r26, %rax
+; EGPR-NDD-NEXT: addq %r30, %rax
; EGPR-NDD-NEXT: adcq %r9, %rdx
-; EGPR-NDD-NEXT: addq %r27, %r24
-; EGPR-NDD-NEXT: adcq %r23, %rcx
+; EGPR-NDD-NEXT: addq %r31, %r26
+; EGPR-NDD-NEXT: adcq %r25, %rcx
; EGPR-NDD-NEXT: adcq %rsi, %rax
; EGPR-NDD-NEXT: adcq %r8, %rdx
-; EGPR-NDD-NEXT: addq %r24, %r25, %rbx
+; EGPR-NDD-NEXT: addq %r26, %r27, %rbx
; EGPR-NDD-NEXT: adcq %rcx, %r12
; EGPR-NDD-NEXT: adcq %rax, %r19, %r13
-; EGPR-NDD-NEXT: adcq %rdx, %r31, %r30
-; EGPR-NDD-NEXT: movq 80(%r15), %r22
-; EGPR-NDD-NEXT: movq %r22, %rax
+; EGPR-NDD-NEXT: adcq %rdx, %r29, %r28
+; EGPR-NDD-NEXT: movq 80(%r15), %r24
+; EGPR-NDD-NEXT: movq %r24, %rax
; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r16 # 8-byte Reload
; EGPR-NDD-NEXT: mulq %r16
-; EGPR-NDD-NEXT: movq %rax, %r26
+; EGPR-NDD-NEXT: movq %rax, %r30
; EGPR-NDD-NEXT: movq %rdx, %rdi
-; EGPR-NDD-NEXT: movq 88(%r15), %r20
-; EGPR-NDD-NEXT: movq %r20, %rax
+; EGPR-NDD-NEXT: movq 88(%r15), %r22
+; EGPR-NDD-NEXT: movq %r22, %rax
; EGPR-NDD-NEXT: mulq %r16
; EGPR-NDD-NEXT: addq %rax, %rdi
; EGPR-NDD-NEXT: adcq $0, %rdx, %rcx
-; EGPR-NDD-NEXT: movq %r22, %rax
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r21 # 8-byte Reload
-; EGPR-NDD-NEXT: mulq %r21
+; EGPR-NDD-NEXT: movq %r24, %rax
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r23 # 8-byte Reload
+; EGPR-NDD-NEXT: mulq %r23
; EGPR-NDD-NEXT: addq %rax, %rdi
; EGPR-NDD-NEXT: adcq %rdx, %rcx
; EGPR-NDD-NEXT: setb %sil
-; EGPR-NDD-NEXT: movq %r20, %rax
-; EGPR-NDD-NEXT: mulq %r21
+; EGPR-NDD-NEXT: movq %r22, %rax
+; EGPR-NDD-NEXT: mulq %r23
; EGPR-NDD-NEXT: addq %rax, %rcx
; EGPR-NDD-NEXT: movzbl %sil, %eax
; EGPR-NDD-NEXT: adcq %rax, %rdx, %rsi
-; EGPR-NDD-NEXT: movq 64(%r15), %r24
-; EGPR-NDD-NEXT: movq %r24, %rax
+; EGPR-NDD-NEXT: movq 64(%r15), %r26
+; EGPR-NDD-NEXT: movq %r26, %rax
; EGPR-NDD-NEXT: mulq %r16
-; EGPR-NDD-NEXT: movq %rax, %r29
-; EGPR-NDD-NEXT: movq %rdx, %r27
-; EGPR-NDD-NEXT: movq 72(%r15), %r23
-; EGPR-NDD-NEXT: movq %r23, %rax
+; EGPR-NDD-NEXT: movq %rax, %r21
+; EGPR-NDD-NEXT: movq %rdx, %r31
+; EGPR-NDD-NEXT: movq 72(%r15), %r25
+; EGPR-NDD-NEXT: movq %r25, %rax
; EGPR-NDD-NEXT: mulq %r16
-; EGPR-NDD-NEXT: addq %rax, %r27
+; EGPR-NDD-NEXT: addq %rax, %r31
; EGPR-NDD-NEXT: adcq $0, %rdx, %r8
-; EGPR-NDD-NEXT: movq %r24, %rax
-; EGPR-NDD-NEXT: mulq %r21
-; EGPR-NDD-NEXT: addq %r27, %rax, %r31
+; EGPR-NDD-NEXT: movq %r26, %rax
+; EGPR-NDD-NEXT: mulq %r23
+; EGPR-NDD-NEXT: addq %r31, %rax, %r29
; EGPR-NDD-NEXT: adcq %rdx, %r8
; EGPR-NDD-NEXT: setb %r9b
-; EGPR-NDD-NEXT: movq %r23, %rax
-; EGPR-NDD-NEXT: mulq %r21
+; EGPR-NDD-NEXT: movq %r25, %rax
+; EGPR-NDD-NEXT: mulq %r23
; EGPR-NDD-NEXT: addq %r8, %rax
; EGPR-NDD-NEXT: movzbl %r9b, %r8d
; EGPR-NDD-NEXT: adcq %r8, %rdx
-; EGPR-NDD-NEXT: addq %rax, %r26, %r28
+; EGPR-NDD-NEXT: addq %rax, %r30, %r20
; EGPR-NDD-NEXT: adcq %rdx, %rdi
; EGPR-NDD-NEXT: adcq $0, %rcx
; EGPR-NDD-NEXT: adcq $0, %rsi
-; EGPR-NDD-NEXT: movq %r24, %rax
+; EGPR-NDD-NEXT: movq %r26, %rax
; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
; EGPR-NDD-NEXT: mulq %r10
-; EGPR-NDD-NEXT: movq %rdx, %r26
-; EGPR-NDD-NEXT: movq %rax, %r27
-; EGPR-NDD-NEXT: movq %r23, %rax
+; EGPR-NDD-NEXT: movq %rdx, %r30
+; EGPR-NDD-NEXT: movq %rax, %r31
+; EGPR-NDD-NEXT: movq %r25, %rax
; EGPR-NDD-NEXT: mulq %r10
-; EGPR-NDD-NEXT: addq %rax, %r26
+; EGPR-NDD-NEXT: addq %rax, %r30
; EGPR-NDD-NEXT: adcq $0, %rdx, %r8
-; EGPR-NDD-NEXT: movq %r24, %rax
+; EGPR-NDD-NEXT: movq %r26, %rax
; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
; EGPR-NDD-NEXT: mulq %r11
-; EGPR-NDD-NEXT: addq %r26, %rax, %r25
+; EGPR-NDD-NEXT: addq %r30, %rax, %r27
; EGPR-NDD-NEXT: adcq %rdx, %r8
; EGPR-NDD-NEXT: setb %r9b
-; EGPR-NDD-NEXT: movq %r23, %rax
+; EGPR-NDD-NEXT: movq %r25, %rax
; EGPR-NDD-NEXT: mulq %r11
; EGPR-NDD-NEXT: addq %r8, %rax
; EGPR-NDD-NEXT: movzbl %r9b, %r8d
; EGPR-NDD-NEXT: adcq %r8, %rdx
-; EGPR-NDD-NEXT: addq %r27, %r28
-; EGPR-NDD-NEXT: adcq %rdi, %r25
+; EGPR-NDD-NEXT: addq %r31, %r20
+; EGPR-NDD-NEXT: adcq %rdi, %r27
; EGPR-NDD-NEXT: adcq $0, %rax
; EGPR-NDD-NEXT: adcq $0, %rdx
; EGPR-NDD-NEXT: addq %rax, %rcx
; EGPR-NDD-NEXT: adcq %rdx, %rsi
; EGPR-NDD-NEXT: setb %dil
-; EGPR-NDD-NEXT: movq %r22, %rax
+; EGPR-NDD-NEXT: movq %r24, %rax
; EGPR-NDD-NEXT: mulq %r10
-; EGPR-NDD-NEXT: movq %rdx, %r26
-; EGPR-NDD-NEXT: movq %rax, %r27
-; EGPR-NDD-NEXT: movq %r20, %rax
+; EGPR-NDD-NEXT: movq %rdx, %r30
+; EGPR-NDD-NEXT: movq %rax, %r31
+; EGPR-NDD-NEXT: movq %r22, %rax
; EGPR-NDD-NEXT: mulq %r10
-; EGPR-NDD-NEXT: addq %rax, %r26
+; EGPR-NDD-NEXT: addq %rax, %r30
; EGPR-NDD-NEXT: adcq $0, %rdx, %r8
-; EGPR-NDD-NEXT: movq %r22, %rax
+; EGPR-NDD-NEXT: movq %r24, %rax
; EGPR-NDD-NEXT: mulq %r11
-; EGPR-NDD-NEXT: addq %r26, %rax, %r19
+; EGPR-NDD-NEXT: addq %r30, %rax, %r19
; EGPR-NDD-NEXT: adcq %rdx, %r8
; EGPR-NDD-NEXT: setb %r9b
-; EGPR-NDD-NEXT: movq %r20, %rax
+; EGPR-NDD-NEXT: movq %r22, %rax
; EGPR-NDD-NEXT: mulq %r11
; EGPR-NDD-NEXT: addq %r8, %rax
; EGPR-NDD-NEXT: movzbl %r9b, %r8d
; EGPR-NDD-NEXT: adcq %r8, %rdx
-; EGPR-NDD-NEXT: addq %rcx, %r27
+; EGPR-NDD-NEXT: addq %rcx, %r31
; EGPR-NDD-NEXT: adcq %rsi, %r19
; EGPR-NDD-NEXT: movzbl %dil, %ecx
; EGPR-NDD-NEXT: adcq %rax, %rcx
; EGPR-NDD-NEXT: adcq $0, %rdx, %rdi
-; EGPR-NDD-NEXT: movq 96(%r15), %r26
-; EGPR-NDD-NEXT: imulq %r11, %r26, %rsi
-; EGPR-NDD-NEXT: movq %r26, %rax
+; EGPR-NDD-NEXT: movq 96(%r15), %r30
+; EGPR-NDD-NEXT: imulq %r11, %r30, %rsi
+; EGPR-NDD-NEXT: movq %r30, %rax
; EGPR-NDD-NEXT: mulq %r10
; EGPR-NDD-NEXT: movq %rax, %r18
; EGPR-NDD-NEXT: addq %rsi, %rdx
@@ -1758,7 +1758,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: imulq %r10, %r8, %rax
; EGPR-NDD-NEXT: addq %rax, %rdx, %rsi
; EGPR-NDD-NEXT: movq 112(%r15), %rax
-; EGPR-NDD-NEXT: imulq %r21, %rax, %r9
+; EGPR-NDD-NEXT: imulq %r23, %rax, %r9
; EGPR-NDD-NEXT: mulq %r16
; EGPR-NDD-NEXT: addq %r9, %rdx
; EGPR-NDD-NEXT: imulq 120(%r15), %r16, %r9
@@ -1767,11 +1767,11 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: adcq %rsi, %rdx, %r9
; EGPR-NDD-NEXT: movq %r16, %rax
; EGPR-NDD-NEXT: movq %r16, %r18
-; EGPR-NDD-NEXT: mulq %r26
+; EGPR-NDD-NEXT: mulq %r30
; EGPR-NDD-NEXT: movq %rdx, %r17
; EGPR-NDD-NEXT: movq %rax, %rsi
-; EGPR-NDD-NEXT: movq %r21, %rax
-; EGPR-NDD-NEXT: mulq %r26
+; EGPR-NDD-NEXT: movq %r23, %rax
+; EGPR-NDD-NEXT: mulq %r30
; EGPR-NDD-NEXT: addq %r17, %rax, %r11
; EGPR-NDD-NEXT: adcq $0, %rdx, %r16
; EGPR-NDD-NEXT: movq %r18, %rax
@@ -1779,72 +1779,72 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: addq %rax, %r11
; EGPR-NDD-NEXT: adcq %rdx, %r16
; EGPR-NDD-NEXT: setb %r17b
-; EGPR-NDD-NEXT: movq %r21, %rax
+; EGPR-NDD-NEXT: movq %r23, %rax
; EGPR-NDD-NEXT: mulq %r8
; EGPR-NDD-NEXT: addq %r16, %rax
; EGPR-NDD-NEXT: movzbl %r17b, %r8d
; EGPR-NDD-NEXT: adcq %r8, %rdx
; EGPR-NDD-NEXT: addq %rax, %r10
; EGPR-NDD-NEXT: adcq %r9, %rdx, %r17
-; EGPR-NDD-NEXT: imulq {{[-0-9]+}}(%r{{[sb]}}p), %r24, %r8 # 8-byte Folded Reload
-; EGPR-NDD-NEXT: movq %r24, %rax
+; EGPR-NDD-NEXT: imulq {{[-0-9]+}}(%r{{[sb]}}p), %r26, %r8 # 8-byte Folded Reload
+; EGPR-NDD-NEXT: movq %r26, %rax
; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r16 # 8-byte Reload
; EGPR-NDD-NEXT: mulq %r16
; EGPR-NDD-NEXT: movq %rax, %r9
; EGPR-NDD-NEXT: addq %r8, %rdx
-; EGPR-NDD-NEXT: imulq %r16, %r23, %rax
+; EGPR-NDD-NEXT: imulq %r16, %r25, %rax
; EGPR-NDD-NEXT: addq %rax, %rdx, %r8
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r21 # 8-byte Reload
-; EGPR-NDD-NEXT: imulq %r21, %r22, %r16
-; EGPR-NDD-NEXT: movq %r22, %rax
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r26 # 8-byte Reload
-; EGPR-NDD-NEXT: mulq %r26
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r23 # 8-byte Reload
+; EGPR-NDD-NEXT: imulq %r23, %r24, %r16
+; EGPR-NDD-NEXT: movq %r24, %rax
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r30 # 8-byte Reload
+; EGPR-NDD-NEXT: mulq %r30
; EGPR-NDD-NEXT: addq %r16, %rdx
-; EGPR-NDD-NEXT: imulq %r26, %r20
-; EGPR-NDD-NEXT: addq %r20, %rdx
+; EGPR-NDD-NEXT: imulq %r30, %r22
+; EGPR-NDD-NEXT: addq %r22, %rdx
; EGPR-NDD-NEXT: addq %r9, %rax, %r16
; EGPR-NDD-NEXT: adcq %r8, %rdx, %r18
-; EGPR-NDD-NEXT: movq %r26, %rax
-; EGPR-NDD-NEXT: mulq %r24
+; EGPR-NDD-NEXT: movq %r30, %rax
+; EGPR-NDD-NEXT: mulq %r26
; EGPR-NDD-NEXT: movq %rdx, %r8
; EGPR-NDD-NEXT: movq %rax, %r9
-; EGPR-NDD-NEXT: movq %r21, %rax
-; EGPR-NDD-NEXT: movq %r21, %r22
-; EGPR-NDD-NEXT: mulq %r24
+; EGPR-NDD-NEXT: movq %r23, %rax
+; EGPR-NDD-NEXT: movq %r23, %r24
+; EGPR-NDD-NEXT: mulq %r26
; EGPR-NDD-NEXT: addq %rax, %r8
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r20
-; EGPR-NDD-NEXT: movq %r26, %rax
-; EGPR-NDD-NEXT: mulq %r23
+; EGPR-NDD-NEXT: adcq $0, %rdx, %r22
+; EGPR-NDD-NEXT: movq %r30, %rax
+; EGPR-NDD-NEXT: mulq %r25
; EGPR-NDD-NEXT: addq %rax, %r8
-; EGPR-NDD-NEXT: adcq %rdx, %r20
-; EGPR-NDD-NEXT: setb %r21b
-; EGPR-NDD-NEXT: movq %r22, %rax
-; EGPR-NDD-NEXT: mulq %r23
-; EGPR-NDD-NEXT: addq %r20, %rax
-; EGPR-NDD-NEXT: movzbl %r21b, %r20d
-; EGPR-NDD-NEXT: adcq %r20, %rdx
+; EGPR-NDD-NEXT: adcq %rdx, %r22
+; EGPR-NDD-NEXT: setb %r23b
+; EGPR-NDD-NEXT: movq %r24, %rax
+; EGPR-NDD-NEXT: mulq %r25
+; EGPR-NDD-NEXT: addq %r22, %rax
+; EGPR-NDD-NEXT: movzbl %r23b, %r22d
+; EGPR-NDD-NEXT: adcq %r22, %rdx
; EGPR-NDD-NEXT: addq %r16, %rax
; EGPR-NDD-NEXT: adcq %r18, %rdx
; EGPR-NDD-NEXT: addq %r9, %rsi
; EGPR-NDD-NEXT: adcq %r11, %r8
; EGPR-NDD-NEXT: adcq %r10, %rax
; EGPR-NDD-NEXT: adcq %r17, %rdx
-; EGPR-NDD-NEXT: addq %r27, %rsi
+; EGPR-NDD-NEXT: addq %r31, %rsi
; EGPR-NDD-NEXT: adcq %r19, %r8
; EGPR-NDD-NEXT: adcq %rcx, %rax
; EGPR-NDD-NEXT: adcq %rdi, %rdx
-; EGPR-NDD-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r29 # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r31 # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r28 # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq %rbp, %r25
+; EGPR-NDD-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r21 # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r29 # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r20 # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq %rbp, %r27
; EGPR-NDD-NEXT: adcq %rbx, %rsi
; EGPR-NDD-NEXT: adcq %r12, %r8
; EGPR-NDD-NEXT: adcq %r13, %rax
-; EGPR-NDD-NEXT: adcq %r30, %rdx
-; EGPR-NDD-NEXT: addq %r29, {{[-0-9]+}}(%r{{[sb]}}p), %r29 # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq %r31, {{[-0-9]+}}(%r{{[sb]}}p), %r31 # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq %r28, {{[-0-9]+}}(%r{{[sb]}}p), %r28 # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq %r25, {{[-0-9]+}}(%r{{[sb]}}p), %r25 # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq %r28, %rdx
+; EGPR-NDD-NEXT: addq %r21, {{[-0-9]+}}(%r{{[sb]}}p), %r21 # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq %r29, {{[-0-9]+}}(%r{{[sb]}}p), %r29 # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq %r20, {{[-0-9]+}}(%r{{[sb]}}p), %r20 # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq %r27, {{[-0-9]+}}(%r{{[sb]}}p), %r27 # 8-byte Folded Reload
; EGPR-NDD-NEXT: adcq %rsi, (%rsp), %rsi # 8-byte Folded Reload
; EGPR-NDD-NEXT: adcq %r8, {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
; EGPR-NDD-NEXT: adcq %rax, {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
@@ -1866,10 +1866,10 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: movq %rdi, 48(%rcx)
; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
; EGPR-NDD-NEXT: movq %rdi, 56(%rcx)
-; EGPR-NDD-NEXT: movq %r29, 64(%rcx)
-; EGPR-NDD-NEXT: movq %r31, 72(%rcx)
-; EGPR-NDD-NEXT: movq %r28, 80(%rcx)
-; EGPR-NDD-NEXT: movq %r25, 88(%rcx)
+; EGPR-NDD-NEXT: movq %r21, 64(%rcx)
+; EGPR-NDD-NEXT: movq %r29, 72(%rcx)
+; EGPR-NDD-NEXT: movq %r20, 80(%rcx)
+; EGPR-NDD-NEXT: movq %r27, 88(%rcx)
; EGPR-NDD-NEXT: movq %rsi, 96(%rcx)
; EGPR-NDD-NEXT: movq %r8, 104(%rcx)
; EGPR-NDD-NEXT: movq %rax, 112(%rcx)
>From 9353bf0564524ce5d2c975e53a47e294a77f2407 Mon Sep 17 00:00:00 2001
From: Feng Zou <feng.zou at intel.com>
Date: Thu, 19 Dec 2024 17:49:17 +0800
Subject: [PATCH 2/2] Update comments
---
llvm/lib/Target/X86/X86RegisterInfo.td | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td
index d218ad0aefc8c5..48459b3aca5081 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -541,8 +541,8 @@ def SSP : X86Reg<"ssp", 0>;
// R12, R13, R14, and R15 for X86-64) are callee-save registers.
// In 64-mode, there are 12 additional i8 registers, SIL, DIL, BPL, SPL, and
// R8B, ... R15B.
-// Allocate R12 and R13 last, as these require an extra byte when
-// encoded in x86_64 instructions.
+// Allocate R12, R13, R20, R21, R28 and R29 last, as these require an extra byte
+// when encoded in x86_64 instructions.
// FIXME: Allow AH, CH, DH, BH to be used as general-purpose registers in
// 64-bit mode. The main complication is that they cannot be encoded in an
// instruction requiring a REX prefix, while SIL, DIL, BPL, R8D, etc.
More information about the llvm-commits mailing list