[llvm] [X86][RA] Add two address hints for compressible NDD instructions. (PR #98603)
Freddy Ye via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 5 00:55:23 PDT 2024
https://github.com/FreddyLeaf updated https://github.com/llvm/llvm-project/pull/98603
>From 85e65ae29ddde4bc62295132e73ee6b7f914ef2b Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Fri, 5 Jul 2024 19:12:15 +0800
Subject: [PATCH 1/6] Add two address hints for compressible NDD instructions.
---
llvm/lib/Target/X86/X86RegisterInfo.cpp | 42 +
llvm/test/CodeGen/X86/apx/and.ll | 64 +-
llvm/test/CodeGen/X86/apx/cmov.ll | 22 +-
llvm/test/CodeGen/X86/apx/mul-i1024.ll | 916 ++++++++++-----------
llvm/test/CodeGen/X86/apx/or.ll | 64 +-
llvm/test/CodeGen/X86/apx/shift-eflags.ll | 28 +-
llvm/test/CodeGen/X86/apx/sub.ll | 96 +--
llvm/test/CodeGen/X86/apx/xor.ll | 80 +-
llvm/test/CodeGen/X86/cmp.ll | 52 +-
llvm/test/CodeGen/X86/popcnt.ll | 216 ++---
llvm/test/CodeGen/X86/select_const_i128.ll | 4 +-
11 files changed, 809 insertions(+), 775 deletions(-)
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 555ede9e95403..5777bd0496881 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -45,6 +45,12 @@ static cl::opt<bool>
EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
cl::desc("Enable use of a base pointer for complex stack frames"));
+static cl::opt<bool>
+ DisableRegAllocHints("x86-disable-regalloc-hints", cl::Hidden,
+ cl::init(false),
+ cl::desc("Disable two address hints for register "
+ "allocation"));
+
X86RegisterInfo::X86RegisterInfo(const Triple &TT)
: X86GenRegisterInfo((TT.isArch64Bit() ? X86::RIP : X86::EIP),
X86_MC::getDwarfRegFlavour(TT, false),
@@ -1082,6 +1088,42 @@ bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
VirtReg, Order, Hints, MF, VRM, Matrix);
unsigned ID = RC.getID();
+
+ if (!VRM || DisableRegAllocHints)
+ return BaseImplRetVal;
+
+ // Append two-address hints after any copy hints, keeping allocation order.
+ SmallSet<unsigned, 4> TwoAddrHints;
+
+ auto tryAddHint = [&](const MachineOperand &VRRegMO,
+ const MachineOperand &MO) -> void {
+ Register Reg = MO.getReg();
+ Register PhysReg =
+ Register::isPhysicalRegister(Reg) ? Reg : Register(VRM->getPhys(Reg));
+ if (PhysReg && !MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg))
+ TwoAddrHints.insert(PhysReg);
+ };
+
+ for (auto &MO : MRI->reg_nodbg_operands(VirtReg)) {
+ const MachineInstr &MI = *MO.getParent();
+ if (X86::getNonNDVariant(MI.getOpcode())) {
+ unsigned OpIdx = MI.getOperandNo(&MO);
+ if (OpIdx == 0 && MI.getOperand(1).isReg()) {
+ tryAddHint(MO, MI.getOperand(1));
+ if (MI.isCommutable() && MI.getOperand(2).isReg())
+ tryAddHint(MO, MI.getOperand(2));
+ } else if (OpIdx == 1) {
+ tryAddHint(MO, MI.getOperand(0));
+ } else if (MI.isCommutable() && OpIdx == 2) {
+ tryAddHint(MO, MI.getOperand(0));
+ }
+ }
+ }
+
+ for (MCPhysReg OrderReg : Order)
+ if (TwoAddrHints.count(OrderReg))
+ Hints.push_back(OrderReg);
+
if (ID != X86::TILERegClassID)
return BaseImplRetVal;
diff --git a/llvm/test/CodeGen/X86/apx/and.ll b/llvm/test/CodeGen/X86/apx/and.ll
index 51858ad591605..23aed77b948b9 100644
--- a/llvm/test/CodeGen/X86/apx/and.ll
+++ b/llvm/test/CodeGen/X86/apx/and.ll
@@ -482,17 +482,17 @@ define i1 @andflag16rr(i16 %a, i16 %b) {
define i1 @andflag32rr(i32 %a, i32 %b) {
; CHECK-LABEL: andflag32rr:
; CHECK: # %bb.0:
-; CHECK-NEXT: andl %esi, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x21,0xf7]
+; CHECK-NEXT: andl %edi, %esi # EVEX TO LEGACY Compression encoding: [0x21,0xfe]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: andflag32rr:
; NF: # %bb.0:
-; NF-NEXT: andl %esi, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x21,0xf7]
+; NF-NEXT: andl %edi, %esi # EVEX TO LEGACY Compression encoding: [0x21,0xfe]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = and i32 %a, %b ; 0xff << 50
@@ -504,17 +504,17 @@ define i1 @andflag32rr(i32 %a, i32 %b) {
define i1 @andflag64rr(i64 %a, i64 %b) {
; CHECK-LABEL: andflag64rr:
; CHECK: # %bb.0:
-; CHECK-NEXT: andq %rsi, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x21,0xf7]
+; CHECK-NEXT: andq %rdi, %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x21,0xfe]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: andflag64rr:
; NF: # %bb.0:
-; NF-NEXT: andq %rsi, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x21,0xf7]
+; NF-NEXT: andq %rdi, %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x21,0xfe]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, %b ; 0xff << 50
@@ -578,17 +578,17 @@ define i1 @andflag16rm(ptr %ptr, i16 %b) {
define i1 @andflag32rm(ptr %ptr, i32 %b) {
; CHECK-LABEL: andflag32rm:
; CHECK: # %bb.0:
-; CHECK-NEXT: andl (%rdi), %esi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x23,0x37]
+; CHECK-NEXT: andl (%rdi), %esi # EVEX TO LEGACY Compression encoding: [0x23,0x37]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: andflag32rm:
; NF: # %bb.0:
-; NF-NEXT: andl (%rdi), %esi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x23,0x37]
+; NF-NEXT: andl (%rdi), %esi # EVEX TO LEGACY Compression encoding: [0x23,0x37]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%a = load i32, ptr %ptr
@@ -601,17 +601,17 @@ define i1 @andflag32rm(ptr %ptr, i32 %b) {
define i1 @andflag64rm(ptr %ptr, i64 %b) {
; CHECK-LABEL: andflag64rm:
; CHECK: # %bb.0:
-; CHECK-NEXT: andq (%rdi), %rsi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x23,0x37]
+; CHECK-NEXT: andq (%rdi), %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x23,0x37]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: andflag64rm:
; NF: # %bb.0:
-; NF-NEXT: andq (%rdi), %rsi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x23,0x37]
+; NF-NEXT: andq (%rdi), %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x23,0x37]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%a = load i64, ptr %ptr
@@ -672,19 +672,19 @@ define i1 @andflag16ri(i16 %a) {
define i1 @andflag32ri(i32 %a) {
; CHECK-LABEL: andflag32ri:
; CHECK: # %bb.0:
-; CHECK-NEXT: andl $123456, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x81,0xe7,0x40,0xe2,0x01,0x00]
+; CHECK-NEXT: andl $123456, %edi # EVEX TO LEGACY Compression encoding: [0x81,0xe7,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: # imm = 0x1E240
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: andflag32ri:
; NF: # %bb.0:
-; NF-NEXT: andl $123456, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x81,0xe7,0x40,0xe2,0x01,0x00]
+; NF-NEXT: andl $123456, %edi # EVEX TO LEGACY Compression encoding: [0x81,0xe7,0x40,0xe2,0x01,0x00]
; NF-NEXT: # imm = 0x1E240
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = and i32 %a, 123456 ; 0xff << 50
@@ -696,19 +696,19 @@ define i1 @andflag32ri(i32 %a) {
define i1 @andflag64ri(i64 %a) {
; CHECK-LABEL: andflag64ri:
; CHECK: # %bb.0:
-; CHECK-NEXT: andq $123456, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x81,0xe7,0x40,0xe2,0x01,0x00]
+; CHECK-NEXT: andq $123456, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x81,0xe7,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: # imm = 0x1E240
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: andflag64ri:
; NF: # %bb.0:
-; NF-NEXT: andq $123456, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x81,0xe7,0x40,0xe2,0x01,0x00]
+; NF-NEXT: andq $123456, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x81,0xe7,0x40,0xe2,0x01,0x00]
; NF-NEXT: # imm = 0x1E240
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, 123456 ; 0xff << 50
@@ -743,17 +743,17 @@ define i1 @andflag16ri8(i16 %a) {
define i1 @andflag32ri8(i32 %a) {
; CHECK-LABEL: andflag32ri8:
; CHECK: # %bb.0:
-; CHECK-NEXT: andl $123, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x83,0xe7,0x7b]
+; CHECK-NEXT: andl $123, %edi # EVEX TO LEGACY Compression encoding: [0x83,0xe7,0x7b]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: andflag32ri8:
; NF: # %bb.0:
-; NF-NEXT: andl $123, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x83,0xe7,0x7b]
+; NF-NEXT: andl $123, %edi # EVEX TO LEGACY Compression encoding: [0x83,0xe7,0x7b]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = and i32 %a, 123 ; 0xff << 50
@@ -765,17 +765,17 @@ define i1 @andflag32ri8(i32 %a) {
define i1 @andflag64ri8(i64 %a) {
; CHECK-LABEL: andflag64ri8:
; CHECK: # %bb.0:
-; CHECK-NEXT: andq $123, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x83,0xe7,0x7b]
+; CHECK-NEXT: andq $123, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xe7,0x7b]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: andflag64ri8:
; NF: # %bb.0:
-; NF-NEXT: andq $123, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x83,0xe7,0x7b]
+; NF-NEXT: andq $123, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xe7,0x7b]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, 123 ; 0xff << 50
diff --git a/llvm/test/CodeGen/X86/apx/cmov.ll b/llvm/test/CodeGen/X86/apx/cmov.ll
index 7a6a63f813c0a..7b846120d3f72 100644
--- a/llvm/test/CodeGen/X86/apx/cmov.ll
+++ b/llvm/test/CodeGen/X86/apx/cmov.ll
@@ -5,10 +5,10 @@ define i8 @cmov8(i8 %a, i8 %b, i8 %x, ptr %y.ptr) {
; CHECK-LABEL: cmov8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpb %sil, %dil # encoding: [0x40,0x38,0xf7]
-; CHECK-NEXT: cmoval %edi, %edx, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x47,0xd7]
-; CHECK-NEXT: movzbl (%rcx), %ecx # encoding: [0x0f,0xb6,0x09]
-; CHECK-NEXT: cmovbel %edx, %ecx # EVEX TO LEGACY Compression encoding: [0x0f,0x46,0xca]
-; CHECK-NEXT: addb %cl, %al # EVEX TO LEGACY Compression encoding: [0x00,0xc8]
+; CHECK-NEXT: cmovbel %edx, %edi # EVEX TO LEGACY Compression encoding: [0x0f,0x46,0xfa]
+; CHECK-NEXT: movzbl (%rcx), %eax # encoding: [0x0f,0xb6,0x01]
+; CHECK-NEXT: cmovbel %edx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x46,0xc2]
+; CHECK-NEXT: addb %dil, %al # EVEX TO LEGACY Compression encoding: [0x40,0x00,0xf8]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%cond = icmp ugt i8 %a, %b
@@ -23,9 +23,9 @@ define i16 @cmov16(i16 %a, i16 %b, i16 %x, ptr %y.ptr) {
; CHECK-LABEL: cmov16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpw %si, %di # encoding: [0x66,0x39,0xf7]
-; CHECK-NEXT: cmoval %edi, %edx, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x47,0xd7]
-; CHECK-NEXT: cmovaw (%rcx), %dx, %cx # encoding: [0x62,0xf4,0x75,0x18,0x47,0x11]
-; CHECK-NEXT: addw %cx, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x01,0xc8]
+; CHECK-NEXT: cmovbel %edx, %edi # EVEX TO LEGACY Compression encoding: [0x0f,0x46,0xfa]
+; CHECK-NEXT: cmovaw (%rcx), %dx, %ax # encoding: [0x62,0xf4,0x7d,0x18,0x47,0x11]
+; CHECK-NEXT: addw %di, %ax # EVEX TO LEGACY Compression encoding: [0x66,0x01,0xf8]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%cond = icmp ugt i16 %a, %b
@@ -41,8 +41,8 @@ define i32 @cmov32(i32 %a, i32 %b, i32 %x, ptr %y.ptr) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpl %esi, %edi # encoding: [0x39,0xf7]
; CHECK-NEXT: cmoval %edi, %edx, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x47,0xd7]
-; CHECK-NEXT: cmoval (%rcx), %edx, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x47,0x11]
-; CHECK-NEXT: addl %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x01,0xc8]
+; CHECK-NEXT: cmoval (%rcx), %edx # EVEX TO LEGACY Compression encoding: [0x0f,0x47,0x11]
+; CHECK-NEXT: addl %edx, %eax # EVEX TO LEGACY Compression encoding: [0x01,0xd0]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%cond = icmp ugt i32 %a, %b
@@ -58,8 +58,8 @@ define i64 @cmov64(i64 %a, i64 %b, i64 %x, ptr %y.ptr) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: cmpq %rsi, %rdi # encoding: [0x48,0x39,0xf7]
; CHECK-NEXT: cmovaq %rdi, %rdx, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x47,0xd7]
-; CHECK-NEXT: cmovaq (%rcx), %rdx, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x47,0x11]
-; CHECK-NEXT: addq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x01,0xc8]
+; CHECK-NEXT: cmovaq (%rcx), %rdx # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x47,0x11]
+; CHECK-NEXT: addq %rdx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x01,0xd0]
; CHECK-NEXT: retq # encoding: [0xc3]
entry:
%cond = icmp ugt i64 %a, %b
diff --git a/llvm/test/CodeGen/X86/apx/mul-i1024.ll b/llvm/test/CodeGen/X86/apx/mul-i1024.ll
index 2b99c44fc769a..a4d15a1b21d6b 100644
--- a/llvm/test/CodeGen/X86/apx/mul-i1024.ll
+++ b/llvm/test/CodeGen/X86/apx/mul-i1024.ll
@@ -1041,41 +1041,41 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: pushq %r13
; EGPR-NDD-NEXT: pushq %r12
; EGPR-NDD-NEXT: pushq %rbx
-; EGPR-NDD-NEXT: subq $104, %rsp
+; EGPR-NDD-NEXT: subq $96, %rsp
; EGPR-NDD-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: movq %rsi, %r15
; EGPR-NDD-NEXT: movq %rdi, %r20
-; EGPR-NDD-NEXT: movq (%rdi), %r16
-; EGPR-NDD-NEXT: movq 8(%rdi), %r14
+; EGPR-NDD-NEXT: movq (%rdi), %r17
+; EGPR-NDD-NEXT: movq 8(%rdi), %r11
; EGPR-NDD-NEXT: movq 24(%rdi), %r9
; EGPR-NDD-NEXT: movq 16(%rdi), %r10
; EGPR-NDD-NEXT: movq 40(%rdi), %rdi
-; EGPR-NDD-NEXT: movq 32(%r20), %r11
-; EGPR-NDD-NEXT: movq 56(%r20), %r17
-; EGPR-NDD-NEXT: movq 48(%r20), %r15
-; EGPR-NDD-NEXT: movq 24(%rsi), %r18
+; EGPR-NDD-NEXT: movq 32(%r20), %r16
+; EGPR-NDD-NEXT: movq 56(%r20), %r18
+; EGPR-NDD-NEXT: movq 48(%r20), %r23
+; EGPR-NDD-NEXT: movq 24(%rsi), %r14
; EGPR-NDD-NEXT: movq 16(%rsi), %r24
; EGPR-NDD-NEXT: movq (%rsi), %r22
; EGPR-NDD-NEXT: movq 8(%rsi), %r21
-; EGPR-NDD-NEXT: movq %rsi, %r23
-; EGPR-NDD-NEXT: movq %r15, %rax
+; EGPR-NDD-NEXT: movq %r23, %rax
; EGPR-NDD-NEXT: mulq %r22
; EGPR-NDD-NEXT: movq %rdx, %r25
; EGPR-NDD-NEXT: movq %rax, %r19
-; EGPR-NDD-NEXT: movq %r17, %rax
+; EGPR-NDD-NEXT: movq %r18, %rax
; EGPR-NDD-NEXT: mulq %r22
-; EGPR-NDD-NEXT: addq %r25, %rax, %rcx
-; EGPR-NDD-NEXT: adcq $0, %rdx, %rsi
-; EGPR-NDD-NEXT: movq %r15, %rax
+; EGPR-NDD-NEXT: addq %rax, %r25
+; EGPR-NDD-NEXT: adcq $0, %rdx, %rcx
+; EGPR-NDD-NEXT: movq %r23, %rax
; EGPR-NDD-NEXT: mulq %r21
-; EGPR-NDD-NEXT: addq %rcx, %rax, %r8
-; EGPR-NDD-NEXT: adcq %rdx, %rsi, %rcx
+; EGPR-NDD-NEXT: addq %r25, %rax, %rsi
+; EGPR-NDD-NEXT: adcq %rdx, %rcx
; EGPR-NDD-NEXT: setb %al
-; EGPR-NDD-NEXT: movzbl %al, %esi
-; EGPR-NDD-NEXT: movq %r17, %rax
+; EGPR-NDD-NEXT: movzbl %al, %r8d
+; EGPR-NDD-NEXT: movq %r18, %rax
; EGPR-NDD-NEXT: mulq %r21
; EGPR-NDD-NEXT: addq %rcx, %rax, %r27
-; EGPR-NDD-NEXT: adcq %rdx, %rsi
-; EGPR-NDD-NEXT: movq %r11, %rax
+; EGPR-NDD-NEXT: adcq %rdx, %r8
+; EGPR-NDD-NEXT: movq %r16, %rax
; EGPR-NDD-NEXT: mulq %r22
; EGPR-NDD-NEXT: movq %rdx, %r26
; EGPR-NDD-NEXT: movq %rax, %r25
@@ -1083,7 +1083,7 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: mulq %r22
; EGPR-NDD-NEXT: addq %r26, %rax, %rcx
; EGPR-NDD-NEXT: adcq $0, %rdx, %r26
-; EGPR-NDD-NEXT: movq %r11, %rax
+; EGPR-NDD-NEXT: movq %r16, %rax
; EGPR-NDD-NEXT: mulq %r21
; EGPR-NDD-NEXT: addq %rax, %rcx
; EGPR-NDD-NEXT: adcq %rdx, %r26
@@ -1094,58 +1094,59 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: addq %r26, %rax
; EGPR-NDD-NEXT: adcq %r28, %rdx
; EGPR-NDD-NEXT: addq %rax, %r19, %r28
-; EGPR-NDD-NEXT: adcq %rdx, %r8
+; EGPR-NDD-NEXT: adcq %rdx, %rsi, %r29
; EGPR-NDD-NEXT: adcq $0, %r27
-; EGPR-NDD-NEXT: adcq $0, %rsi, %r29
-; EGPR-NDD-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: movq %r11, %rax
+; EGPR-NDD-NEXT: adcq $0, %r8
+; EGPR-NDD-NEXT: movq %r16, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: movq %r16, %rax
; EGPR-NDD-NEXT: mulq %r24
; EGPR-NDD-NEXT: movq %rdx, %r19
; EGPR-NDD-NEXT: movq %rax, %r26
; EGPR-NDD-NEXT: movq %rdi, %rax
; EGPR-NDD-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: mulq %r24
-; EGPR-NDD-NEXT: addq %r19, %rax, %rsi
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r19
-; EGPR-NDD-NEXT: movq %r11, %rax
-; EGPR-NDD-NEXT: mulq %r18
-; EGPR-NDD-NEXT: addq %rsi, %rax, %r30
-; EGPR-NDD-NEXT: adcq %rdx, %r19, %rsi
+; EGPR-NDD-NEXT: addq %rax, %r19
+; EGPR-NDD-NEXT: adcq $0, %rdx, %rsi
+; EGPR-NDD-NEXT: movq %r16, %rax
+; EGPR-NDD-NEXT: mulq %r14
+; EGPR-NDD-NEXT: addq %rax, %r19
+; EGPR-NDD-NEXT: adcq %rdx, %rsi
; EGPR-NDD-NEXT: setb %al
-; EGPR-NDD-NEXT: movzbl %al, %r19d
+; EGPR-NDD-NEXT: movzbl %al, %r30d
; EGPR-NDD-NEXT: movq %rdi, %rax
-; EGPR-NDD-NEXT: mulq %r18
+; EGPR-NDD-NEXT: mulq %r14
; EGPR-NDD-NEXT: addq %rsi, %rax
-; EGPR-NDD-NEXT: adcq %r19, %rdx
+; EGPR-NDD-NEXT: adcq %r30, %rdx
; EGPR-NDD-NEXT: addq %r28, %r26, %rsi
-; EGPR-NDD-NEXT: adcq %r8, %r30, %r28
+; EGPR-NDD-NEXT: adcq %r29, %r19, %r28
; EGPR-NDD-NEXT: adcq $0, %rax
; EGPR-NDD-NEXT: adcq $0, %rdx
-; EGPR-NDD-NEXT: addq %rax, %r27, %r8
-; EGPR-NDD-NEXT: adcq %rdx, %r29, %r27
+; EGPR-NDD-NEXT: addq %rax, %r27
+; EGPR-NDD-NEXT: adcq %rdx, %r8
; EGPR-NDD-NEXT: setb %al
; EGPR-NDD-NEXT: movzbl %al, %r31d
-; EGPR-NDD-NEXT: movq %r15, %rax
+; EGPR-NDD-NEXT: movq %r23, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: movq %r23, %rax
; EGPR-NDD-NEXT: mulq %r24
; EGPR-NDD-NEXT: movq %rdx, %r19
; EGPR-NDD-NEXT: movq %rax, %r26
-; EGPR-NDD-NEXT: movq %r17, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: movq %r17, %rax
+; EGPR-NDD-NEXT: movq %r18, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: movq %r18, %rax
; EGPR-NDD-NEXT: mulq %r24
; EGPR-NDD-NEXT: addq %rax, %r19
; EGPR-NDD-NEXT: adcq $0, %rdx, %r29
-; EGPR-NDD-NEXT: movq %r15, %rax
-; EGPR-NDD-NEXT: mulq %r18
+; EGPR-NDD-NEXT: movq %r23, %rax
+; EGPR-NDD-NEXT: mulq %r14
; EGPR-NDD-NEXT: addq %rax, %r19
; EGPR-NDD-NEXT: adcq %rdx, %r29
; EGPR-NDD-NEXT: setb %al
; EGPR-NDD-NEXT: movzbl %al, %r30d
-; EGPR-NDD-NEXT: movq %r17, %rax
-; EGPR-NDD-NEXT: mulq %r18
+; EGPR-NDD-NEXT: movq %r18, %rax
+; EGPR-NDD-NEXT: mulq %r14
; EGPR-NDD-NEXT: addq %r29, %rax
; EGPR-NDD-NEXT: adcq %r30, %rdx
-; EGPR-NDD-NEXT: addq %r8, %r26, %r29
-; EGPR-NDD-NEXT: adcq %r27, %r19, %r30
+; EGPR-NDD-NEXT: addq %r27, %r26, %r29
+; EGPR-NDD-NEXT: adcq %r8, %r19, %r30
; EGPR-NDD-NEXT: adcq %rax, %r31
; EGPR-NDD-NEXT: adcq $0, %rdx, %rdi
; EGPR-NDD-NEXT: movq %r10, %rax
@@ -1154,69 +1155,69 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: movq %rax, %r26
; EGPR-NDD-NEXT: movq %r9, %rax
; EGPR-NDD-NEXT: mulq %r22
-; EGPR-NDD-NEXT: addq %r19, %rax, %r8
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r19
+; EGPR-NDD-NEXT: addq %rax, %r19
+; EGPR-NDD-NEXT: adcq $0, %rdx, %r8
; EGPR-NDD-NEXT: movq %r10, %rax
; EGPR-NDD-NEXT: mulq %r21
-; EGPR-NDD-NEXT: addq %rax, %r8
-; EGPR-NDD-NEXT: adcq %rdx, %r19
+; EGPR-NDD-NEXT: addq %rax, %r19
+; EGPR-NDD-NEXT: adcq %rdx, %r8
; EGPR-NDD-NEXT: setb %al
; EGPR-NDD-NEXT: movzbl %al, %r27d
; EGPR-NDD-NEXT: movq %r9, %rax
; EGPR-NDD-NEXT: mulq %r21
-; EGPR-NDD-NEXT: addq %rax, %r19
+; EGPR-NDD-NEXT: addq %rax, %r8
; EGPR-NDD-NEXT: adcq %r27, %rdx, %rbx
-; EGPR-NDD-NEXT: movq %r16, %rax
+; EGPR-NDD-NEXT: movq %r17, %rax
; EGPR-NDD-NEXT: mulq %r22
; EGPR-NDD-NEXT: movq %rdx, %r27
; EGPR-NDD-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: movq %r14, %rax
+; EGPR-NDD-NEXT: movq %r11, %rax
; EGPR-NDD-NEXT: mulq %r22
; EGPR-NDD-NEXT: addq %rax, %r27
; EGPR-NDD-NEXT: adcq $0, %rdx, %r12
-; EGPR-NDD-NEXT: movq %r16, %rax
+; EGPR-NDD-NEXT: movq %r17, %rax
; EGPR-NDD-NEXT: mulq %r21
; EGPR-NDD-NEXT: addq %r27, %rax
; EGPR-NDD-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq %rdx, %r12, %r27
-; EGPR-NDD-NEXT: setb %bpl
-; EGPR-NDD-NEXT: movq %r14, %rax
+; EGPR-NDD-NEXT: adcq %rdx, %r12
+; EGPR-NDD-NEXT: setb %r27b
+; EGPR-NDD-NEXT: movq %r11, %rax
; EGPR-NDD-NEXT: mulq %r21
-; EGPR-NDD-NEXT: addq %r27, %rax
-; EGPR-NDD-NEXT: movzbl %bpl, %r27d
+; EGPR-NDD-NEXT: addq %r12, %rax
+; EGPR-NDD-NEXT: movzbl %r27b, %r27d
; EGPR-NDD-NEXT: adcq %r27, %rdx
; EGPR-NDD-NEXT: addq %rax, %r26, %r12
-; EGPR-NDD-NEXT: adcq %rdx, %r8
-; EGPR-NDD-NEXT: adcq $0, %r19
+; EGPR-NDD-NEXT: adcq %rdx, %r19
+; EGPR-NDD-NEXT: adcq $0, %r8
; EGPR-NDD-NEXT: adcq $0, %rbx
-; EGPR-NDD-NEXT: movq %r16, %rax
+; EGPR-NDD-NEXT: movq %r17, %rax
; EGPR-NDD-NEXT: mulq %r24
; EGPR-NDD-NEXT: movq %rdx, %r26
; EGPR-NDD-NEXT: movq %rax, %r27
-; EGPR-NDD-NEXT: movq %r14, %rax
+; EGPR-NDD-NEXT: movq %r11, %rax
; EGPR-NDD-NEXT: mulq %r24
; EGPR-NDD-NEXT: addq %rax, %r26
; EGPR-NDD-NEXT: adcq $0, %rdx, %r13
-; EGPR-NDD-NEXT: movq %r16, %rax
-; EGPR-NDD-NEXT: mulq %r18
+; EGPR-NDD-NEXT: movq %r17, %rax
+; EGPR-NDD-NEXT: mulq %r14
; EGPR-NDD-NEXT: addq %rax, %r26
; EGPR-NDD-NEXT: adcq %rdx, %r13
; EGPR-NDD-NEXT: setb %bpl
-; EGPR-NDD-NEXT: movq %r14, %rax
-; EGPR-NDD-NEXT: mulq %r18
+; EGPR-NDD-NEXT: movq %r11, %rax
+; EGPR-NDD-NEXT: mulq %r14
; EGPR-NDD-NEXT: addq %r13, %rax
; EGPR-NDD-NEXT: movzbl %bpl, %r13d
; EGPR-NDD-NEXT: adcq %r13, %rdx
-; EGPR-NDD-NEXT: addq %r12, %r27, %r11
-; EGPR-NDD-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq %r26, %r8
-; EGPR-NDD-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: addq %r12, %r27
+; EGPR-NDD-NEXT: movq %r27, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq %r26, %r19
+; EGPR-NDD-NEXT: movq %r19, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: adcq $0, %rax
; EGPR-NDD-NEXT: adcq $0, %rdx
-; EGPR-NDD-NEXT: addq %rax, %r19, %r8
-; EGPR-NDD-NEXT: adcq %rdx, %rbx, %r19
-; EGPR-NDD-NEXT: setb %bl
-; EGPR-NDD-NEXT: movq %r10, %r17
+; EGPR-NDD-NEXT: addq %rax, %r8
+; EGPR-NDD-NEXT: adcq %rdx, %rbx
+; EGPR-NDD-NEXT: setb %r19b
+; EGPR-NDD-NEXT: movq %r10, %r16
; EGPR-NDD-NEXT: movq %r10, %rax
; EGPR-NDD-NEXT: mulq %r24
; EGPR-NDD-NEXT: movq %rdx, %r26
@@ -1226,32 +1227,31 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: addq %rax, %r26
; EGPR-NDD-NEXT: adcq $0, %rdx, %r12
; EGPR-NDD-NEXT: movq %r10, %rax
-; EGPR-NDD-NEXT: movq %r18, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: mulq %r18
+; EGPR-NDD-NEXT: mulq %r14
; EGPR-NDD-NEXT: addq %rax, %r26
; EGPR-NDD-NEXT: adcq %rdx, %r12
; EGPR-NDD-NEXT: setb %bpl
; EGPR-NDD-NEXT: movq %r9, %rax
-; EGPR-NDD-NEXT: mulq %r18
+; EGPR-NDD-NEXT: mulq %r14
; EGPR-NDD-NEXT: addq %r12, %rax
; EGPR-NDD-NEXT: movzbl %bpl, %r12d
; EGPR-NDD-NEXT: adcq %r12, %rdx
; EGPR-NDD-NEXT: addq %r27, %r8
-; EGPR-NDD-NEXT: adcq %r26, %r19
-; EGPR-NDD-NEXT: movzbl %bl, %r26d
-; EGPR-NDD-NEXT: adcq %r26, %rax
+; EGPR-NDD-NEXT: adcq %r26, %rbx
+; EGPR-NDD-NEXT: movzbl %r19b, %r19d
+; EGPR-NDD-NEXT: adcq %r19, %rax
; EGPR-NDD-NEXT: adcq $0, %rdx
; EGPR-NDD-NEXT: addq %r8, %r25, %r12
-; EGPR-NDD-NEXT: movq 32(%r23), %r26
-; EGPR-NDD-NEXT: adcq %r19, %rcx, %r13
+; EGPR-NDD-NEXT: movq 32(%r15), %r26
+; EGPR-NDD-NEXT: adcq %rbx, %rcx, %r13
; EGPR-NDD-NEXT: adcq %rax, %rsi, %rbp
; EGPR-NDD-NEXT: adcq %rdx, %r28, %rbx
-; EGPR-NDD-NEXT: adcq $0, %r29, %rax
-; EGPR-NDD-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq $0, %r29
+; EGPR-NDD-NEXT: movq %r29, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: adcq $0, %r30
; EGPR-NDD-NEXT: adcq $0, %r31
-; EGPR-NDD-NEXT: adcq $0, %rdi, %rax
-; EGPR-NDD-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq $0, %rdi
+; EGPR-NDD-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: movq %r10, %rax
; EGPR-NDD-NEXT: mulq %r26
; EGPR-NDD-NEXT: movq %rdx, %r25
@@ -1259,341 +1259,333 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: movq %r9, %r19
; EGPR-NDD-NEXT: movq %r9, %rax
; EGPR-NDD-NEXT: mulq %r26
-; EGPR-NDD-NEXT: addq %r25, %rax, %rcx
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r8
-; EGPR-NDD-NEXT: movq 40(%r23), %r18
-; EGPR-NDD-NEXT: movq %r23, %r11
+; EGPR-NDD-NEXT: addq %rax, %r25
+; EGPR-NDD-NEXT: adcq $0, %rdx, %rcx
+; EGPR-NDD-NEXT: movq 40(%r15), %r18
; EGPR-NDD-NEXT: movq %r10, %rax
; EGPR-NDD-NEXT: mulq %r18
-; EGPR-NDD-NEXT: addq %rcx, %rax, %rdi
-; EGPR-NDD-NEXT: adcq %rdx, %r8
-; EGPR-NDD-NEXT: setb %r25b
+; EGPR-NDD-NEXT: addq %r25, %rax, %r29
+; EGPR-NDD-NEXT: adcq %rdx, %rcx
+; EGPR-NDD-NEXT: setb %r8b
; EGPR-NDD-NEXT: movq %r9, %rax
; EGPR-NDD-NEXT: mulq %r18
-; EGPR-NDD-NEXT: addq %r8, %rax, %r29
-; EGPR-NDD-NEXT: movzbl %r25b, %eax
+; EGPR-NDD-NEXT: addq %rcx, %rax, %rdi
+; EGPR-NDD-NEXT: movzbl %r8b, %eax
; EGPR-NDD-NEXT: adcq %rax, %rdx, %rsi
-; EGPR-NDD-NEXT: movq %r16, %rax
+; EGPR-NDD-NEXT: movq %r17, %rax
; EGPR-NDD-NEXT: mulq %r26
; EGPR-NDD-NEXT: movq %rdx, %r28
; EGPR-NDD-NEXT: movq %rax, %r25
-; EGPR-NDD-NEXT: movq %r14, %rax
+; EGPR-NDD-NEXT: movq %r11, %r10
+; EGPR-NDD-NEXT: movq %r11, %rax
; EGPR-NDD-NEXT: mulq %r26
; EGPR-NDD-NEXT: addq %r28, %rax, %r8
; EGPR-NDD-NEXT: adcq $0, %rdx, %r28
-; EGPR-NDD-NEXT: movq %r16, %rax
-; EGPR-NDD-NEXT: movq %r16, %r10
+; EGPR-NDD-NEXT: movq %r17, %rax
; EGPR-NDD-NEXT: mulq %r18
; EGPR-NDD-NEXT: addq %r8, %rax, %r23
; EGPR-NDD-NEXT: adcq %rdx, %r28
; EGPR-NDD-NEXT: setb %cl
-; EGPR-NDD-NEXT: movq %r14, %rax
-; EGPR-NDD-NEXT: movq %r14, %r16
+; EGPR-NDD-NEXT: movq %r11, %rax
; EGPR-NDD-NEXT: mulq %r18
; EGPR-NDD-NEXT: addq %r28, %rax
; EGPR-NDD-NEXT: movzbl %cl, %ecx
; EGPR-NDD-NEXT: adcq %rdx, %rcx
; EGPR-NDD-NEXT: addq %rax, %r27
-; EGPR-NDD-NEXT: adcq %rcx, %rdi
-; EGPR-NDD-NEXT: adcq $0, %r29, %r8
+; EGPR-NDD-NEXT: adcq %rcx, %r29, %r8
+; EGPR-NDD-NEXT: adcq $0, %rdi
; EGPR-NDD-NEXT: adcq $0, %rsi, %r9
-; EGPR-NDD-NEXT: movq %r11, %r14
-; EGPR-NDD-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: movq 48(%r11), %r11
-; EGPR-NDD-NEXT: movq %r10, %rsi
-; EGPR-NDD-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: movq %r10, %rax
+; EGPR-NDD-NEXT: movq 48(%r15), %r11
+; EGPR-NDD-NEXT: movq %r17, %rsi
+; EGPR-NDD-NEXT: movq %r17, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: movq %r17, %rax
; EGPR-NDD-NEXT: mulq %r11
; EGPR-NDD-NEXT: movq %rdx, %r28
; EGPR-NDD-NEXT: movq %rax, %r29
-; EGPR-NDD-NEXT: movq %r16, %rax
-; EGPR-NDD-NEXT: movq %r16, %r10
-; EGPR-NDD-NEXT: movq %r16, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: movq %r10, %rax
+; EGPR-NDD-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: mulq %r11
; EGPR-NDD-NEXT: addq %rax, %r28
; EGPR-NDD-NEXT: adcq $0, %rdx, %rcx
-; EGPR-NDD-NEXT: movq 56(%r14), %r16
+; EGPR-NDD-NEXT: movq 56(%r15), %r17
; EGPR-NDD-NEXT: movq %rsi, %rax
-; EGPR-NDD-NEXT: mulq %r16
+; EGPR-NDD-NEXT: mulq %r17
; EGPR-NDD-NEXT: addq %rax, %r28
; EGPR-NDD-NEXT: adcq %rdx, %rcx
; EGPR-NDD-NEXT: setb %sil
; EGPR-NDD-NEXT: movq %r10, %rax
-; EGPR-NDD-NEXT: mulq %r16
+; EGPR-NDD-NEXT: mulq %r17
; EGPR-NDD-NEXT: addq %rcx, %rax
; EGPR-NDD-NEXT: movzbl %sil, %ecx
; EGPR-NDD-NEXT: adcq %rdx, %rcx
-; EGPR-NDD-NEXT: addq %r27, %r29, %r10
-; EGPR-NDD-NEXT: adcq %r28, %rdi
+; EGPR-NDD-NEXT: addq %r29, %r27
+; EGPR-NDD-NEXT: adcq %r8, %r28, %r10
; EGPR-NDD-NEXT: adcq $0, %rax
; EGPR-NDD-NEXT: adcq $0, %rcx
-; EGPR-NDD-NEXT: addq %rax, %r8
-; EGPR-NDD-NEXT: adcq %rcx, %r9, %rsi
-; EGPR-NDD-NEXT: setb %r9b
-; EGPR-NDD-NEXT: movq %r17, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: movq %r17, %rax
+; EGPR-NDD-NEXT: addq %rax, %rdi
+; EGPR-NDD-NEXT: adcq %rcx, %r9, %r8
+; EGPR-NDD-NEXT: setb %sil
+; EGPR-NDD-NEXT: movq %r16, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: movq %r16, %rax
; EGPR-NDD-NEXT: mulq %r11
; EGPR-NDD-NEXT: movq %rdx, %r28
; EGPR-NDD-NEXT: movq %rax, %r29
+; EGPR-NDD-NEXT: movq %r19, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: movq %r19, %rax
; EGPR-NDD-NEXT: mulq %r11
-; EGPR-NDD-NEXT: addq %r28, %rax, %r27
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r28
-; EGPR-NDD-NEXT: movq %r17, %rax
-; EGPR-NDD-NEXT: mulq %r16
-; EGPR-NDD-NEXT: addq %rax, %r27
-; EGPR-NDD-NEXT: adcq %rdx, %r28
+; EGPR-NDD-NEXT: addq %rax, %r28
+; EGPR-NDD-NEXT: adcq $0, %rdx, %r9
+; EGPR-NDD-NEXT: movq %r16, %rax
+; EGPR-NDD-NEXT: mulq %r17
+; EGPR-NDD-NEXT: addq %rax, %r28
+; EGPR-NDD-NEXT: adcq %rdx, %r9
; EGPR-NDD-NEXT: setb %cl
; EGPR-NDD-NEXT: movq %r19, %rax
-; EGPR-NDD-NEXT: movq %r19, %r17
-; EGPR-NDD-NEXT: mulq %r16
-; EGPR-NDD-NEXT: addq %r28, %rax
+; EGPR-NDD-NEXT: mulq %r17
+; EGPR-NDD-NEXT: addq %r9, %rax
; EGPR-NDD-NEXT: movzbl %cl, %ecx
; EGPR-NDD-NEXT: adcq %rdx, %rcx
-; EGPR-NDD-NEXT: addq %r8, %r29, %rdx
-; EGPR-NDD-NEXT: adcq %r27, %rsi
-; EGPR-NDD-NEXT: movzbl %r9b, %r8d
-; EGPR-NDD-NEXT: adcq %r8, %rax
+; EGPR-NDD-NEXT: addq %r29, %rdi
+; EGPR-NDD-NEXT: adcq %r28, %r8
+; EGPR-NDD-NEXT: movzbl %sil, %edx
+; EGPR-NDD-NEXT: adcq %rdx, %rax
; EGPR-NDD-NEXT: adcq $0, %rcx
-; EGPR-NDD-NEXT: addq %r12, %r25, %r8
-; EGPR-NDD-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq %r13, %r23, %r8
-; EGPR-NDD-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq %rbp, %r10, %r8
-; EGPR-NDD-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq %rbx, %rdi
-; EGPR-NDD-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq $0, %rdx
-; EGPR-NDD-NEXT: adcq $0, %rsi
+; EGPR-NDD-NEXT: addq %r12, %r25
+; EGPR-NDD-NEXT: movq %r25, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq %r13, %r23, %r19
+; EGPR-NDD-NEXT: movq %r19, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq %rbp, %r27
+; EGPR-NDD-NEXT: movq %r27, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq %rbx, %r10
+; EGPR-NDD-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq $0, %rdi
+; EGPR-NDD-NEXT: adcq $0, %r8
; EGPR-NDD-NEXT: adcq $0, %rax
-; EGPR-NDD-NEXT: adcq $0, %rcx, %rdi
-; EGPR-NDD-NEXT: addq %rdx, {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq %rsi, %r30, %r19
-; EGPR-NDD-NEXT: adcq %rax, %r31, %r30
-; EGPR-NDD-NEXT: adcq %rdi, {{[-0-9]+}}(%r{{[sb]}}p), %r31 # 8-byte Folded Reload
-; EGPR-NDD-NEXT: setb %bpl
-; EGPR-NDD-NEXT: movq %r15, %rax
+; EGPR-NDD-NEXT: adcq $0, %rcx
+; EGPR-NDD-NEXT: addq %rdi, {{[-0-9]+}}(%r{{[sb]}}p), %r19 # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq %r8, %r30
+; EGPR-NDD-NEXT: adcq %rax, %r31
+; EGPR-NDD-NEXT: adcq %rcx, {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
+; EGPR-NDD-NEXT: setb %r8b
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
+; EGPR-NDD-NEXT: movq %r13, %rax
; EGPR-NDD-NEXT: mulq %r26
; EGPR-NDD-NEXT: movq %rdx, %r25
; EGPR-NDD-NEXT: movq %rax, %r28
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r9, %rax
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; EGPR-NDD-NEXT: movq %r10, %rax
; EGPR-NDD-NEXT: mulq %r26
-; EGPR-NDD-NEXT: addq %r25, %rax, %rsi
-; EGPR-NDD-NEXT: adcq $0, %rdx, %rdi
-; EGPR-NDD-NEXT: movq %r15, %rax
-; EGPR-NDD-NEXT: movq %r15, %r13
+; EGPR-NDD-NEXT: addq %rax, %r25
+; EGPR-NDD-NEXT: adcq $0, %rdx, %rsi
+; EGPR-NDD-NEXT: movq %r13, %rax
; EGPR-NDD-NEXT: mulq %r18
-; EGPR-NDD-NEXT: addq %rax, %rsi
-; EGPR-NDD-NEXT: adcq %rdx, %rdi
-; EGPR-NDD-NEXT: setb %r8b
-; EGPR-NDD-NEXT: movq %r9, %rax
-; EGPR-NDD-NEXT: movq %r9, %r23
+; EGPR-NDD-NEXT: addq %r25, %rax, %rdi
+; EGPR-NDD-NEXT: adcq %rdx, %rsi
+; EGPR-NDD-NEXT: setb %r9b
+; EGPR-NDD-NEXT: movq %r10, %rax
+; EGPR-NDD-NEXT: movq %r10, %r16
; EGPR-NDD-NEXT: mulq %r18
-; EGPR-NDD-NEXT: addq %rax, %rdi
-; EGPR-NDD-NEXT: movzbl %r8b, %eax
-; EGPR-NDD-NEXT: adcq %rax, %rdx, %r8
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r14, %rax
+; EGPR-NDD-NEXT: addq %rax, %rsi
+; EGPR-NDD-NEXT: movzbl %r9b, %eax
+; EGPR-NDD-NEXT: adcq %rax, %rdx, %r9
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r23 # 8-byte Reload
+; EGPR-NDD-NEXT: movq %r23, %rax
; EGPR-NDD-NEXT: mulq %r26
; EGPR-NDD-NEXT: movq %rdx, %r29
; EGPR-NDD-NEXT: movq %rax, %r25
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r15, %rax
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
+; EGPR-NDD-NEXT: movq %r12, %rax
; EGPR-NDD-NEXT: mulq %r26
-; EGPR-NDD-NEXT: addq %r29, %rax, %r9
+; EGPR-NDD-NEXT: addq %rax, %r29
; EGPR-NDD-NEXT: adcq $0, %rdx, %r10
-; EGPR-NDD-NEXT: movq %r14, %rax
+; EGPR-NDD-NEXT: movq %r23, %rax
; EGPR-NDD-NEXT: mulq %r18
-; EGPR-NDD-NEXT: addq %r9, %rax, %rbx
-; EGPR-NDD-NEXT: adcq %rdx, %r10, %r9
-; EGPR-NDD-NEXT: setb %r10b
-; EGPR-NDD-NEXT: movq %r15, %rax
+; EGPR-NDD-NEXT: addq %r29, %rax, %rbx
+; EGPR-NDD-NEXT: adcq %rdx, %r10
+; EGPR-NDD-NEXT: setb %r27b
+; EGPR-NDD-NEXT: movq %r12, %rax
; EGPR-NDD-NEXT: mulq %r18
-; EGPR-NDD-NEXT: addq %r9, %rax
-; EGPR-NDD-NEXT: movzbl %r10b, %r9d
-; EGPR-NDD-NEXT: adcq %r9, %rdx
-; EGPR-NDD-NEXT: addq %rax, %r28, %r9
-; EGPR-NDD-NEXT: adcq %rdx, %rsi
-; EGPR-NDD-NEXT: adcq $0, %rdi
-; EGPR-NDD-NEXT: adcq $0, %r8
-; EGPR-NDD-NEXT: movq %r14, %rax
+; EGPR-NDD-NEXT: addq %r10, %rax
+; EGPR-NDD-NEXT: movzbl %r27b, %r10d
+; EGPR-NDD-NEXT: adcq %r10, %rdx
+; EGPR-NDD-NEXT: addq %rax, %r28, %r10
+; EGPR-NDD-NEXT: adcq %rdx, %rdi
+; EGPR-NDD-NEXT: adcq $0, %rsi
+; EGPR-NDD-NEXT: adcq $0, %r9
+; EGPR-NDD-NEXT: movq %r23, %rax
; EGPR-NDD-NEXT: mulq %r11
; EGPR-NDD-NEXT: movq %rdx, %r28
; EGPR-NDD-NEXT: movq %rax, %r29
-; EGPR-NDD-NEXT: movq %r15, %rax
+; EGPR-NDD-NEXT: movq %r12, %rax
; EGPR-NDD-NEXT: mulq %r11
-; EGPR-NDD-NEXT: addq %r28, %rax, %r10
+; EGPR-NDD-NEXT: addq %rax, %r28
; EGPR-NDD-NEXT: adcq $0, %rdx, %r27
-; EGPR-NDD-NEXT: movq %r14, %rax
-; EGPR-NDD-NEXT: mulq %r16
-; EGPR-NDD-NEXT: addq %rax, %r10
+; EGPR-NDD-NEXT: movq %r23, %rax
+; EGPR-NDD-NEXT: mulq %r17
+; EGPR-NDD-NEXT: addq %rax, %r28
; EGPR-NDD-NEXT: adcq %rdx, %r27
-; EGPR-NDD-NEXT: setb %r28b
-; EGPR-NDD-NEXT: movq %r15, %rax
-; EGPR-NDD-NEXT: mulq %r16
+; EGPR-NDD-NEXT: setb %bpl
+; EGPR-NDD-NEXT: movq %r12, %rax
+; EGPR-NDD-NEXT: mulq %r17
; EGPR-NDD-NEXT: addq %r27, %rax
-; EGPR-NDD-NEXT: movzbl %r28b, %r27d
+; EGPR-NDD-NEXT: movzbl %bpl, %r27d
; EGPR-NDD-NEXT: adcq %r27, %rdx
-; EGPR-NDD-NEXT: addq %r29, %r9
-; EGPR-NDD-NEXT: adcq %r10, %rsi
+; EGPR-NDD-NEXT: addq %r29, %r10
+; EGPR-NDD-NEXT: adcq %r28, %rdi
; EGPR-NDD-NEXT: adcq $0, %rax
; EGPR-NDD-NEXT: adcq $0, %rdx
-; EGPR-NDD-NEXT: addq %rax, %rdi
-; EGPR-NDD-NEXT: adcq %rdx, %r8
-; EGPR-NDD-NEXT: setb %r10b
+; EGPR-NDD-NEXT: addq %rax, %rsi
+; EGPR-NDD-NEXT: adcq %rdx, %r9
+; EGPR-NDD-NEXT: setb %r27b
; EGPR-NDD-NEXT: movq %r13, %rax
; EGPR-NDD-NEXT: mulq %r11
; EGPR-NDD-NEXT: movq %rdx, %r28
; EGPR-NDD-NEXT: movq %rax, %r29
-; EGPR-NDD-NEXT: movq %r23, %r14
-; EGPR-NDD-NEXT: movq %r23, %rax
+; EGPR-NDD-NEXT: movq %r16, %rax
; EGPR-NDD-NEXT: mulq %r11
-; EGPR-NDD-NEXT: addq %r28, %rax, %r27
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r28
+; EGPR-NDD-NEXT: addq %rax, %r28
+; EGPR-NDD-NEXT: adcq $0, %rdx, %r12
; EGPR-NDD-NEXT: movq %r13, %rax
-; EGPR-NDD-NEXT: mulq %r16
-; EGPR-NDD-NEXT: addq %rax, %r27
-; EGPR-NDD-NEXT: adcq %rdx, %r28
-; EGPR-NDD-NEXT: setb %r15b
-; EGPR-NDD-NEXT: movq %r23, %rax
-; EGPR-NDD-NEXT: mulq %r16
-; EGPR-NDD-NEXT: addq %r28, %rax
-; EGPR-NDD-NEXT: movzbl %r15b, %r28d
-; EGPR-NDD-NEXT: adcq %r28, %rdx
-; EGPR-NDD-NEXT: addq %r29, %rdi
-; EGPR-NDD-NEXT: adcq %r27, %r8
-; EGPR-NDD-NEXT: movzbl %r10b, %r10d
-; EGPR-NDD-NEXT: adcq %r10, %rax
+; EGPR-NDD-NEXT: mulq %r17
+; EGPR-NDD-NEXT: addq %rax, %r28
+; EGPR-NDD-NEXT: adcq %rdx, %r12
+; EGPR-NDD-NEXT: setb %bpl
+; EGPR-NDD-NEXT: movq %r16, %rax
+; EGPR-NDD-NEXT: mulq %r17
+; EGPR-NDD-NEXT: addq %r12, %rax
+; EGPR-NDD-NEXT: movzbl %bpl, %r12d
+; EGPR-NDD-NEXT: adcq %r12, %rdx
+; EGPR-NDD-NEXT: addq %r29, %rsi
+; EGPR-NDD-NEXT: adcq %r28, %r9
+; EGPR-NDD-NEXT: movzbl %r27b, %r27d
+; EGPR-NDD-NEXT: adcq %r27, %rax
; EGPR-NDD-NEXT: adcq $0, %rdx
-; EGPR-NDD-NEXT: addq %r25, %rcx
-; EGPR-NDD-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq %r19, %rbx, %rcx
-; EGPR-NDD-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq %r30, %r9, %rcx
-; EGPR-NDD-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq %r31, %rsi, %rcx
-; EGPR-NDD-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: movzbl %bpl, %ecx
+; EGPR-NDD-NEXT: addq %r25, %r19
+; EGPR-NDD-NEXT: movq %r19, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq %rbx, %r30
+; EGPR-NDD-NEXT: movq %r30, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq %r31, %r10
+; EGPR-NDD-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: adcq %rdi, %rcx
; EGPR-NDD-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq $0, %r8, %rcx
+; EGPR-NDD-NEXT: movzbl %r8b, %ecx
+; EGPR-NDD-NEXT: adcq %rsi, %rcx
; EGPR-NDD-NEXT: movq %rcx, (%rsp) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq $0, %r9
+; EGPR-NDD-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: adcq $0, %rax
; EGPR-NDD-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq $0, %rdx, %rax
-; EGPR-NDD-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq $0, %rdx
+; EGPR-NDD-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: movq 64(%r20), %r28
; EGPR-NDD-NEXT: movq %r24, %rax
; EGPR-NDD-NEXT: mulq %r28
; EGPR-NDD-NEXT: movq %rdx, %r25
; EGPR-NDD-NEXT: movq %rax, %r30
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r23 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r23, %rax
+; EGPR-NDD-NEXT: movq %r14, %rax
; EGPR-NDD-NEXT: mulq %r28
-; EGPR-NDD-NEXT: addq %r25, %rax, %rcx
-; EGPR-NDD-NEXT: adcq $0, %rdx, %rsi
+; EGPR-NDD-NEXT: addq %rax, %r25
+; EGPR-NDD-NEXT: adcq $0, %rdx, %rcx
; EGPR-NDD-NEXT: movq 72(%r20), %r29
; EGPR-NDD-NEXT: movq %r24, %rax
; EGPR-NDD-NEXT: mulq %r29
-; EGPR-NDD-NEXT: addq %rax, %rcx
-; EGPR-NDD-NEXT: adcq %rdx, %rsi
-; EGPR-NDD-NEXT: setb %dil
-; EGPR-NDD-NEXT: movq %r23, %rax
+; EGPR-NDD-NEXT: addq %rax, %r25
+; EGPR-NDD-NEXT: adcq %rdx, %rcx
+; EGPR-NDD-NEXT: setb %sil
+; EGPR-NDD-NEXT: movq %r14, %rax
; EGPR-NDD-NEXT: mulq %r29
-; EGPR-NDD-NEXT: addq %rax, %rsi
-; EGPR-NDD-NEXT: movzbl %dil, %eax
-; EGPR-NDD-NEXT: adcq %rax, %rdx, %rdi
+; EGPR-NDD-NEXT: addq %rax, %rcx
+; EGPR-NDD-NEXT: movzbl %sil, %eax
+; EGPR-NDD-NEXT: adcq %rax, %rdx, %rsi
; EGPR-NDD-NEXT: movq %r22, %rax
; EGPR-NDD-NEXT: mulq %r28
; EGPR-NDD-NEXT: movq %rdx, %r31
; EGPR-NDD-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; EGPR-NDD-NEXT: movq %r21, %rax
; EGPR-NDD-NEXT: mulq %r28
-; EGPR-NDD-NEXT: addq %r31, %rax, %r8
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r9
+; EGPR-NDD-NEXT: addq %rax, %r31
+; EGPR-NDD-NEXT: adcq $0, %rdx, %rdi
; EGPR-NDD-NEXT: movq %r22, %rax
; EGPR-NDD-NEXT: mulq %r29
-; EGPR-NDD-NEXT: addq %r8, %rax
+; EGPR-NDD-NEXT: addq %r31, %rax
; EGPR-NDD-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq %rdx, %r9, %r8
-; EGPR-NDD-NEXT: setb %r9b
+; EGPR-NDD-NEXT: adcq %rdx, %rdi
+; EGPR-NDD-NEXT: setb %r8b
; EGPR-NDD-NEXT: movq %r21, %rax
; EGPR-NDD-NEXT: mulq %r29
-; EGPR-NDD-NEXT: addq %r8, %rax
-; EGPR-NDD-NEXT: movzbl %r9b, %r8d
-; EGPR-NDD-NEXT: adcq %r8, %rdx
-; EGPR-NDD-NEXT: addq %rax, %r30, %r8
-; EGPR-NDD-NEXT: adcq %rdx, %rcx
+; EGPR-NDD-NEXT: addq %rdi, %rax
+; EGPR-NDD-NEXT: movzbl %r8b, %edi
+; EGPR-NDD-NEXT: adcq %rdi, %rdx
+; EGPR-NDD-NEXT: addq %rax, %r30, %rdi
+; EGPR-NDD-NEXT: adcq %rdx, %r25
+; EGPR-NDD-NEXT: adcq $0, %rcx
; EGPR-NDD-NEXT: adcq $0, %rsi
-; EGPR-NDD-NEXT: adcq $0, %rdi
-; EGPR-NDD-NEXT: movq 80(%r20), %rbx
+; EGPR-NDD-NEXT: movq 80(%r20), %r8
; EGPR-NDD-NEXT: movq %r22, %rax
-; EGPR-NDD-NEXT: mulq %rbx
+; EGPR-NDD-NEXT: mulq %r8
; EGPR-NDD-NEXT: movq %rdx, %r30
; EGPR-NDD-NEXT: movq %rax, %r31
; EGPR-NDD-NEXT: movq %r21, %rax
-; EGPR-NDD-NEXT: mulq %rbx
-; EGPR-NDD-NEXT: addq %r30, %rax, %r9
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r10
-; EGPR-NDD-NEXT: movq 88(%r20), %r15
+; EGPR-NDD-NEXT: mulq %r8
+; EGPR-NDD-NEXT: addq %rax, %r30
+; EGPR-NDD-NEXT: adcq $0, %rdx, %r9
+; EGPR-NDD-NEXT: movq 88(%r20), %rbx
; EGPR-NDD-NEXT: movq %r22, %rax
-; EGPR-NDD-NEXT: mulq %r15
-; EGPR-NDD-NEXT: addq %rax, %r9
-; EGPR-NDD-NEXT: adcq %rdx, %r10
-; EGPR-NDD-NEXT: setb %r19b
+; EGPR-NDD-NEXT: mulq %rbx
+; EGPR-NDD-NEXT: addq %rax, %r30
+; EGPR-NDD-NEXT: adcq %rdx, %r9
+; EGPR-NDD-NEXT: setb %r10b
; EGPR-NDD-NEXT: movq %r21, %rax
-; EGPR-NDD-NEXT: mulq %r15
-; EGPR-NDD-NEXT: addq %r10, %rax
-; EGPR-NDD-NEXT: movzbl %r19b, %r10d
-; EGPR-NDD-NEXT: adcq %r10, %rdx
-; EGPR-NDD-NEXT: addq %r31, %r8
-; EGPR-NDD-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; EGPR-NDD-NEXT: adcq %r9, %rcx
-; EGPR-NDD-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: mulq %rbx
+; EGPR-NDD-NEXT: addq %r9, %rax
+; EGPR-NDD-NEXT: movzbl %r10b, %r9d
+; EGPR-NDD-NEXT: adcq %r9, %rdx
+; EGPR-NDD-NEXT: addq %r31, %rdi
+; EGPR-NDD-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; EGPR-NDD-NEXT: adcq %r25, %r30, %rbp
; EGPR-NDD-NEXT: adcq $0, %rax
-; EGPR-NDD-NEXT: adcq $0, %rdx, %rcx
-; EGPR-NDD-NEXT: addq %rax, %rsi
-; EGPR-NDD-NEXT: adcq %rdi, %rcx
+; EGPR-NDD-NEXT: adcq $0, %rdx
+; EGPR-NDD-NEXT: addq %rax, %rcx
+; EGPR-NDD-NEXT: adcq %rdx, %rsi
; EGPR-NDD-NEXT: setb %dil
; EGPR-NDD-NEXT: movq %r24, %rax
-; EGPR-NDD-NEXT: mulq %rbx
+; EGPR-NDD-NEXT: mulq %r8
; EGPR-NDD-NEXT: movq %rdx, %r30
; EGPR-NDD-NEXT: movq %rax, %r31
-; EGPR-NDD-NEXT: movq %r23, %rax
-; EGPR-NDD-NEXT: mulq %rbx
-; EGPR-NDD-NEXT: addq %r30, %rax, %r8
+; EGPR-NDD-NEXT: movq %r14, %rax
+; EGPR-NDD-NEXT: mulq %r8
+; EGPR-NDD-NEXT: addq %rax, %r30
; EGPR-NDD-NEXT: adcq $0, %rdx, %r9
; EGPR-NDD-NEXT: movq %r24, %rax
-; EGPR-NDD-NEXT: mulq %r15
-; EGPR-NDD-NEXT: addq %rax, %r8
+; EGPR-NDD-NEXT: mulq %rbx
+; EGPR-NDD-NEXT: addq %rax, %r30
; EGPR-NDD-NEXT: adcq %rdx, %r9
; EGPR-NDD-NEXT: setb %r10b
-; EGPR-NDD-NEXT: movq %r23, %rax
-; EGPR-NDD-NEXT: mulq %r15
+; EGPR-NDD-NEXT: movq %r14, %rax
+; EGPR-NDD-NEXT: mulq %rbx
; EGPR-NDD-NEXT: addq %r9, %rax
; EGPR-NDD-NEXT: movzbl %r10b, %r9d
; EGPR-NDD-NEXT: adcq %r9, %rdx
-; EGPR-NDD-NEXT: addq %rsi, %r31, %r25
-; EGPR-NDD-NEXT: adcq %rcx, %r8, %r19
-; EGPR-NDD-NEXT: movzbl %dil, %ecx
-; EGPR-NDD-NEXT: adcq %rcx, %rax, %r31
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r12
-; EGPR-NDD-NEXT: imulq %r15, %r26, %rcx
+; EGPR-NDD-NEXT: addq %rcx, %r31, %r25
+; EGPR-NDD-NEXT: adcq %rsi, %r30, %r12
+; EGPR-NDD-NEXT: movzbl %dil, %r19d
+; EGPR-NDD-NEXT: adcq %rax, %r19
+; EGPR-NDD-NEXT: adcq $0, %rdx, %r31
+; EGPR-NDD-NEXT: imulq %r26, %rbx
; EGPR-NDD-NEXT: movq %r26, %rax
-; EGPR-NDD-NEXT: mulq %rbx
+; EGPR-NDD-NEXT: mulq %r8
; EGPR-NDD-NEXT: movq %rax, %r30
-; EGPR-NDD-NEXT: addq %rcx, %rdx, %rax
-; EGPR-NDD-NEXT: imulq %rbx, %r18, %rcx
-; EGPR-NDD-NEXT: addq %rax, %rcx
-; EGPR-NDD-NEXT: imulq %r29, %r11, %rsi
+; EGPR-NDD-NEXT: addq %rbx, %rdx
+; EGPR-NDD-NEXT: imulq %r18, %r8
+; EGPR-NDD-NEXT: addq %rdx, %r8
+; EGPR-NDD-NEXT: imulq %r29, %r11, %rcx
; EGPR-NDD-NEXT: movq %r11, %rax
; EGPR-NDD-NEXT: mulq %r28
-; EGPR-NDD-NEXT: addq %rsi, %rdx
-; EGPR-NDD-NEXT: imulq %r28, %r16, %rsi
-; EGPR-NDD-NEXT: addq %rsi, %rdx
+; EGPR-NDD-NEXT: addq %rdx, %rcx
+; EGPR-NDD-NEXT: imulq %r28, %r17, %r16
+; EGPR-NDD-NEXT: addq %r16, %rcx
; EGPR-NDD-NEXT: addq %r30, %rax, %rsi
-; EGPR-NDD-NEXT: adcq %rcx, %rdx, %rdi
+; EGPR-NDD-NEXT: adcq %rcx, %r8
; EGPR-NDD-NEXT: movq %r28, %rax
; EGPR-NDD-NEXT: mulq %r26
; EGPR-NDD-NEXT: movq %rdx, %r30
@@ -1601,215 +1593,215 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: movq %r29, %rax
; EGPR-NDD-NEXT: mulq %r26
; EGPR-NDD-NEXT: addq %r30, %rax, %rcx
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r8
+; EGPR-NDD-NEXT: adcq $0, %rdx, %rdi
; EGPR-NDD-NEXT: movq %r28, %rax
; EGPR-NDD-NEXT: mulq %r18
; EGPR-NDD-NEXT: addq %rax, %rcx
-; EGPR-NDD-NEXT: adcq %rdx, %r8
+; EGPR-NDD-NEXT: adcq %rdx, %rdi
; EGPR-NDD-NEXT: setb %r9b
; EGPR-NDD-NEXT: movq %r29, %rax
; EGPR-NDD-NEXT: mulq %r18
-; EGPR-NDD-NEXT: addq %r8, %rax
-; EGPR-NDD-NEXT: movzbl %r9b, %r8d
-; EGPR-NDD-NEXT: adcq %r8, %rdx
+; EGPR-NDD-NEXT: addq %rdi, %rax
+; EGPR-NDD-NEXT: movzbl %r9b, %edi
+; EGPR-NDD-NEXT: adcq %rdi, %rdx
; EGPR-NDD-NEXT: addq %rax, %rsi
-; EGPR-NDD-NEXT: adcq %rdi, %rdx, %r29
+; EGPR-NDD-NEXT: adcq %rdx, %r8
; EGPR-NDD-NEXT: movq 112(%r20), %rdi
; EGPR-NDD-NEXT: movq %r22, %rax
; EGPR-NDD-NEXT: mulq %rdi
; EGPR-NDD-NEXT: movq %rax, %r26
-; EGPR-NDD-NEXT: imulq %rdi, %r21, %rax
-; EGPR-NDD-NEXT: addq %rdx, %rax
-; EGPR-NDD-NEXT: imulq 120(%r20), %r22, %rdx
-; EGPR-NDD-NEXT: addq %rdx, %rax, %r8
+; EGPR-NDD-NEXT: imulq %r21, %rdi
+; EGPR-NDD-NEXT: addq %rdi, %rdx
+; EGPR-NDD-NEXT: imulq 120(%r20), %r22, %rax
+; EGPR-NDD-NEXT: addq %rax, %rdx, %r9
; EGPR-NDD-NEXT: movq 96(%r20), %r28
; EGPR-NDD-NEXT: movq 104(%r20), %rdi
-; EGPR-NDD-NEXT: imulq %rdi, %r24, %r9
+; EGPR-NDD-NEXT: imulq %rdi, %r24, %r10
; EGPR-NDD-NEXT: movq %r24, %rax
; EGPR-NDD-NEXT: mulq %r28
-; EGPR-NDD-NEXT: addq %r9, %rdx
-; EGPR-NDD-NEXT: imulq %r28, %r23, %r9
-; EGPR-NDD-NEXT: addq %r9, %rdx
-; EGPR-NDD-NEXT: addq %r26, %rax, %r9
-; EGPR-NDD-NEXT: adcq %rdx, %r8
+; EGPR-NDD-NEXT: addq %r10, %rdx
+; EGPR-NDD-NEXT: imulq %r28, %r14, %r23
+; EGPR-NDD-NEXT: addq %r23, %rdx
+; EGPR-NDD-NEXT: addq %rax, %r26
+; EGPR-NDD-NEXT: adcq %rdx, %r9
; EGPR-NDD-NEXT: movq %r28, %rax
; EGPR-NDD-NEXT: mulq %r22
; EGPR-NDD-NEXT: movq %rdx, %r23
; EGPR-NDD-NEXT: movq %rax, %r24
; EGPR-NDD-NEXT: movq %rdi, %rax
; EGPR-NDD-NEXT: mulq %r22
-; EGPR-NDD-NEXT: addq %r23, %rax, %r10
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r11
+; EGPR-NDD-NEXT: addq %rax, %r23
+; EGPR-NDD-NEXT: adcq $0, %rdx, %r10
; EGPR-NDD-NEXT: movq %r28, %rax
; EGPR-NDD-NEXT: mulq %r21
-; EGPR-NDD-NEXT: addq %rax, %r10
-; EGPR-NDD-NEXT: adcq %rdx, %r11
-; EGPR-NDD-NEXT: setb %r16b
+; EGPR-NDD-NEXT: addq %rax, %r23
+; EGPR-NDD-NEXT: adcq %rdx, %r10
+; EGPR-NDD-NEXT: setb %r11b
; EGPR-NDD-NEXT: movq %rdi, %rax
; EGPR-NDD-NEXT: mulq %r21
-; EGPR-NDD-NEXT: addq %r11, %rax
-; EGPR-NDD-NEXT: movzbl %r16b, %edi
+; EGPR-NDD-NEXT: addq %r10, %rax
+; EGPR-NDD-NEXT: movzbl %r11b, %edi
; EGPR-NDD-NEXT: adcq %rdi, %rdx
-; EGPR-NDD-NEXT: addq %r9, %rax
-; EGPR-NDD-NEXT: adcq %r8, %rdx
-; EGPR-NDD-NEXT: addq %r27, %r24, %rdi
-; EGPR-NDD-NEXT: adcq %r10, %rcx
+; EGPR-NDD-NEXT: addq %r26, %rax
+; EGPR-NDD-NEXT: adcq %r9, %rdx
+; EGPR-NDD-NEXT: addq %r27, %r24
+; EGPR-NDD-NEXT: adcq %r23, %rcx
; EGPR-NDD-NEXT: adcq %rsi, %rax
-; EGPR-NDD-NEXT: adcq %r29, %rdx
-; EGPR-NDD-NEXT: addq %rdi, %r25, %r15
-; EGPR-NDD-NEXT: adcq %rcx, %r19, %rbx
-; EGPR-NDD-NEXT: adcq %rax, %r31, %rbp
-; EGPR-NDD-NEXT: adcq %rdx, %r12, %r30
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r18 # 8-byte Reload
-; EGPR-NDD-NEXT: movq 80(%r18), %r22
+; EGPR-NDD-NEXT: adcq %r8, %rdx
+; EGPR-NDD-NEXT: addq %r24, %r25, %rbx
+; EGPR-NDD-NEXT: adcq %rcx, %r12
+; EGPR-NDD-NEXT: adcq %rax, %r19, %r13
+; EGPR-NDD-NEXT: adcq %rdx, %r31, %r30
+; EGPR-NDD-NEXT: movq 80(%r15), %r22
; EGPR-NDD-NEXT: movq %r22, %rax
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r21 # 8-byte Reload
-; EGPR-NDD-NEXT: mulq %r21
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r16 # 8-byte Reload
+; EGPR-NDD-NEXT: mulq %r16
; EGPR-NDD-NEXT: movq %rax, %r26
; EGPR-NDD-NEXT: movq %rdx, %rdi
-; EGPR-NDD-NEXT: movq 88(%r18), %r20
+; EGPR-NDD-NEXT: movq 88(%r15), %r20
; EGPR-NDD-NEXT: movq %r20, %rax
-; EGPR-NDD-NEXT: mulq %r21
-; EGPR-NDD-NEXT: addq %rdi, %rax, %rcx
-; EGPR-NDD-NEXT: adcq $0, %rdx, %rsi
+; EGPR-NDD-NEXT: mulq %r16
+; EGPR-NDD-NEXT: addq %rax, %rdi
+; EGPR-NDD-NEXT: adcq $0, %rdx, %rcx
; EGPR-NDD-NEXT: movq %r22, %rax
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
-; EGPR-NDD-NEXT: mulq %r12
-; EGPR-NDD-NEXT: addq %rax, %rcx
-; EGPR-NDD-NEXT: adcq %rdx, %rsi
-; EGPR-NDD-NEXT: setb %dil
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r21 # 8-byte Reload
+; EGPR-NDD-NEXT: mulq %r21
+; EGPR-NDD-NEXT: addq %rax, %rdi
+; EGPR-NDD-NEXT: adcq %rdx, %rcx
+; EGPR-NDD-NEXT: setb %sil
; EGPR-NDD-NEXT: movq %r20, %rax
-; EGPR-NDD-NEXT: mulq %r12
-; EGPR-NDD-NEXT: addq %rax, %rsi
-; EGPR-NDD-NEXT: movzbl %dil, %eax
-; EGPR-NDD-NEXT: adcq %rax, %rdx, %rdi
-; EGPR-NDD-NEXT: movq 64(%r18), %r24
-; EGPR-NDD-NEXT: movq %r24, %rax
; EGPR-NDD-NEXT: mulq %r21
+; EGPR-NDD-NEXT: addq %rax, %rcx
+; EGPR-NDD-NEXT: movzbl %sil, %eax
+; EGPR-NDD-NEXT: adcq %rax, %rdx, %rsi
+; EGPR-NDD-NEXT: movq 64(%r15), %r24
+; EGPR-NDD-NEXT: movq %r24, %rax
+; EGPR-NDD-NEXT: mulq %r16
; EGPR-NDD-NEXT: movq %rax, %r29
; EGPR-NDD-NEXT: movq %rdx, %r27
-; EGPR-NDD-NEXT: movq 72(%r18), %r23
+; EGPR-NDD-NEXT: movq 72(%r15), %r23
; EGPR-NDD-NEXT: movq %r23, %rax
-; EGPR-NDD-NEXT: mulq %r21
-; EGPR-NDD-NEXT: addq %r27, %rax, %r8
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r9
+; EGPR-NDD-NEXT: mulq %r16
+; EGPR-NDD-NEXT: addq %rax, %r27
+; EGPR-NDD-NEXT: adcq $0, %rdx, %r8
; EGPR-NDD-NEXT: movq %r24, %rax
-; EGPR-NDD-NEXT: mulq %r12
-; EGPR-NDD-NEXT: addq %r8, %rax, %r31
-; EGPR-NDD-NEXT: adcq %rdx, %r9, %r8
+; EGPR-NDD-NEXT: mulq %r21
+; EGPR-NDD-NEXT: addq %r27, %rax, %r31
+; EGPR-NDD-NEXT: adcq %rdx, %r8
; EGPR-NDD-NEXT: setb %r9b
; EGPR-NDD-NEXT: movq %r23, %rax
-; EGPR-NDD-NEXT: mulq %r12
+; EGPR-NDD-NEXT: mulq %r21
; EGPR-NDD-NEXT: addq %r8, %rax
; EGPR-NDD-NEXT: movzbl %r9b, %r8d
; EGPR-NDD-NEXT: adcq %r8, %rdx
-; EGPR-NDD-NEXT: addq %rax, %r26, %r8
-; EGPR-NDD-NEXT: adcq %rdx, %rcx
+; EGPR-NDD-NEXT: addq %rax, %r26, %r28
+; EGPR-NDD-NEXT: adcq %rdx, %rdi
+; EGPR-NDD-NEXT: adcq $0, %rcx
; EGPR-NDD-NEXT: adcq $0, %rsi
-; EGPR-NDD-NEXT: adcq $0, %rdi
; EGPR-NDD-NEXT: movq %r24, %rax
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r16 # 8-byte Reload
-; EGPR-NDD-NEXT: mulq %r16
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
+; EGPR-NDD-NEXT: mulq %r10
; EGPR-NDD-NEXT: movq %rdx, %r26
; EGPR-NDD-NEXT: movq %rax, %r27
; EGPR-NDD-NEXT: movq %r23, %rax
-; EGPR-NDD-NEXT: mulq %r16
-; EGPR-NDD-NEXT: addq %r26, %rax, %r9
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r10
+; EGPR-NDD-NEXT: mulq %r10
+; EGPR-NDD-NEXT: addq %rax, %r26
+; EGPR-NDD-NEXT: adcq $0, %rdx, %r8
; EGPR-NDD-NEXT: movq %r24, %rax
-; EGPR-NDD-NEXT: mulq %r17
-; EGPR-NDD-NEXT: addq %rax, %r9
-; EGPR-NDD-NEXT: adcq %rdx, %r10
-; EGPR-NDD-NEXT: setb %r11b
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
+; EGPR-NDD-NEXT: mulq %r11
+; EGPR-NDD-NEXT: addq %r26, %rax, %r25
+; EGPR-NDD-NEXT: adcq %rdx, %r8
+; EGPR-NDD-NEXT: setb %r9b
; EGPR-NDD-NEXT: movq %r23, %rax
-; EGPR-NDD-NEXT: mulq %r17
-; EGPR-NDD-NEXT: addq %r10, %rax
-; EGPR-NDD-NEXT: movzbl %r11b, %r10d
-; EGPR-NDD-NEXT: adcq %r10, %rdx
-; EGPR-NDD-NEXT: addq %r8, %r27, %r28
-; EGPR-NDD-NEXT: adcq %rcx, %r9, %r25
+; EGPR-NDD-NEXT: mulq %r11
+; EGPR-NDD-NEXT: addq %r8, %rax
+; EGPR-NDD-NEXT: movzbl %r9b, %r8d
+; EGPR-NDD-NEXT: adcq %r8, %rdx
+; EGPR-NDD-NEXT: addq %r27, %r28
+; EGPR-NDD-NEXT: adcq %rdi, %r25
; EGPR-NDD-NEXT: adcq $0, %rax
-; EGPR-NDD-NEXT: adcq $0, %rdx, %rcx
-; EGPR-NDD-NEXT: addq %rax, %rsi
-; EGPR-NDD-NEXT: adcq %rdi, %rcx
+; EGPR-NDD-NEXT: adcq $0, %rdx
+; EGPR-NDD-NEXT: addq %rax, %rcx
+; EGPR-NDD-NEXT: adcq %rdx, %rsi
; EGPR-NDD-NEXT: setb %dil
; EGPR-NDD-NEXT: movq %r22, %rax
-; EGPR-NDD-NEXT: mulq %r16
+; EGPR-NDD-NEXT: mulq %r10
; EGPR-NDD-NEXT: movq %rdx, %r26
; EGPR-NDD-NEXT: movq %rax, %r27
; EGPR-NDD-NEXT: movq %r20, %rax
-; EGPR-NDD-NEXT: mulq %r16
-; EGPR-NDD-NEXT: addq %r26, %rax, %r8
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r9
+; EGPR-NDD-NEXT: mulq %r10
+; EGPR-NDD-NEXT: addq %rax, %r26
+; EGPR-NDD-NEXT: adcq $0, %rdx, %r8
; EGPR-NDD-NEXT: movq %r22, %rax
-; EGPR-NDD-NEXT: mulq %r17
-; EGPR-NDD-NEXT: addq %rax, %r8
-; EGPR-NDD-NEXT: adcq %rdx, %r9
-; EGPR-NDD-NEXT: setb %r10b
+; EGPR-NDD-NEXT: mulq %r11
+; EGPR-NDD-NEXT: addq %r26, %rax, %r19
+; EGPR-NDD-NEXT: adcq %rdx, %r8
+; EGPR-NDD-NEXT: setb %r9b
; EGPR-NDD-NEXT: movq %r20, %rax
-; EGPR-NDD-NEXT: mulq %r17
-; EGPR-NDD-NEXT: addq %r9, %rax
-; EGPR-NDD-NEXT: movzbl %r10b, %r9d
-; EGPR-NDD-NEXT: adcq %r9, %rdx
-; EGPR-NDD-NEXT: addq %rsi, %r27
-; EGPR-NDD-NEXT: adcq %rcx, %r8, %r19
+; EGPR-NDD-NEXT: mulq %r11
+; EGPR-NDD-NEXT: addq %r8, %rax
+; EGPR-NDD-NEXT: movzbl %r9b, %r8d
+; EGPR-NDD-NEXT: adcq %r8, %rdx
+; EGPR-NDD-NEXT: addq %rcx, %r27
+; EGPR-NDD-NEXT: adcq %rsi, %r19
; EGPR-NDD-NEXT: movzbl %dil, %ecx
; EGPR-NDD-NEXT: adcq %rax, %rcx
; EGPR-NDD-NEXT: adcq $0, %rdx, %rdi
-; EGPR-NDD-NEXT: movq %r18, %r9
-; EGPR-NDD-NEXT: movq 96(%r18), %r26
-; EGPR-NDD-NEXT: imulq %r17, %r26, %rsi
+; EGPR-NDD-NEXT: movq 96(%r15), %r26
+; EGPR-NDD-NEXT: imulq %r11, %r26, %rsi
; EGPR-NDD-NEXT: movq %r26, %rax
-; EGPR-NDD-NEXT: mulq %r16
+; EGPR-NDD-NEXT: mulq %r10
; EGPR-NDD-NEXT: movq %rax, %r18
-; EGPR-NDD-NEXT: addq %rsi, %rdx, %rax
-; EGPR-NDD-NEXT: movq 104(%r9), %r8
-; EGPR-NDD-NEXT: imulq %r16, %r8, %rdx
-; EGPR-NDD-NEXT: addq %rdx, %rax, %rsi
-; EGPR-NDD-NEXT: movq 112(%r9), %rax
-; EGPR-NDD-NEXT: movq %r9, %r11
-; EGPR-NDD-NEXT: imulq %r12, %rax, %r9
-; EGPR-NDD-NEXT: mulq %r21
+; EGPR-NDD-NEXT: addq %rsi, %rdx
+; EGPR-NDD-NEXT: movq 104(%r15), %r8
+; EGPR-NDD-NEXT: imulq %r10, %r8, %rax
+; EGPR-NDD-NEXT: addq %rax, %rdx, %rsi
+; EGPR-NDD-NEXT: movq 112(%r15), %rax
+; EGPR-NDD-NEXT: imulq %r21, %rax, %r9
+; EGPR-NDD-NEXT: mulq %r16
; EGPR-NDD-NEXT: addq %r9, %rdx
-; EGPR-NDD-NEXT: imulq 120(%r11), %r21, %r9
+; EGPR-NDD-NEXT: imulq 120(%r15), %r16, %r9
; EGPR-NDD-NEXT: addq %r9, %rdx
-; EGPR-NDD-NEXT: addq %r18, %rax, %r9
-; EGPR-NDD-NEXT: adcq %rsi, %rdx, %r16
-; EGPR-NDD-NEXT: movq %r21, %rax
+; EGPR-NDD-NEXT: addq %r18, %rax, %r10
+; EGPR-NDD-NEXT: adcq %rsi, %rdx, %r9
+; EGPR-NDD-NEXT: movq %r16, %rax
+; EGPR-NDD-NEXT: movq %r16, %r18
; EGPR-NDD-NEXT: mulq %r26
; EGPR-NDD-NEXT: movq %rdx, %r17
; EGPR-NDD-NEXT: movq %rax, %rsi
-; EGPR-NDD-NEXT: movq %r12, %rax
-; EGPR-NDD-NEXT: mulq %r26
-; EGPR-NDD-NEXT: addq %r17, %rax, %r10
-; EGPR-NDD-NEXT: adcq $0, %rdx, %r17
; EGPR-NDD-NEXT: movq %r21, %rax
+; EGPR-NDD-NEXT: mulq %r26
+; EGPR-NDD-NEXT: addq %r17, %rax, %r11
+; EGPR-NDD-NEXT: adcq $0, %rdx, %r16
+; EGPR-NDD-NEXT: movq %r18, %rax
; EGPR-NDD-NEXT: mulq %r8
-; EGPR-NDD-NEXT: addq %r10, %rax, %r11
-; EGPR-NDD-NEXT: adcq %rdx, %r17, %r10
+; EGPR-NDD-NEXT: addq %rax, %r11
+; EGPR-NDD-NEXT: adcq %rdx, %r16
; EGPR-NDD-NEXT: setb %r17b
-; EGPR-NDD-NEXT: movq %r12, %rax
+; EGPR-NDD-NEXT: movq %r21, %rax
; EGPR-NDD-NEXT: mulq %r8
-; EGPR-NDD-NEXT: addq %r10, %rax
+; EGPR-NDD-NEXT: addq %r16, %rax
; EGPR-NDD-NEXT: movzbl %r17b, %r8d
; EGPR-NDD-NEXT: adcq %r8, %rdx
-; EGPR-NDD-NEXT: addq %r9, %rax, %r10
-; EGPR-NDD-NEXT: adcq %r16, %rdx, %r17
-; EGPR-NDD-NEXT: imulq %r14, %r24, %r8
+; EGPR-NDD-NEXT: addq %rax, %r10
+; EGPR-NDD-NEXT: adcq %r9, %rdx, %r17
+; EGPR-NDD-NEXT: imulq {{[-0-9]+}}(%r{{[sb]}}p), %r24, %r8 # 8-byte Folded Reload
; EGPR-NDD-NEXT: movq %r24, %rax
-; EGPR-NDD-NEXT: mulq %r13
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r16 # 8-byte Reload
+; EGPR-NDD-NEXT: mulq %r16
; EGPR-NDD-NEXT: movq %rax, %r9
-; EGPR-NDD-NEXT: addq %r8, %rdx, %rax
-; EGPR-NDD-NEXT: imulq %r13, %r23, %rdx
-; EGPR-NDD-NEXT: addq %rdx, %rax, %r8
+; EGPR-NDD-NEXT: addq %r8, %rdx
+; EGPR-NDD-NEXT: imulq %r16, %r23, %rax
+; EGPR-NDD-NEXT: addq %rax, %rdx, %r8
; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r21 # 8-byte Reload
; EGPR-NDD-NEXT: imulq %r21, %r22, %r16
; EGPR-NDD-NEXT: movq %r22, %rax
; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r26 # 8-byte Reload
; EGPR-NDD-NEXT: mulq %r26
; EGPR-NDD-NEXT: addq %r16, %rdx
-; EGPR-NDD-NEXT: imulq %r26, %r20, %r16
-; EGPR-NDD-NEXT: addq %r16, %rdx
+; EGPR-NDD-NEXT: imulq %r26, %r20
+; EGPR-NDD-NEXT: addq %r20, %rdx
; EGPR-NDD-NEXT: addq %r9, %rax, %r16
; EGPR-NDD-NEXT: adcq %r8, %rdx, %r18
; EGPR-NDD-NEXT: movq %r26, %rax
@@ -1840,49 +1832,49 @@ define void @test_1024(ptr %a, ptr %b, ptr %out) nounwind {
; EGPR-NDD-NEXT: addq %r27, %rsi
; EGPR-NDD-NEXT: adcq %r19, %r8
; EGPR-NDD-NEXT: adcq %rcx, %rax
-; EGPR-NDD-NEXT: adcq %rdx, %rdi, %rcx
-; EGPR-NDD-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r29, %rdx # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r31, %rdi # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r28, %r9 # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r25, %r10 # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq %r15, %rsi
-; EGPR-NDD-NEXT: adcq %rbx, %r8
-; EGPR-NDD-NEXT: adcq %rbp, %rax
-; EGPR-NDD-NEXT: adcq %r30, %rcx
-; EGPR-NDD-NEXT: addq %rdx, {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq %rdi, {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq %r9, {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq %r10, {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq %rsi, {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq %r8, (%rsp), %r8 # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq %rdi, %rdx
+; EGPR-NDD-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r29 # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r31 # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq {{[-0-9]+}}(%r{{[sb]}}p), %r28 # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq %rbp, %r25
+; EGPR-NDD-NEXT: adcq %rbx, %rsi
+; EGPR-NDD-NEXT: adcq %r12, %r8
+; EGPR-NDD-NEXT: adcq %r13, %rax
+; EGPR-NDD-NEXT: adcq %r30, %rdx
+; EGPR-NDD-NEXT: addq %r29, {{[-0-9]+}}(%r{{[sb]}}p), %r29 # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq %r31, {{[-0-9]+}}(%r{{[sb]}}p), %r31 # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq %r28, {{[-0-9]+}}(%r{{[sb]}}p), %r28 # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq %r25, {{[-0-9]+}}(%r{{[sb]}}p), %r25 # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq %rsi, (%rsp), %rsi # 8-byte Folded Reload
+; EGPR-NDD-NEXT: adcq %r8, {{[-0-9]+}}(%r{{[sb]}}p), %r8 # 8-byte Folded Reload
; EGPR-NDD-NEXT: adcq %rax, {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
-; EGPR-NDD-NEXT: adcq %rcx, {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Folded Reload
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r16 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r16, (%r11)
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r16 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r16, 8(%r11)
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r16 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r16, 16(%r11)
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r16 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r16, 24(%r11)
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r16 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r16, 32(%r11)
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r16 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r16, 40(%r11)
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r16 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r16, 48(%r11)
-; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r16 # 8-byte Reload
-; EGPR-NDD-NEXT: movq %r16, 56(%r11)
-; EGPR-NDD-NEXT: movq %rdx, 64(%r11)
-; EGPR-NDD-NEXT: movq %rdi, 72(%r11)
-; EGPR-NDD-NEXT: movq %r9, 80(%r11)
-; EGPR-NDD-NEXT: movq %r10, 88(%r11)
-; EGPR-NDD-NEXT: movq %rsi, 96(%r11)
-; EGPR-NDD-NEXT: movq %r8, 104(%r11)
-; EGPR-NDD-NEXT: movq %rax, 112(%r11)
-; EGPR-NDD-NEXT: movq %rcx, 120(%r11)
-; EGPR-NDD-NEXT: addq $104, %rsp
+; EGPR-NDD-NEXT: adcq %rdx, {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; EGPR-NDD-NEXT: movq %rdi, (%rcx)
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; EGPR-NDD-NEXT: movq %rdi, 8(%rcx)
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; EGPR-NDD-NEXT: movq %rdi, 16(%rcx)
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; EGPR-NDD-NEXT: movq %rdi, 24(%rcx)
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; EGPR-NDD-NEXT: movq %rdi, 32(%rcx)
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; EGPR-NDD-NEXT: movq %rdi, 40(%rcx)
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; EGPR-NDD-NEXT: movq %rdi, 48(%rcx)
+; EGPR-NDD-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
+; EGPR-NDD-NEXT: movq %rdi, 56(%rcx)
+; EGPR-NDD-NEXT: movq %r29, 64(%rcx)
+; EGPR-NDD-NEXT: movq %r31, 72(%rcx)
+; EGPR-NDD-NEXT: movq %r28, 80(%rcx)
+; EGPR-NDD-NEXT: movq %r25, 88(%rcx)
+; EGPR-NDD-NEXT: movq %rsi, 96(%rcx)
+; EGPR-NDD-NEXT: movq %r8, 104(%rcx)
+; EGPR-NDD-NEXT: movq %rax, 112(%rcx)
+; EGPR-NDD-NEXT: movq %rdx, 120(%rcx)
+; EGPR-NDD-NEXT: addq $96, %rsp
; EGPR-NDD-NEXT: popq %rbx
; EGPR-NDD-NEXT: popq %r12
; EGPR-NDD-NEXT: popq %r13
diff --git a/llvm/test/CodeGen/X86/apx/or.ll b/llvm/test/CodeGen/X86/apx/or.ll
index 6a3db295c8c15..e51ba9d9bf039 100644
--- a/llvm/test/CodeGen/X86/apx/or.ll
+++ b/llvm/test/CodeGen/X86/apx/or.ll
@@ -478,17 +478,17 @@ define i1 @orflag16rr(i16 %a, i16 %b) {
define i1 @orflag32rr(i32 %a, i32 %b) {
; CHECK-LABEL: orflag32rr:
; CHECK: # %bb.0:
-; CHECK-NEXT: orl %esi, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x09,0xf7]
+; CHECK-NEXT: orl %edi, %esi # EVEX TO LEGACY Compression encoding: [0x09,0xfe]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: orflag32rr:
; NF: # %bb.0:
-; NF-NEXT: orl %esi, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x09,0xf7]
+; NF-NEXT: orl %edi, %esi # EVEX TO LEGACY Compression encoding: [0x09,0xfe]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = or i32 %a, %b ; 0xff << 50
@@ -500,17 +500,17 @@ define i1 @orflag32rr(i32 %a, i32 %b) {
define i1 @orflag64rr(i64 %a, i64 %b) {
; CHECK-LABEL: orflag64rr:
; CHECK: # %bb.0:
-; CHECK-NEXT: orq %rsi, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x09,0xf7]
+; CHECK-NEXT: orq %rdi, %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x09,0xfe]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: orflag64rr:
; NF: # %bb.0:
-; NF-NEXT: orq %rsi, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x09,0xf7]
+; NF-NEXT: orq %rdi, %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x09,0xfe]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = or i64 %a, %b ; 0xff << 50
@@ -574,17 +574,17 @@ define i1 @orflag16rm(ptr %ptr, i16 %b) {
define i1 @orflag32rm(ptr %ptr, i32 %b) {
; CHECK-LABEL: orflag32rm:
; CHECK: # %bb.0:
-; CHECK-NEXT: orl (%rdi), %esi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x0b,0x37]
+; CHECK-NEXT: orl (%rdi), %esi # EVEX TO LEGACY Compression encoding: [0x0b,0x37]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: orflag32rm:
; NF: # %bb.0:
-; NF-NEXT: orl (%rdi), %esi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x0b,0x37]
+; NF-NEXT: orl (%rdi), %esi # EVEX TO LEGACY Compression encoding: [0x0b,0x37]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%a = load i32, ptr %ptr
@@ -597,17 +597,17 @@ define i1 @orflag32rm(ptr %ptr, i32 %b) {
define i1 @orflag64rm(ptr %ptr, i64 %b) {
; CHECK-LABEL: orflag64rm:
; CHECK: # %bb.0:
-; CHECK-NEXT: orq (%rdi), %rsi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x0b,0x37]
+; CHECK-NEXT: orq (%rdi), %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x0b,0x37]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: orflag64rm:
; NF: # %bb.0:
-; NF-NEXT: orq (%rdi), %rsi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x0b,0x37]
+; NF-NEXT: orq (%rdi), %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x0b,0x37]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%a = load i64, ptr %ptr
@@ -668,19 +668,19 @@ define i1 @orflag16ri(i16 %a) {
define i1 @orflag32ri(i32 %a) {
; CHECK-LABEL: orflag32ri:
; CHECK: # %bb.0:
-; CHECK-NEXT: orl $123456, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x81,0xcf,0x40,0xe2,0x01,0x00]
+; CHECK-NEXT: orl $123456, %edi # EVEX TO LEGACY Compression encoding: [0x81,0xcf,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: # imm = 0x1E240
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: orflag32ri:
; NF: # %bb.0:
-; NF-NEXT: orl $123456, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x81,0xcf,0x40,0xe2,0x01,0x00]
+; NF-NEXT: orl $123456, %edi # EVEX TO LEGACY Compression encoding: [0x81,0xcf,0x40,0xe2,0x01,0x00]
; NF-NEXT: # imm = 0x1E240
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = or i32 %a, 123456 ; 0xff << 50
@@ -692,19 +692,19 @@ define i1 @orflag32ri(i32 %a) {
define i1 @orflag64ri(i64 %a) {
; CHECK-LABEL: orflag64ri:
; CHECK: # %bb.0:
-; CHECK-NEXT: orq $123456, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x81,0xcf,0x40,0xe2,0x01,0x00]
+; CHECK-NEXT: orq $123456, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x81,0xcf,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: # imm = 0x1E240
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: orflag64ri:
; NF: # %bb.0:
-; NF-NEXT: orq $123456, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x81,0xcf,0x40,0xe2,0x01,0x00]
+; NF-NEXT: orq $123456, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x81,0xcf,0x40,0xe2,0x01,0x00]
; NF-NEXT: # imm = 0x1E240
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = or i64 %a, 123456 ; 0xff << 50
@@ -739,17 +739,17 @@ define i1 @orflag16ri8(i16 %a) {
define i1 @orflag32ri8(i32 %a) {
; CHECK-LABEL: orflag32ri8:
; CHECK: # %bb.0:
-; CHECK-NEXT: orl $123, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x83,0xcf,0x7b]
+; CHECK-NEXT: orl $123, %edi # EVEX TO LEGACY Compression encoding: [0x83,0xcf,0x7b]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: orflag32ri8:
; NF: # %bb.0:
-; NF-NEXT: orl $123, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x83,0xcf,0x7b]
+; NF-NEXT: orl $123, %edi # EVEX TO LEGACY Compression encoding: [0x83,0xcf,0x7b]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = or i32 %a, 123 ; 0xff << 50
@@ -761,17 +761,17 @@ define i1 @orflag32ri8(i32 %a) {
define i1 @orflag64ri8(i64 %a) {
; CHECK-LABEL: orflag64ri8:
; CHECK: # %bb.0:
-; CHECK-NEXT: orq $123, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x83,0xcf,0x7b]
+; CHECK-NEXT: orq $123, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xcf,0x7b]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: orflag64ri8:
; NF: # %bb.0:
-; NF-NEXT: orq $123, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x83,0xcf,0x7b]
+; NF-NEXT: orq $123, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xcf,0x7b]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = or i64 %a, 123 ; 0xff << 50
diff --git a/llvm/test/CodeGen/X86/apx/shift-eflags.ll b/llvm/test/CodeGen/X86/apx/shift-eflags.ll
index 5da5090307e62..2659f8031ef77 100644
--- a/llvm/test/CodeGen/X86/apx/shift-eflags.ll
+++ b/llvm/test/CodeGen/X86/apx/shift-eflags.ll
@@ -7,7 +7,7 @@
define i32 @ashr_const(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: ashr_const:
; CHECK: # %bb.0:
-; CHECK-NEXT: sarl $14, %edi, %eax
+; CHECK-NEXT: sarl $14, %edi
; CHECK-NEXT: cmovel %edx, %ecx, %eax
; CHECK-NEXT: retq
%s = ashr i32 %a0, 14
@@ -85,7 +85,7 @@ define i32 @shl_const_self_select(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
define i32 @ashr_const1(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: ashr_const1:
; CHECK: # %bb.0:
-; CHECK-NEXT: sarl %edi, %eax
+; CHECK-NEXT: sarl %edi
; CHECK-NEXT: cmovel %edx, %ecx, %eax
; CHECK-NEXT: retq
%s = ashr i32 %a0, 1
@@ -166,8 +166,8 @@ define i32 @ashr_var(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: movl %esi, %ecx
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT: sarl %cl, %edi, %ecx
-; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: sarl %cl, %edi
+; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: cmovel %edx, %eax
; CHECK-NEXT: retq
%s = ashr i32 %a0, %a1
@@ -183,8 +183,8 @@ define i32 @lshr_var(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: movl %esi, %ecx
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT: shrl %cl, %edi, %ecx
-; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: shrl %cl, %edi
+; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: cmovel %edx, %eax
; CHECK-NEXT: retq
%s = lshr i32 %a0, %a1
@@ -200,8 +200,8 @@ define i32 @shl_var(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: movl %esi, %ecx
; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT: shll %cl, %edi, %ecx
-; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: shll %cl, %edi
+; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: cmovel %edx, %eax
; CHECK-NEXT: retq
%s = shl i32 %a0, %a1
@@ -264,8 +264,8 @@ define i32 @ashr_var_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK: # %bb.0:
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: orb $1, %sil, %cl
-; CHECK-NEXT: sarl %cl, %edi, %ecx
-; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: sarl %cl, %edi
+; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: cmovel %edx, %eax
; CHECK-NEXT: retq
%a = or i32 %a1, 1
@@ -281,8 +281,8 @@ define i32 @lshr_var_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK: # %bb.0:
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: orb $1, %sil, %cl
-; CHECK-NEXT: shrl %cl, %edi, %ecx
-; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: shrl %cl, %edi
+; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: cmovel %edx, %eax
; CHECK-NEXT: retq
%a = or i32 %a1, 1
@@ -298,8 +298,8 @@ define i32 @shl_var_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK: # %bb.0:
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: orb $1, %sil, %cl
-; CHECK-NEXT: shll %cl, %edi, %ecx
-; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: shll %cl, %edi
+; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: cmovel %edx, %eax
; CHECK-NEXT: retq
%a = or i32 %a1, 1
diff --git a/llvm/test/CodeGen/X86/apx/sub.ll b/llvm/test/CodeGen/X86/apx/sub.ll
index 75d705557cdf2..9519fab4ee518 100644
--- a/llvm/test/CodeGen/X86/apx/sub.ll
+++ b/llvm/test/CodeGen/X86/apx/sub.ll
@@ -451,16 +451,16 @@ define i16 @subflag16rr(i16 noundef %a, i16 noundef %b) {
; CHECK-LABEL: subflag16rr:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: subw %si, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x29,0xf7]
-; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; CHECK-NEXT: subw %si, %di # EVEX TO LEGACY Compression encoding: [0x66,0x29,0xf7]
+; CHECK-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: subflag16rr:
; NF: # %bb.0: # %entry
; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NF-NEXT: subw %si, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x29,0xf7]
-; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; NF-NEXT: subw %si, %di # EVEX TO LEGACY Compression encoding: [0x66,0x29,0xf7]
+; NF-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; NF-NEXT: # kill: def $ax killed $ax killed $eax
; NF-NEXT: retq # encoding: [0xc3]
entry:
@@ -472,15 +472,15 @@ define i32 @subflag32rr(i32 noundef %a, i32 noundef %b) {
; CHECK-LABEL: subflag32rr:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: subl %esi, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x29,0xf7]
-; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; CHECK-NEXT: subl %esi, %edi # EVEX TO LEGACY Compression encoding: [0x29,0xf7]
+; CHECK-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: subflag32rr:
; NF: # %bb.0: # %entry
; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NF-NEXT: subl %esi, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x29,0xf7]
-; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; NF-NEXT: subl %esi, %edi # EVEX TO LEGACY Compression encoding: [0x29,0xf7]
+; NF-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%sub = call i32 @llvm.usub.sat.i32(i32 %a, i32 %b)
@@ -491,15 +491,15 @@ define i64 @subflag64rr(i64 noundef %a, i64 noundef %b) {
; CHECK-LABEL: subflag64rr:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: subq %rsi, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x29,0xf7]
-; CHECK-NEXT: cmovaeq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc1]
+; CHECK-NEXT: subq %rsi, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x29,0xf7]
+; CHECK-NEXT: cmovaeq %rdi, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc7]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: subflag64rr:
; NF: # %bb.0: # %entry
; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NF-NEXT: subq %rsi, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x29,0xf7]
-; NF-NEXT: cmovaeq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc1]
+; NF-NEXT: subq %rsi, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x29,0xf7]
+; NF-NEXT: cmovaeq %rdi, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc7]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%sub = call i64 @llvm.usub.sat.i64(i64 %a, i64 %b)
@@ -534,16 +534,16 @@ define i16 @subflag16rm(i16 noundef %a, ptr %b) {
; CHECK-LABEL: subflag16rm:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: subw (%rsi), %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x2b,0x3e]
-; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; CHECK-NEXT: subw (%rsi), %di # EVEX TO LEGACY Compression encoding: [0x66,0x2b,0x3e]
+; CHECK-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: subflag16rm:
; NF: # %bb.0: # %entry
; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NF-NEXT: subw (%rsi), %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x2b,0x3e]
-; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; NF-NEXT: subw (%rsi), %di # EVEX TO LEGACY Compression encoding: [0x66,0x2b,0x3e]
+; NF-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; NF-NEXT: # kill: def $ax killed $ax killed $eax
; NF-NEXT: retq # encoding: [0xc3]
entry:
@@ -556,15 +556,15 @@ define i32 @subflag32rm(i32 noundef %a, ptr %b) {
; CHECK-LABEL: subflag32rm:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: subl (%rsi), %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x2b,0x3e]
-; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; CHECK-NEXT: subl (%rsi), %edi # EVEX TO LEGACY Compression encoding: [0x2b,0x3e]
+; CHECK-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: subflag32rm:
; NF: # %bb.0: # %entry
; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NF-NEXT: subl (%rsi), %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x2b,0x3e]
-; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; NF-NEXT: subl (%rsi), %edi # EVEX TO LEGACY Compression encoding: [0x2b,0x3e]
+; NF-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%t = load i32, ptr %b
@@ -576,15 +576,15 @@ define i64 @subflag64rm(i64 noundef %a, ptr %b) {
; CHECK-LABEL: subflag64rm:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: subq (%rsi), %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x2b,0x3e]
-; CHECK-NEXT: cmovaeq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc1]
+; CHECK-NEXT: subq (%rsi), %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x2b,0x3e]
+; CHECK-NEXT: cmovaeq %rdi, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc7]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: subflag64rm:
; NF: # %bb.0: # %entry
; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NF-NEXT: subq (%rsi), %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x2b,0x3e]
-; NF-NEXT: cmovaeq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc1]
+; NF-NEXT: subq (%rsi), %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x2b,0x3e]
+; NF-NEXT: cmovaeq %rdi, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc7]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%t = load i64, ptr %b
@@ -596,16 +596,16 @@ define i16 @subflag16ri8(i16 noundef %a) {
; CHECK-LABEL: subflag16ri8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: subw $123, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x83,0xef,0x7b]
-; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; CHECK-NEXT: subw $123, %di # EVEX TO LEGACY Compression encoding: [0x66,0x83,0xef,0x7b]
+; CHECK-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: subflag16ri8:
; NF: # %bb.0: # %entry
; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NF-NEXT: subw $123, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x83,0xef,0x7b]
-; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; NF-NEXT: subw $123, %di # EVEX TO LEGACY Compression encoding: [0x66,0x83,0xef,0x7b]
+; NF-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; NF-NEXT: # kill: def $ax killed $ax killed $eax
; NF-NEXT: retq # encoding: [0xc3]
entry:
@@ -617,15 +617,15 @@ define i32 @subflag32ri8(i32 noundef %a) {
; CHECK-LABEL: subflag32ri8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: subl $123, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x83,0xef,0x7b]
-; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; CHECK-NEXT: subl $123, %edi # EVEX TO LEGACY Compression encoding: [0x83,0xef,0x7b]
+; CHECK-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: subflag32ri8:
; NF: # %bb.0: # %entry
; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NF-NEXT: subl $123, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x83,0xef,0x7b]
-; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; NF-NEXT: subl $123, %edi # EVEX TO LEGACY Compression encoding: [0x83,0xef,0x7b]
+; NF-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%sub = call i32 @llvm.usub.sat.i32(i32 %a, i32 123)
@@ -636,15 +636,15 @@ define i64 @subflag64ri8(i64 noundef %a) {
; CHECK-LABEL: subflag64ri8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: subq $123, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x83,0xef,0x7b]
-; CHECK-NEXT: cmovaeq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc1]
+; CHECK-NEXT: subq $123, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xef,0x7b]
+; CHECK-NEXT: cmovaeq %rdi, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc7]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: subflag64ri8:
; NF: # %bb.0: # %entry
; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NF-NEXT: subq $123, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x83,0xef,0x7b]
-; NF-NEXT: cmovaeq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc1]
+; NF-NEXT: subq $123, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xef,0x7b]
+; NF-NEXT: cmovaeq %rdi, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc7]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%sub = call i64 @llvm.usub.sat.i64(i64 %a, i64 123)
@@ -678,18 +678,18 @@ define i16 @subflag16ri(i16 noundef %a) {
; CHECK-LABEL: subflag16ri:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: subw $1234, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x81,0xef,0xd2,0x04]
+; CHECK-NEXT: subw $1234, %di # EVEX TO LEGACY Compression encoding: [0x66,0x81,0xef,0xd2,0x04]
; CHECK-NEXT: # imm = 0x4D2
-; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; CHECK-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: subflag16ri:
; NF: # %bb.0: # %entry
; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NF-NEXT: subw $1234, %di, %cx # encoding: [0x62,0xf4,0x75,0x18,0x81,0xef,0xd2,0x04]
+; NF-NEXT: subw $1234, %di # EVEX TO LEGACY Compression encoding: [0x66,0x81,0xef,0xd2,0x04]
; NF-NEXT: # imm = 0x4D2
-; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; NF-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; NF-NEXT: # kill: def $ax killed $ax killed $eax
; NF-NEXT: retq # encoding: [0xc3]
entry:
@@ -701,17 +701,17 @@ define i32 @subflag32ri(i32 noundef %a) {
; CHECK-LABEL: subflag32ri:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: subl $123456, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x81,0xef,0x40,0xe2,0x01,0x00]
+; CHECK-NEXT: subl $123456, %edi # EVEX TO LEGACY Compression encoding: [0x81,0xef,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: # imm = 0x1E240
-; CHECK-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; CHECK-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: subflag32ri:
; NF: # %bb.0: # %entry
; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NF-NEXT: subl $123456, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x81,0xef,0x40,0xe2,0x01,0x00]
+; NF-NEXT: subl $123456, %edi # EVEX TO LEGACY Compression encoding: [0x81,0xef,0x40,0xe2,0x01,0x00]
; NF-NEXT: # imm = 0x1E240
-; NF-NEXT: cmovael %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc1]
+; NF-NEXT: cmovael %edi, %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x43,0xc7]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%sub = call i32 @llvm.usub.sat.i32(i32 %a, i32 123456)
@@ -722,17 +722,17 @@ define i64 @subflag64ri(i64 noundef %a) {
; CHECK-LABEL: subflag64ri:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK-NEXT: subq $123456, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x81,0xef,0x40,0xe2,0x01,0x00]
+; CHECK-NEXT: subq $123456, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x81,0xef,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: # imm = 0x1E240
-; CHECK-NEXT: cmovaeq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc1]
+; CHECK-NEXT: cmovaeq %rdi, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc7]
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: subflag64ri:
; NF: # %bb.0: # %entry
; NF-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NF-NEXT: subq $123456, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x81,0xef,0x40,0xe2,0x01,0x00]
+; NF-NEXT: subq $123456, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x81,0xef,0x40,0xe2,0x01,0x00]
; NF-NEXT: # imm = 0x1E240
-; NF-NEXT: cmovaeq %rcx, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc1]
+; NF-NEXT: cmovaeq %rdi, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x43,0xc7]
; NF-NEXT: retq # encoding: [0xc3]
entry:
%sub = call i64 @llvm.usub.sat.i64(i64 %a, i64 123456)
diff --git a/llvm/test/CodeGen/X86/apx/xor.ll b/llvm/test/CodeGen/X86/apx/xor.ll
index 3426f9cc92ce7..d908849e2848f 100644
--- a/llvm/test/CodeGen/X86/apx/xor.ll
+++ b/llvm/test/CodeGen/X86/apx/xor.ll
@@ -428,8 +428,8 @@ entry:
define i1 @xorflag8rr(i8 %a, i8 %b) {
; CHECK-LABEL: xorflag8rr:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %edi, %esi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x31,0xfe]
-; CHECK-NEXT: xorb $-1, %al, %cl # encoding: [0x62,0xf4,0x74,0x18,0x80,0xf0,0xff]
+; CHECK-NEXT: xorl %edi, %esi # EVEX TO LEGACY Compression encoding: [0x31,0xfe]
+; CHECK-NEXT: xorb $-1, %sil, %cl # encoding: [0x62,0xf4,0x74,0x18,0x80,0xf6,0xff]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
@@ -437,8 +437,8 @@ define i1 @xorflag8rr(i8 %a, i8 %b) {
;
; NF-LABEL: xorflag8rr:
; NF: # %bb.0:
-; NF-NEXT: {nf} xorl %edi, %esi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x31,0xfe]
-; NF-NEXT: xorb $-1, %al, %cl # encoding: [0x62,0xf4,0x74,0x18,0x80,0xf0,0xff]
+; NF-NEXT: xorl %edi, %esi # EVEX TO LEGACY Compression encoding: [0x31,0xfe]
+; NF-NEXT: xorb $-1, %sil, %cl # encoding: [0x62,0xf4,0x74,0x18,0x80,0xf6,0xff]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; NF-NEXT: movb %cl, d64(%rip) # encoding: [0x88,0x0d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
@@ -453,8 +453,8 @@ define i1 @xorflag8rr(i8 %a, i8 %b) {
define i1 @xorflag16rr(i16 %a, i16 %b) {
; CHECK-LABEL: xorflag16rr:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %edi, %esi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x31,0xfe]
-; CHECK-NEXT: xorw $-1, %ax, %cx # encoding: [0x62,0xf4,0x75,0x18,0x83,0xf0,0xff]
+; CHECK-NEXT: xorl %edi, %esi # EVEX TO LEGACY Compression encoding: [0x31,0xfe]
+; CHECK-NEXT: xorw $-1, %si, %cx # encoding: [0x62,0xf4,0x75,0x18,0x83,0xf6,0xff]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
@@ -462,8 +462,8 @@ define i1 @xorflag16rr(i16 %a, i16 %b) {
;
; NF-LABEL: xorflag16rr:
; NF: # %bb.0:
-; NF-NEXT: {nf} xorl %edi, %esi, %eax # EVEX TO EVEX Compression encoding: [0x62,0xf4,0x7c,0x1c,0x31,0xfe]
-; NF-NEXT: xorw $-1, %ax, %cx # encoding: [0x62,0xf4,0x75,0x18,0x83,0xf0,0xff]
+; NF-NEXT: xorl %edi, %esi # EVEX TO LEGACY Compression encoding: [0x31,0xfe]
+; NF-NEXT: xorw $-1, %si, %cx # encoding: [0x62,0xf4,0x75,0x18,0x83,0xf6,0xff]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; NF-NEXT: movw %cx, d64(%rip) # encoding: [0x66,0x89,0x0d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
@@ -478,17 +478,17 @@ define i1 @xorflag16rr(i16 %a, i16 %b) {
define i1 @xorflag32rr(i32 %a, i32 %b) {
; CHECK-LABEL: xorflag32rr:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %esi, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x31,0xf7]
+; CHECK-NEXT: xorl %edi, %esi # EVEX TO LEGACY Compression encoding: [0x31,0xfe]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: xorflag32rr:
; NF: # %bb.0:
-; NF-NEXT: xorl %esi, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x31,0xf7]
+; NF-NEXT: xorl %edi, %esi # EVEX TO LEGACY Compression encoding: [0x31,0xfe]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = xor i32 %a, %b ; 0xff << 50
@@ -500,17 +500,17 @@ define i1 @xorflag32rr(i32 %a, i32 %b) {
define i1 @xorflag64rr(i64 %a, i64 %b) {
; CHECK-LABEL: xorflag64rr:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorq %rsi, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x31,0xf7]
+; CHECK-NEXT: xorq %rdi, %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x31,0xfe]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: xorflag64rr:
; NF: # %bb.0:
-; NF-NEXT: xorq %rsi, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x31,0xf7]
+; NF-NEXT: xorq %rdi, %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x31,0xfe]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = xor i64 %a, %b ; 0xff << 50
@@ -574,17 +574,17 @@ define i1 @xorflag16rm(ptr %ptr, i16 %b) {
define i1 @xorflag32rm(ptr %ptr, i32 %b) {
; CHECK-LABEL: xorflag32rm:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl (%rdi), %esi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x33,0x37]
+; CHECK-NEXT: xorl (%rdi), %esi # EVEX TO LEGACY Compression encoding: [0x33,0x37]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: xorflag32rm:
; NF: # %bb.0:
-; NF-NEXT: xorl (%rdi), %esi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x33,0x37]
+; NF-NEXT: xorl (%rdi), %esi # EVEX TO LEGACY Compression encoding: [0x33,0x37]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %esi, d64(%rip) # encoding: [0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%a = load i32, ptr %ptr
@@ -597,17 +597,17 @@ define i1 @xorflag32rm(ptr %ptr, i32 %b) {
define i1 @xorflag64rm(ptr %ptr, i64 %b) {
; CHECK-LABEL: xorflag64rm:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorq (%rdi), %rsi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x33,0x37]
+; CHECK-NEXT: xorq (%rdi), %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x33,0x37]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: xorflag64rm:
; NF: # %bb.0:
-; NF-NEXT: xorq (%rdi), %rsi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x33,0x37]
+; NF-NEXT: xorq (%rdi), %rsi # EVEX TO LEGACY Compression encoding: [0x48,0x33,0x37]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rsi, d64(%rip) # encoding: [0x48,0x89,0x35,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%a = load i64, ptr %ptr
@@ -668,19 +668,19 @@ define i1 @xorflag16ri(i16 %a) {
define i1 @xorflag32ri(i32 %a) {
; CHECK-LABEL: xorflag32ri:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl $123456, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x81,0xf7,0x40,0xe2,0x01,0x00]
+; CHECK-NEXT: xorl $123456, %edi # EVEX TO LEGACY Compression encoding: [0x81,0xf7,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: # imm = 0x1E240
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: xorflag32ri:
; NF: # %bb.0:
-; NF-NEXT: xorl $123456, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x81,0xf7,0x40,0xe2,0x01,0x00]
+; NF-NEXT: xorl $123456, %edi # EVEX TO LEGACY Compression encoding: [0x81,0xf7,0x40,0xe2,0x01,0x00]
; NF-NEXT: # imm = 0x1E240
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = xor i32 %a, 123456 ; 0xff << 50
@@ -692,19 +692,19 @@ define i1 @xorflag32ri(i32 %a) {
define i1 @xorflag64ri(i64 %a) {
; CHECK-LABEL: xorflag64ri:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorq $123456, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x81,0xf7,0x40,0xe2,0x01,0x00]
+; CHECK-NEXT: xorq $123456, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x81,0xf7,0x40,0xe2,0x01,0x00]
; CHECK-NEXT: # imm = 0x1E240
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: xorflag64ri:
; NF: # %bb.0:
-; NF-NEXT: xorq $123456, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x81,0xf7,0x40,0xe2,0x01,0x00]
+; NF-NEXT: xorq $123456, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x81,0xf7,0x40,0xe2,0x01,0x00]
; NF-NEXT: # imm = 0x1E240
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = xor i64 %a, 123456 ; 0xff << 50
@@ -739,17 +739,17 @@ define i1 @xorflag16ri8(i16 %a) {
define i1 @xorflag32ri8(i32 %a) {
; CHECK-LABEL: xorflag32ri8:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorl $123, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x83,0xf7,0x7b]
+; CHECK-NEXT: xorl $123, %edi # EVEX TO LEGACY Compression encoding: [0x83,0xf7,0x7b]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: xorflag32ri8:
; NF: # %bb.0:
-; NF-NEXT: xorl $123, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0x83,0xf7,0x7b]
+; NF-NEXT: xorl $123, %edi # EVEX TO LEGACY Compression encoding: [0x83,0xf7,0x7b]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movl %ecx, d64(%rip) # encoding: [0x89,0x0d,A,A,A,A]
+; NF-NEXT: movl %edi, d64(%rip) # encoding: [0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 2, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = xor i32 %a, 123 ; 0xff << 50
@@ -761,17 +761,17 @@ define i1 @xorflag32ri8(i32 %a) {
define i1 @xorflag64ri8(i64 %a) {
; CHECK-LABEL: xorflag64ri8:
; CHECK: # %bb.0:
-; CHECK-NEXT: xorq $123, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x83,0xf7,0x7b]
+; CHECK-NEXT: xorq $123, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xf7,0x7b]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; CHECK-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
;
; NF-LABEL: xorflag64ri8:
; NF: # %bb.0:
-; NF-NEXT: xorq $123, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x83,0xf7,0x7b]
+; NF-NEXT: xorq $123, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x83,0xf7,0x7b]
; NF-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
-; NF-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NF-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; NF-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NF-NEXT: retq # encoding: [0xc3]
%v0 = xor i64 %a, 123 ; 0xff << 50
diff --git a/llvm/test/CodeGen/X86/cmp.ll b/llvm/test/CodeGen/X86/cmp.ll
index 5a63d36a6be4e..0965b1c7208f6 100644
--- a/llvm/test/CodeGen/X86/cmp.ll
+++ b/llvm/test/CodeGen/X86/cmp.ll
@@ -178,7 +178,7 @@ define i32 @test7(i64 %res) nounwind {
; NDD-LABEL: test7:
; NDD: # %bb.0: # %entry
; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NDD-NEXT: shrq $32, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x20]
+; NDD-NEXT: shrq $32, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xef,0x20]
; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; NDD-NEXT: retq # encoding: [0xc3]
entry:
@@ -198,9 +198,9 @@ define i32 @test8(i64 %res) nounwind {
;
; NDD-LABEL: test8:
; NDD: # %bb.0:
-; NDD-NEXT: shrq $32, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x20]
+; NDD-NEXT: shrq $32, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xef,0x20]
; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NDD-NEXT: cmpl $3, %ecx # encoding: [0x83,0xf9,0x03]
+; NDD-NEXT: cmpl $3, %edi # encoding: [0x83,0xff,0x03]
; NDD-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
; NDD-NEXT: retq # encoding: [0xc3]
%lnot = icmp ult i64 %res, 12884901888
@@ -219,7 +219,7 @@ define i32 @test9(i64 %res) nounwind {
; NDD-LABEL: test9:
; NDD: # %bb.0:
; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NDD-NEXT: shrq $33, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x21]
+; NDD-NEXT: shrq $33, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xef,0x21]
; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; NDD-NEXT: retq # encoding: [0xc3]
%lnot = icmp ult i64 %res, 8589934592
@@ -238,7 +238,7 @@ define i32 @test10(i64 %res) nounwind {
; NDD-LABEL: test10:
; NDD: # %bb.0:
; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NDD-NEXT: shrq $32, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x20]
+; NDD-NEXT: shrq $32, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xef,0x20]
; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
; NDD-NEXT: retq # encoding: [0xc3]
%lnot = icmp uge i64 %res, 4294967296
@@ -257,9 +257,9 @@ define i32 @test11(i64 %l) nounwind {
;
; NDD-LABEL: test11:
; NDD: # %bb.0:
-; NDD-NEXT: shrq $47, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x2f]
+; NDD-NEXT: shrq $47, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xef,0x2f]
; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NDD-NEXT: cmpl $1, %ecx # encoding: [0x83,0xf9,0x01]
+; NDD-NEXT: cmpl $1, %edi # encoding: [0x83,0xff,0x01]
; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; NDD-NEXT: retq # encoding: [0xc3]
%shr.mask = and i64 %l, -140737488355328
@@ -331,7 +331,7 @@ define i32 @test14(i32 %mask, i32 %base, i32 %intra) {
;
; NDD-LABEL: test14:
; NDD: # %bb.0:
-; NDD-NEXT: shrl $7, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0xef,0x07]
+; NDD-NEXT: shrl $7, %edi # EVEX TO LEGACY Compression encoding: [0xc1,0xef,0x07]
; NDD-NEXT: cmovnsl %edx, %esi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x49,0xf2]
; NDD-NEXT: retq # encoding: [0xc3]
%s = lshr i32 %mask, 7
@@ -353,10 +353,10 @@ define zeroext i1 @test15(i32 %bf.load, i32 %n) {
;
; NDD-LABEL: test15:
; NDD: # %bb.0:
-; NDD-NEXT: shrl $16, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0xef,0x10]
-; NDD-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
-; NDD-NEXT: cmpl %esi, %eax # encoding: [0x39,0xf0]
-; NDD-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
+; NDD-NEXT: shrl $16, %edi # EVEX TO LEGACY Compression encoding: [0xc1,0xef,0x10]
+; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT: cmpl %esi, %edi # encoding: [0x39,0xf7]
+; NDD-NEXT: setae %cl # encoding: [0x0f,0x93,0xc1]
; NDD-NEXT: orb %cl, %al # EVEX TO LEGACY Compression encoding: [0x08,0xc8]
; NDD-NEXT: retq # encoding: [0xc3]
%bf.lshr = lshr i32 %bf.load, 16
@@ -482,7 +482,7 @@ define i32 @highmask_i64_mask64(i64 %val) {
; NDD-LABEL: highmask_i64_mask64:
; NDD: # %bb.0:
; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NDD-NEXT: shrq $41, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x29]
+; NDD-NEXT: shrq $41, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xef,0x29]
; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
; NDD-NEXT: retq # encoding: [0xc3]
%and = and i64 %val, -2199023255552
@@ -526,7 +526,7 @@ define i32 @highmask_i64_mask32(i64 %val) {
; NDD-LABEL: highmask_i64_mask32:
; NDD: # %bb.0:
; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NDD-NEXT: shrq $20, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x14]
+; NDD-NEXT: shrq $20, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xef,0x14]
; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; NDD-NEXT: retq # encoding: [0xc3]
%and = and i64 %val, -1048576
@@ -584,7 +584,7 @@ define i32 @lowmask_i64_mask64(i64 %val) {
; NDD-LABEL: lowmask_i64_mask64:
; NDD: # %bb.0:
; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NDD-NEXT: shlq $16, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xe7,0x10]
+; NDD-NEXT: shlq $16, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xe7,0x10]
; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; NDD-NEXT: retq # encoding: [0xc3]
%and = and i64 %val, 281474976710655
@@ -628,7 +628,7 @@ define i32 @lowmask_i64_mask32(i64 %val) {
; NDD-LABEL: lowmask_i64_mask32:
; NDD: # %bb.0:
; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
-; NDD-NEXT: shlq $44, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xe7,0x2c]
+; NDD-NEXT: shlq $44, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xe7,0x2c]
; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
; NDD-NEXT: retq # encoding: [0xc3]
%and = and i64 %val, 1048575
@@ -739,8 +739,8 @@ define i1 @shifted_mask64_testb(i64 %a) {
;
; NDD-LABEL: shifted_mask64_testb:
; NDD: # %bb.0:
-; NDD-NEXT: shrq $50, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0xef,0x32]
-; NDD-NEXT: testb %al, %al # encoding: [0x84,0xc0]
+; NDD-NEXT: shrq $50, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xef,0x32]
+; NDD-NEXT: testb %dil, %dil # encoding: [0x40,0x84,0xff]
; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
; NDD-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, 287104476244869120 ; 0xff << 50
@@ -758,8 +758,8 @@ define i1 @shifted_mask64_testw(i64 %a) {
;
; NDD-LABEL: shifted_mask64_testw:
; NDD: # %bb.0:
-; NDD-NEXT: shrq $33, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0xef,0x21]
-; NDD-NEXT: testw %ax, %ax # encoding: [0x66,0x85,0xc0]
+; NDD-NEXT: shrq $33, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xef,0x21]
+; NDD-NEXT: testw %di, %di # encoding: [0x66,0x85,0xff]
; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
; NDD-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, 562941363486720 ; 0xffff << 33
@@ -777,8 +777,8 @@ define i1 @shifted_mask64_testl(i64 %a) {
;
; NDD-LABEL: shifted_mask64_testl:
; NDD: # %bb.0:
-; NDD-NEXT: shrq $7, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0xef,0x07]
-; NDD-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; NDD-NEXT: shrq $7, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0xc1,0xef,0x07]
+; NDD-NEXT: testl %edi, %edi # encoding: [0x85,0xff]
; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; NDD-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, 549755813760 ; 0xffffffff << 7
@@ -817,9 +817,9 @@ define i1 @shifted_mask64_extra_use_and(i64 %a) {
; NDD: # %bb.0:
; NDD-NEXT: movabsq $287104476244869120, %rax # encoding: [0x48,0xb8,0x00,0x00,0x00,0x00,0x00,0x00,0xfc,0x03]
; NDD-NEXT: # imm = 0x3FC000000000000
-; NDD-NEXT: andq %rax, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x21,0xc7]
+; NDD-NEXT: andq %rax, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x21,0xc7]
; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
-; NDD-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NDD-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; NDD-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NDD-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, 287104476244869120 ; 0xff << 50
@@ -868,10 +868,10 @@ define i1 @shifted_mask32_extra_use_and(i64 %a) {
;
; NDD-LABEL: shifted_mask32_extra_use_and:
; NDD: # %bb.0:
-; NDD-NEXT: andq $66846720, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x81,0xe7,0x00,0x00,0xfc,0x03]
+; NDD-NEXT: andq $66846720, %rdi # EVEX TO LEGACY Compression encoding: [0x48,0x81,0xe7,0x00,0x00,0xfc,0x03]
; NDD-NEXT: # imm = 0x3FC0000
; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
-; NDD-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NDD-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; NDD-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; NDD-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, 66846720 ; 0xff << 50
diff --git a/llvm/test/CodeGen/X86/popcnt.ll b/llvm/test/CodeGen/X86/popcnt.ll
index 13fa639dc63be..35c7c0e09f394 100644
--- a/llvm/test/CodeGen/X86/popcnt.ll
+++ b/llvm/test/CodeGen/X86/popcnt.ll
@@ -182,11 +182,11 @@ define i32 @cnt32(i32 %x) nounwind readnone {
; X64-NDD: # %bb.0:
; X64-NDD-NEXT: shrl %edi, %eax
; X64-NDD-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; X64-NDD-NEXT: subl %eax, %edi, %eax
-; X64-NDD-NEXT: andl $858993459, %eax, %ecx # imm = 0x33333333
-; X64-NDD-NEXT: shrl $2, %eax
-; X64-NDD-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X64-NDD-NEXT: addl %ecx, %eax
+; X64-NDD-NEXT: subl %eax, %edi
+; X64-NDD-NEXT: andl $858993459, %edi, %eax # imm = 0x33333333
+; X64-NDD-NEXT: shrl $2, %edi
+; X64-NDD-NEXT: andl $858993459, %edi # imm = 0x33333333
+; X64-NDD-NEXT: addl %edi, %eax
; X64-NDD-NEXT: shrl $4, %eax, %ecx
; X64-NDD-NEXT: addl %ecx, %eax
; X64-NDD-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
@@ -277,12 +277,12 @@ define i64 @cnt64(i64 %x) nounwind readnone {
; X64-NDD-NEXT: shrq %rdi, %rax
; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-NDD-NEXT: andq %rcx, %rax
-; X64-NDD-NEXT: subq %rax, %rdi, %rax
-; X64-NDD-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
-; X64-NDD-NEXT: andq %rcx, %rax, %rdx
-; X64-NDD-NEXT: shrq $2, %rax
-; X64-NDD-NEXT: andq %rcx, %rax
-; X64-NDD-NEXT: addq %rdx, %rax
+; X64-NDD-NEXT: subq %rax, %rdi
+; X64-NDD-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; X64-NDD-NEXT: andq %rax, %rdi, %rcx
+; X64-NDD-NEXT: shrq $2, %rdi
+; X64-NDD-NEXT: andq %rdi, %rax
+; X64-NDD-NEXT: addq %rcx, %rax
; X64-NDD-NEXT: shrq $4, %rax, %rcx
; X64-NDD-NEXT: addq %rcx, %rax
; X64-NDD-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
@@ -491,32 +491,32 @@ define i128 @cnt128(i128 %x) nounwind readnone {
; X64-NDD-NEXT: shrq %rsi, %rax
; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-NDD-NEXT: andq %rcx, %rax
-; X64-NDD-NEXT: subq %rax, %rsi, %rax
-; X64-NDD-NEXT: movabsq $3689348814741910323, %rdx # imm = 0x3333333333333333
-; X64-NDD-NEXT: andq %rdx, %rax, %rsi
-; X64-NDD-NEXT: shrq $2, %rax
-; X64-NDD-NEXT: andq %rdx, %rax
-; X64-NDD-NEXT: addq %rsi, %rax
-; X64-NDD-NEXT: shrq $4, %rax, %rsi
-; X64-NDD-NEXT: addq %rsi, %rax
+; X64-NDD-NEXT: subq %rax, %rsi
+; X64-NDD-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; X64-NDD-NEXT: andq %rax, %rsi, %rdx
+; X64-NDD-NEXT: shrq $2, %rsi
+; X64-NDD-NEXT: andq %rax, %rsi
+; X64-NDD-NEXT: addq %rsi, %rdx
+; X64-NDD-NEXT: shrq $4, %rdx, %rsi
+; X64-NDD-NEXT: addq %rsi, %rdx
; X64-NDD-NEXT: movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
-; X64-NDD-NEXT: andq %rsi, %rax
+; X64-NDD-NEXT: andq %rsi, %rdx
; X64-NDD-NEXT: movabsq $72340172838076673, %r8 # imm = 0x101010101010101
-; X64-NDD-NEXT: imulq %r8, %rax
-; X64-NDD-NEXT: shrq $56, %rax
+; X64-NDD-NEXT: imulq %r8, %rdx
+; X64-NDD-NEXT: shrq $56, %rdx
; X64-NDD-NEXT: shrq %rdi, %r9
; X64-NDD-NEXT: andq %r9, %rcx
-; X64-NDD-NEXT: subq %rcx, %rdi, %rcx
-; X64-NDD-NEXT: andq %rdx, %rcx, %rdi
-; X64-NDD-NEXT: shrq $2, %rcx
-; X64-NDD-NEXT: andq %rdx, %rcx
-; X64-NDD-NEXT: addq %rdi, %rcx
-; X64-NDD-NEXT: shrq $4, %rcx, %rdx
-; X64-NDD-NEXT: addq %rdx, %rcx
-; X64-NDD-NEXT: andq %rsi, %rcx
-; X64-NDD-NEXT: imulq %r8, %rcx
-; X64-NDD-NEXT: shrq $56, %rcx
+; X64-NDD-NEXT: subq %rcx, %rdi
+; X64-NDD-NEXT: andq %rax, %rdi, %rcx
+; X64-NDD-NEXT: shrq $2, %rdi
+; X64-NDD-NEXT: andq %rdi, %rax
+; X64-NDD-NEXT: addq %rcx, %rax
+; X64-NDD-NEXT: shrq $4, %rax, %rcx
; X64-NDD-NEXT: addq %rcx, %rax
+; X64-NDD-NEXT: andq %rsi, %rax
+; X64-NDD-NEXT: imulq %r8, %rax
+; X64-NDD-NEXT: shrq $56, %rax
+; X64-NDD-NEXT: addq %rdx, %rax
; X64-NDD-NEXT: xorl %edx, %edx
; X64-NDD-NEXT: retq
;
@@ -685,12 +685,12 @@ define i64 @cnt64_noimplicitfloat(i64 %x) nounwind readnone noimplicitfloat {
; X64-NDD-NEXT: shrq %rdi, %rax
; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-NDD-NEXT: andq %rcx, %rax
-; X64-NDD-NEXT: subq %rax, %rdi, %rax
-; X64-NDD-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
-; X64-NDD-NEXT: andq %rcx, %rax, %rdx
-; X64-NDD-NEXT: shrq $2, %rax
-; X64-NDD-NEXT: andq %rcx, %rax
-; X64-NDD-NEXT: addq %rdx, %rax
+; X64-NDD-NEXT: subq %rax, %rdi
+; X64-NDD-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; X64-NDD-NEXT: andq %rax, %rdi, %rcx
+; X64-NDD-NEXT: shrq $2, %rdi
+; X64-NDD-NEXT: andq %rdi, %rax
+; X64-NDD-NEXT: addq %rcx, %rax
; X64-NDD-NEXT: shrq $4, %rax, %rcx
; X64-NDD-NEXT: addq %rcx, %rax
; X64-NDD-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
@@ -759,12 +759,12 @@ define i32 @cnt32_optsize(i32 %x) nounwind readnone optsize {
; X64-NDD: # %bb.0:
; X64-NDD-NEXT: shrl %edi, %eax
; X64-NDD-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; X64-NDD-NEXT: subl %eax, %edi, %eax
-; X64-NDD-NEXT: movl $858993459, %ecx # imm = 0x33333333
-; X64-NDD-NEXT: andl %ecx, %eax, %edx
-; X64-NDD-NEXT: shrl $2, %eax
-; X64-NDD-NEXT: andl %ecx, %eax
-; X64-NDD-NEXT: addl %edx, %eax
+; X64-NDD-NEXT: subl %eax, %edi
+; X64-NDD-NEXT: movl $858993459, %eax # imm = 0x33333333
+; X64-NDD-NEXT: andl %eax, %edi, %ecx
+; X64-NDD-NEXT: shrl $2, %edi
+; X64-NDD-NEXT: andl %edi, %eax
+; X64-NDD-NEXT: addl %ecx, %eax
; X64-NDD-NEXT: shrl $4, %eax, %ecx
; X64-NDD-NEXT: addl %ecx, %eax
; X64-NDD-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
@@ -864,12 +864,12 @@ define i64 @cnt64_optsize(i64 %x) nounwind readnone optsize {
; X64-NDD-NEXT: shrq %rdi, %rax
; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-NDD-NEXT: andq %rcx, %rax
-; X64-NDD-NEXT: subq %rax, %rdi, %rax
-; X64-NDD-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
-; X64-NDD-NEXT: andq %rcx, %rax, %rdx
-; X64-NDD-NEXT: shrq $2, %rax
-; X64-NDD-NEXT: andq %rcx, %rax
-; X64-NDD-NEXT: addq %rdx, %rax
+; X64-NDD-NEXT: subq %rax, %rdi
+; X64-NDD-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; X64-NDD-NEXT: andq %rax, %rdi, %rcx
+; X64-NDD-NEXT: shrq $2, %rdi
+; X64-NDD-NEXT: andq %rdi, %rax
+; X64-NDD-NEXT: addq %rcx, %rax
; X64-NDD-NEXT: shrq $4, %rax, %rcx
; X64-NDD-NEXT: addq %rcx, %rax
; X64-NDD-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
@@ -1087,32 +1087,32 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
; X64-NDD-NEXT: shrq %rsi, %rax
; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-NDD-NEXT: andq %rcx, %rax
-; X64-NDD-NEXT: subq %rax, %rsi, %rax
-; X64-NDD-NEXT: movabsq $3689348814741910323, %rdx # imm = 0x3333333333333333
-; X64-NDD-NEXT: andq %rdx, %rax, %rsi
-; X64-NDD-NEXT: shrq $2, %rax
-; X64-NDD-NEXT: andq %rdx, %rax
-; X64-NDD-NEXT: addq %rsi, %rax
-; X64-NDD-NEXT: shrq $4, %rax, %rsi
-; X64-NDD-NEXT: addq %rsi, %rax
+; X64-NDD-NEXT: subq %rax, %rsi
+; X64-NDD-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; X64-NDD-NEXT: andq %rax, %rsi, %rdx
+; X64-NDD-NEXT: shrq $2, %rsi
+; X64-NDD-NEXT: andq %rax, %rsi
+; X64-NDD-NEXT: addq %rsi, %rdx
+; X64-NDD-NEXT: shrq $4, %rdx, %rsi
+; X64-NDD-NEXT: addq %rsi, %rdx
; X64-NDD-NEXT: movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
-; X64-NDD-NEXT: andq %rsi, %rax
+; X64-NDD-NEXT: andq %rsi, %rdx
; X64-NDD-NEXT: movabsq $72340172838076673, %r8 # imm = 0x101010101010101
-; X64-NDD-NEXT: imulq %r8, %rax
-; X64-NDD-NEXT: shrq $56, %rax
+; X64-NDD-NEXT: imulq %r8, %rdx
+; X64-NDD-NEXT: shrq $56, %rdx
; X64-NDD-NEXT: shrq %rdi, %r9
; X64-NDD-NEXT: andq %r9, %rcx
-; X64-NDD-NEXT: subq %rcx, %rdi, %rcx
-; X64-NDD-NEXT: andq %rdx, %rcx, %rdi
-; X64-NDD-NEXT: shrq $2, %rcx
-; X64-NDD-NEXT: andq %rdx, %rcx
-; X64-NDD-NEXT: addq %rdi, %rcx
-; X64-NDD-NEXT: shrq $4, %rcx, %rdx
-; X64-NDD-NEXT: addq %rdx, %rcx
-; X64-NDD-NEXT: andq %rsi, %rcx
-; X64-NDD-NEXT: imulq %r8, %rcx
-; X64-NDD-NEXT: shrq $56, %rcx
+; X64-NDD-NEXT: subq %rcx, %rdi
+; X64-NDD-NEXT: andq %rax, %rdi, %rcx
+; X64-NDD-NEXT: shrq $2, %rdi
+; X64-NDD-NEXT: andq %rdi, %rax
+; X64-NDD-NEXT: addq %rcx, %rax
+; X64-NDD-NEXT: shrq $4, %rax, %rcx
; X64-NDD-NEXT: addq %rcx, %rax
+; X64-NDD-NEXT: andq %rsi, %rax
+; X64-NDD-NEXT: imulq %r8, %rax
+; X64-NDD-NEXT: shrq $56, %rax
+; X64-NDD-NEXT: addq %rdx, %rax
; X64-NDD-NEXT: xorl %edx, %edx
; X64-NDD-NEXT: retq
;
@@ -1257,11 +1257,11 @@ define i32 @cnt32_pgso(i32 %x) nounwind readnone !prof !14 {
; X64-NDD: # %bb.0:
; X64-NDD-NEXT: shrl %edi, %eax
; X64-NDD-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; X64-NDD-NEXT: subl %eax, %edi, %eax
-; X64-NDD-NEXT: andl $858993459, %eax, %ecx # imm = 0x33333333
-; X64-NDD-NEXT: shrl $2, %eax
-; X64-NDD-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X64-NDD-NEXT: addl %ecx, %eax
+; X64-NDD-NEXT: subl %eax, %edi
+; X64-NDD-NEXT: andl $858993459, %edi, %eax # imm = 0x33333333
+; X64-NDD-NEXT: shrl $2, %edi
+; X64-NDD-NEXT: andl $858993459, %edi # imm = 0x33333333
+; X64-NDD-NEXT: addl %edi, %eax
; X64-NDD-NEXT: shrl $4, %eax, %ecx
; X64-NDD-NEXT: addl %ecx, %eax
; X64-NDD-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
@@ -1352,12 +1352,12 @@ define i64 @cnt64_pgso(i64 %x) nounwind readnone !prof !14 {
; X64-NDD-NEXT: shrq %rdi, %rax
; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-NDD-NEXT: andq %rcx, %rax
-; X64-NDD-NEXT: subq %rax, %rdi, %rax
-; X64-NDD-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
-; X64-NDD-NEXT: andq %rcx, %rax, %rdx
-; X64-NDD-NEXT: shrq $2, %rax
-; X64-NDD-NEXT: andq %rcx, %rax
-; X64-NDD-NEXT: addq %rdx, %rax
+; X64-NDD-NEXT: subq %rax, %rdi
+; X64-NDD-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; X64-NDD-NEXT: andq %rax, %rdi, %rcx
+; X64-NDD-NEXT: shrq $2, %rdi
+; X64-NDD-NEXT: andq %rdi, %rax
+; X64-NDD-NEXT: addq %rcx, %rax
; X64-NDD-NEXT: shrq $4, %rax, %rcx
; X64-NDD-NEXT: addq %rcx, %rax
; X64-NDD-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
@@ -1568,32 +1568,32 @@ define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 {
; X64-NDD-NEXT: shrq %rsi, %rax
; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-NDD-NEXT: andq %rcx, %rax
-; X64-NDD-NEXT: subq %rax, %rsi, %rax
-; X64-NDD-NEXT: movabsq $3689348814741910323, %rdx # imm = 0x3333333333333333
-; X64-NDD-NEXT: andq %rdx, %rax, %rsi
-; X64-NDD-NEXT: shrq $2, %rax
-; X64-NDD-NEXT: andq %rdx, %rax
-; X64-NDD-NEXT: addq %rsi, %rax
-; X64-NDD-NEXT: shrq $4, %rax, %rsi
-; X64-NDD-NEXT: addq %rsi, %rax
+; X64-NDD-NEXT: subq %rax, %rsi
+; X64-NDD-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; X64-NDD-NEXT: andq %rax, %rsi, %rdx
+; X64-NDD-NEXT: shrq $2, %rsi
+; X64-NDD-NEXT: andq %rax, %rsi
+; X64-NDD-NEXT: addq %rsi, %rdx
+; X64-NDD-NEXT: shrq $4, %rdx, %rsi
+; X64-NDD-NEXT: addq %rsi, %rdx
; X64-NDD-NEXT: movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
-; X64-NDD-NEXT: andq %rsi, %rax
+; X64-NDD-NEXT: andq %rsi, %rdx
; X64-NDD-NEXT: movabsq $72340172838076673, %r8 # imm = 0x101010101010101
-; X64-NDD-NEXT: imulq %r8, %rax
-; X64-NDD-NEXT: shrq $56, %rax
+; X64-NDD-NEXT: imulq %r8, %rdx
+; X64-NDD-NEXT: shrq $56, %rdx
; X64-NDD-NEXT: shrq %rdi, %r9
; X64-NDD-NEXT: andq %r9, %rcx
-; X64-NDD-NEXT: subq %rcx, %rdi, %rcx
-; X64-NDD-NEXT: andq %rdx, %rcx, %rdi
-; X64-NDD-NEXT: shrq $2, %rcx
-; X64-NDD-NEXT: andq %rdx, %rcx
-; X64-NDD-NEXT: addq %rdi, %rcx
-; X64-NDD-NEXT: shrq $4, %rcx, %rdx
-; X64-NDD-NEXT: addq %rdx, %rcx
-; X64-NDD-NEXT: andq %rsi, %rcx
-; X64-NDD-NEXT: imulq %r8, %rcx
-; X64-NDD-NEXT: shrq $56, %rcx
+; X64-NDD-NEXT: subq %rcx, %rdi
+; X64-NDD-NEXT: andq %rax, %rdi, %rcx
+; X64-NDD-NEXT: shrq $2, %rdi
+; X64-NDD-NEXT: andq %rdi, %rax
+; X64-NDD-NEXT: addq %rcx, %rax
+; X64-NDD-NEXT: shrq $4, %rax, %rcx
; X64-NDD-NEXT: addq %rcx, %rax
+; X64-NDD-NEXT: andq %rsi, %rax
+; X64-NDD-NEXT: imulq %r8, %rax
+; X64-NDD-NEXT: shrq $56, %rax
+; X64-NDD-NEXT: addq %rdx, %rax
; X64-NDD-NEXT: xorl %edx, %edx
; X64-NDD-NEXT: retq
;
@@ -1739,11 +1739,11 @@ define i32 @popcount_zext_i32(i16 zeroext %x) {
; X64-NDD: # %bb.0:
; X64-NDD-NEXT: shrl %edi, %eax
; X64-NDD-NEXT: andl $21845, %eax # imm = 0x5555
-; X64-NDD-NEXT: subl %eax, %edi, %eax
-; X64-NDD-NEXT: andl $858993459, %eax, %ecx # imm = 0x33333333
-; X64-NDD-NEXT: shrl $2, %eax
-; X64-NDD-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X64-NDD-NEXT: addl %ecx, %eax
+; X64-NDD-NEXT: subl %eax, %edi
+; X64-NDD-NEXT: andl $858993459, %edi, %eax # imm = 0x33333333
+; X64-NDD-NEXT: shrl $2, %edi
+; X64-NDD-NEXT: andl $858993459, %edi # imm = 0x33333333
+; X64-NDD-NEXT: addl %edi, %eax
; X64-NDD-NEXT: shrl $4, %eax, %ecx
; X64-NDD-NEXT: addl %ecx, %eax
; X64-NDD-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
diff --git a/llvm/test/CodeGen/X86/select_const_i128.ll b/llvm/test/CodeGen/X86/select_const_i128.ll
index d7859baec815c..f0f0c584a7fc8 100644
--- a/llvm/test/CodeGen/X86/select_const_i128.ll
+++ b/llvm/test/CodeGen/X86/select_const_i128.ll
@@ -23,8 +23,8 @@ define i128 @select_eq_i128(ptr %a) {
; NDD-NEXT: ptest %xmm0, %xmm0
; NDD-NEXT: setne %al
; NDD-NEXT: addq $-1, %rax
-; NDD-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
-; NDD-NEXT: adcq $0, %rcx, %rdx
+; NDD-NEXT: movabsq $9223372036854775807, %rdx # imm = 0x7FFFFFFFFFFFFFFF
+; NDD-NEXT: adcq $0, %rdx
; NDD-NEXT: retq
%1 = load i128, ptr %a, align 16
%cmp = icmp eq i128 %1, 1
>From c7886071d08d5cee5d0010a1ead6cdee8ad8805d Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Sun, 4 Aug 2024 20:04:03 +0800
Subject: [PATCH 2/6] address comments.
---
llvm/lib/Target/X86/X86RegisterInfo.cpp | 63 +++++++++++++------------
1 file changed, 32 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 5777bd0496881..33e883a94f6cf 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -46,7 +46,7 @@ EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
cl::desc("Enable use of a base pointer for complex stack frames"));
static cl::opt<bool>
- DisableRegAllocHints("x86-disable-regalloc-hints", cl::Hidden,
+ DisableRegAllocNDDHints("x86-disable-regalloc-hints-for-ndd", cl::Hidden,
cl::init(false),
cl::desc("Disable two address hints for register "
"allocation"));
@@ -1089,43 +1089,43 @@ bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
unsigned ID = RC.getID();
- if (!VRM || DisableRegAllocHints)
- return BaseImplRetVal;
-
- // Add any two address hints after any copy hints.
- SmallSet<unsigned, 4> TwoAddrHints;
+ if (ID != X86::TILERegClassID) {
+ if (!VRM || DisableRegAllocNDDHints)
+ return BaseImplRetVal;
- auto tryAddHint = [&](const MachineOperand &VRRegMO,
- const MachineOperand &MO) -> void {
- Register Reg = MO.getReg();
- Register PhysReg =
- Register::isPhysicalRegister(Reg) ? Reg : Register(VRM->getPhys(Reg));
- if (PhysReg && !MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg))
- TwoAddrHints.insert(PhysReg);
- };
+ // Add any two address hints after any copy hints.
+ SmallSet<unsigned, 4> TwoAddrHints;
- for (auto &MO : MRI->reg_nodbg_operands(VirtReg)) {
- const MachineInstr &MI = *MO.getParent();
- if (X86::getNonNDVariant(MI.getOpcode())) {
- unsigned OpIdx = MI.getOperandNo(&MO);
- if (OpIdx == 0 && MI.getOperand(1).isReg()) {
- tryAddHint(MO, MI.getOperand(1));
- if (MI.isCommutable() && MI.getOperand(2).isReg())
- tryAddHint(MO, MI.getOperand(2));
- } else if (OpIdx == 1) {
- tryAddHint(MO, MI.getOperand(0));
- } else if (MI.isCommutable() && OpIdx == 2) {
- tryAddHint(MO, MI.getOperand(0));
+ auto tryAddNDDHint = [&](const MachineOperand &MO) -> void {
+ Register Reg = MO.getReg();
+ Register PhysReg =
+ Register::isPhysicalRegister(Reg) ? Reg : Register(VRM->getPhys(Reg));
+ if (PhysReg && !MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg))
+ TwoAddrHints.insert(PhysReg);
+ };
+
+ for (auto &MO : MRI->reg_nodbg_operands(VirtReg)) {
+ const MachineInstr &MI = *MO.getParent();
+ if (X86::getNonNDVariant(MI.getOpcode())) {
+ unsigned OpIdx = MI.getOperandNo(&MO);
+ if (OpIdx == 0 && MI.getOperand(1).isReg()) {
+ tryAddNDDHint(MI.getOperand(1));
+ if (MI.isCommutable() && MI.getOperand(2).isReg())
+ tryAddNDDHint(MI.getOperand(2));
+ } else if (OpIdx == 1) {
+ tryAddNDDHint(MI.getOperand(0));
+ } else if (MI.isCommutable() && OpIdx == 2) {
+ tryAddNDDHint(MI.getOperand(0));
+ }
}
}
- }
- for (MCPhysReg OrderReg : Order)
- if (TwoAddrHints.count(OrderReg))
- Hints.push_back(OrderReg);
+ for (MCPhysReg OrderReg : Order)
+ if (TwoAddrHints.count(OrderReg))
+ Hints.push_back(OrderReg);
- if (ID != X86::TILERegClassID)
return BaseImplRetVal;
+ }
ShapeT VirtShape = getTileShape(VirtReg, const_cast<VirtRegMap *>(VRM), MRI);
auto AddHint = [&](MCPhysReg PhysReg) {
@@ -1163,4 +1163,5 @@ bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
#undef DEBUG_TYPE
return true;
+
}
>From 6ca56bdc8fef6f9ae33b66e356d4394e6e1e40f1 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Mon, 5 Aug 2024 14:45:19 +0800
Subject: [PATCH 3/6] format
---
llvm/lib/Target/X86/X86RegisterInfo.cpp | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 33e883a94f6cf..6284b86107244 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -47,9 +47,9 @@ EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
static cl::opt<bool>
DisableRegAllocNDDHints("x86-disable-regalloc-hints-for-ndd", cl::Hidden,
- cl::init(false),
- cl::desc("Disable two address hints for register "
- "allocation"));
+ cl::init(false),
+ cl::desc("Disable two address hints for register "
+ "allocation"));
X86RegisterInfo::X86RegisterInfo(const Triple &TT)
: X86GenRegisterInfo((TT.isArch64Bit() ? X86::RIP : X86::EIP),
@@ -1163,5 +1163,4 @@ bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
#undef DEBUG_TYPE
return true;
-
}
>From 64ffdac8f0b92e8b0ca412e9e20ac9772ea2bda2 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Mon, 5 Aug 2024 15:28:06 +0800
Subject: [PATCH 4/6] address comments.
---
llvm/lib/Target/X86/X86RegisterInfo.cpp | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 6284b86107244..834246436bac3 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -1096,7 +1096,7 @@ bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
// Add any two address hints after any copy hints.
SmallSet<unsigned, 4> TwoAddrHints;
- auto tryAddNDDHint = [&](const MachineOperand &MO) -> void {
+ auto TryAddNDDHint = [&](const MachineOperand &MO) {
Register Reg = MO.getReg();
Register PhysReg =
Register::isPhysicalRegister(Reg) ? Reg : Register(VRM->getPhys(Reg));
@@ -1109,13 +1109,13 @@ bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
if (X86::getNonNDVariant(MI.getOpcode())) {
unsigned OpIdx = MI.getOperandNo(&MO);
if (OpIdx == 0 && MI.getOperand(1).isReg()) {
- tryAddNDDHint(MI.getOperand(1));
+ TryAddNDDHint(MI.getOperand(1));
if (MI.isCommutable() && MI.getOperand(2).isReg())
- tryAddNDDHint(MI.getOperand(2));
+ TryAddNDDHint(MI.getOperand(2));
} else if (OpIdx == 1) {
- tryAddNDDHint(MI.getOperand(0));
+ TryAddNDDHint(MI.getOperand(0));
} else if (MI.isCommutable() && OpIdx == 2) {
- tryAddNDDHint(MI.getOperand(0));
+ TryAddNDDHint(MI.getOperand(0));
}
}
}
>From 2eac5defb8c760c595c1ccb3a9981e1905a319af Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Mon, 5 Aug 2024 15:40:10 +0800
Subject: [PATCH 5/6] address comments.
---
llvm/lib/Target/X86/X86RegisterInfo.cpp | 24 +++++++++++++-----------
1 file changed, 13 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 834246436bac3..241419f13825c 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -1104,19 +1104,21 @@ bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
TwoAddrHints.insert(PhysReg);
};
+ // NDD instructions is compressible when Op0 is allocated to the same
+ // physic register as Op1 (or Op2 is it's commutable).
for (auto &MO : MRI->reg_nodbg_operands(VirtReg)) {
const MachineInstr &MI = *MO.getParent();
- if (X86::getNonNDVariant(MI.getOpcode())) {
- unsigned OpIdx = MI.getOperandNo(&MO);
- if (OpIdx == 0 && MI.getOperand(1).isReg()) {
- TryAddNDDHint(MI.getOperand(1));
- if (MI.isCommutable() && MI.getOperand(2).isReg())
- TryAddNDDHint(MI.getOperand(2));
- } else if (OpIdx == 1) {
- TryAddNDDHint(MI.getOperand(0));
- } else if (MI.isCommutable() && OpIdx == 2) {
- TryAddNDDHint(MI.getOperand(0));
- }
+ if (X86::getNonNDVariant(MI.getOpcode()) == 0)
+ continue;
+ unsigned OpIdx = MI.getOperandNo(&MO);
+ if (OpIdx == 0 && MI.getOperand(1).isReg()) {
+ TryAddNDDHint(MI.getOperand(1));
+ if (MI.isCommutable() && MI.getOperand(2).isReg())
+ TryAddNDDHint(MI.getOperand(2));
+ } else if (OpIdx == 1) {
+ TryAddNDDHint(MI.getOperand(0));
+ } else if (MI.isCommutable() && OpIdx == 2) {
+ TryAddNDDHint(MI.getOperand(0));
}
}
>From 18754fdfe4ede2dd6b4722774cb9228b246dd358 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Mon, 5 Aug 2024 15:55:08 +0800
Subject: [PATCH 6/6] address comments.
---
llvm/lib/Target/X86/X86RegisterInfo.cpp | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 241419f13825c..5a005e990844e 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -1105,16 +1105,19 @@ bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
};
// NDD instructions is compressible when Op0 is allocated to the same
- // physic register as Op1 (or Op2 is it's commutable).
+ // physic register as Op1 (or Op2 if it's commutable).
for (auto &MO : MRI->reg_nodbg_operands(VirtReg)) {
const MachineInstr &MI = *MO.getParent();
if (X86::getNonNDVariant(MI.getOpcode()) == 0)
continue;
unsigned OpIdx = MI.getOperandNo(&MO);
- if (OpIdx == 0 && MI.getOperand(1).isReg()) {
+ if (OpIdx == 0) {
+ assert(MI.getOperand(1).isReg());
TryAddNDDHint(MI.getOperand(1));
- if (MI.isCommutable() && MI.getOperand(2).isReg())
+ if (MI.isCommutable()) {
+ assert(MI.getOperand(2).isReg());
TryAddNDDHint(MI.getOperand(2));
+ }
} else if (OpIdx == 1) {
TryAddNDDHint(MI.getOperand(0));
} else if (MI.isCommutable() && OpIdx == 2) {
More information about the llvm-commits mailing list