[llvm] Revert "[InlineSpiller] Check rematerialization before folding operand (#134015)" (PR #137801)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 29 05:41:06 PDT 2025
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/137801
This reverts commit b25b51eb639da2e726c95bef00409e0d3913435d.
Conceptually, the InlineSpiller should not be aware of the allocation order.
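For reference, the reverted commit made the spiller consult register-allocator state: before folding a rematerializable load into its single use, it scanned the allocation order through the LiveRegMatrix for an interference-free physical register. A simplified sketch of the removed logic, lifted from the diff below (not standalone; Order, Matrix, and LIS are InlineSpiller members wired in from RegAllocGreedy):

  // Removed by this revert: true if some physreg in the allocation order is
  // interference-free across MI's use slot, i.e. a rematerialized value could
  // plausibly be assigned a register instead of folding the load.
  bool InlineSpiller::hasPhysRegAvailable(const MachineInstr &MI) {
    if (!Order || !Matrix)
      return false;
    SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true);
    SlotIndex PrevIdx = UseIdx.getPrevSlot();
    for (MCPhysReg PhysReg : *Order)
      if (!Matrix->checkInterference(PrevIdx, UseIdx, PhysReg))
        return true;
    return false;
  }

Its single call site gated load folding on that query:

  if (RM.OrigMI->canFoldAsLoad() &&
      (RM.OrigMI->mayLoad() || !hasPhysRegAvailable(MI)) &&
      foldMemoryOperand(Ops, RM.OrigMI)) { ... }

With the revert applied, the spiller again folds whenever canFoldAsLoad() and foldMemoryOperand() succeed, without peeking at allocator internals; hence the X86 test updates below, where zero constants go back to being folded as constant-pool memory operands instead of staying rematerialized as xorps/pxor.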
From 0257a0c5421205bd449cb6662b693d57bae9d01f Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Tue, 29 Apr 2025 14:40:17 +0200
Subject: [PATCH] Revert "[InlineSpiller] Check rematerialization before
folding operand (#134015)"
This reverts commit b25b51eb639da2e726c95bef00409e0d3913435d.
Conceptually, the InlineSpiller should not be aware of the allocation order.
---
llvm/include/llvm/CodeGen/Spiller.h | 6 +-
llvm/lib/CodeGen/InlineSpiller.cpp | 38 +---
llvm/lib/CodeGen/RegAllocGreedy.cpp | 6 +-
llvm/test/CodeGen/X86/avx-cmp.ll | 3 +-
llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll | 3 +-
.../X86/fold-int-pow2-with-fmul-or-fdiv.ll | 3 +-
.../test/CodeGen/X86/fptosi-sat-vector-128.ll | 21 +-
.../test/CodeGen/X86/fptoui-sat-vector-128.ll | 188 +++++++-----------
llvm/test/CodeGen/X86/mmx-fold-zero.ll | 6 +-
.../vector-interleaved-load-i8-stride-7.ll | 9 +-
10 files changed, 98 insertions(+), 185 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/Spiller.h b/llvm/include/llvm/CodeGen/Spiller.h
index 84fc872a07606..3132cefeb6c68 100644
--- a/llvm/include/llvm/CodeGen/Spiller.h
+++ b/llvm/include/llvm/CodeGen/Spiller.h
@@ -23,7 +23,6 @@ class LiveIntervals;
class LiveStacks;
class MachineDominatorTree;
class MachineBlockFrequencyInfo;
-class AllocationOrder;
/// Spiller interface.
///
@@ -36,7 +35,7 @@ class Spiller {
virtual ~Spiller() = 0;
/// spill - Spill the LRE.getParent() live interval.
- virtual void spill(LiveRangeEdit &LRE, AllocationOrder *Order = nullptr) = 0;
+ virtual void spill(LiveRangeEdit &LRE) = 0;
/// Return the registers that were spilled.
virtual ArrayRef<Register> getSpilledRegs() = 0;
@@ -59,8 +58,7 @@ class Spiller {
/// of deferring though VirtRegMap.
Spiller *createInlineSpiller(const Spiller::RequiredAnalyses &Analyses,
MachineFunction &MF, VirtRegMap &VRM,
- VirtRegAuxInfo &VRAI,
- LiveRegMatrix *Matrix = nullptr);
+ VirtRegAuxInfo &VRAI);
} // end namespace llvm
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index f384740be2e33..920873c739f46 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#include "AllocationOrder.h"
#include "SplitKit.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
@@ -24,7 +23,6 @@
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
-#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -151,14 +149,12 @@ class InlineSpiller : public Spiller {
MachineRegisterInfo &MRI;
const TargetInstrInfo &TII;
const TargetRegisterInfo &TRI;
- LiveRegMatrix *Matrix = nullptr;
// Variables that are valid during spill(), but used by multiple methods.
LiveRangeEdit *Edit = nullptr;
LiveInterval *StackInt = nullptr;
int StackSlot;
Register Original;
- AllocationOrder *Order = nullptr;
// All registers to spill to StackSlot, including the main register.
SmallVector<Register, 8> RegsToSpill;
@@ -188,13 +184,13 @@ class InlineSpiller : public Spiller {
public:
InlineSpiller(const Spiller::RequiredAnalyses &Analyses, MachineFunction &MF,
- VirtRegMap &VRM, VirtRegAuxInfo &VRAI, LiveRegMatrix *Matrix)
+ VirtRegMap &VRM, VirtRegAuxInfo &VRAI)
: MF(MF), LIS(Analyses.LIS), LSS(Analyses.LSS), VRM(VRM),
MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()),
- TRI(*MF.getSubtarget().getRegisterInfo()), Matrix(Matrix),
- HSpiller(Analyses, MF, VRM), VRAI(VRAI) {}
+ TRI(*MF.getSubtarget().getRegisterInfo()), HSpiller(Analyses, MF, VRM),
+ VRAI(VRAI) {}
- void spill(LiveRangeEdit &, AllocationOrder *Order = nullptr) override;
+ void spill(LiveRangeEdit &) override;
ArrayRef<Register> getSpilledRegs() override { return RegsToSpill; }
ArrayRef<Register> getReplacedRegs() override { return RegsReplaced; }
void postOptimization() override;
@@ -211,7 +207,6 @@ class InlineSpiller : public Spiller {
void markValueUsed(LiveInterval*, VNInfo*);
bool canGuaranteeAssignmentAfterRemat(Register VReg, MachineInstr &MI);
- bool hasPhysRegAvailable(const MachineInstr &MI);
bool reMaterializeFor(LiveInterval &, MachineInstr &MI);
void reMaterializeAll();
@@ -234,8 +229,8 @@ void Spiller::anchor() {}
Spiller *
llvm::createInlineSpiller(const InlineSpiller::RequiredAnalyses &Analyses,
MachineFunction &MF, VirtRegMap &VRM,
- VirtRegAuxInfo &VRAI, LiveRegMatrix *Matrix) {
- return new InlineSpiller(Analyses, MF, VRM, VRAI, Matrix);
+ VirtRegAuxInfo &VRAI) {
+ return new InlineSpiller(Analyses, MF, VRM, VRAI);
}
//===----------------------------------------------------------------------===//
@@ -620,23 +615,6 @@ bool InlineSpiller::canGuaranteeAssignmentAfterRemat(Register VReg,
return true;
}
-/// hasPhysRegAvailable - Check if there is an available physical register for
-/// rematerialization.
-bool InlineSpiller::hasPhysRegAvailable(const MachineInstr &MI) {
- if (!Order || !Matrix)
- return false;
-
- SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true);
- SlotIndex PrevIdx = UseIdx.getPrevSlot();
-
- for (MCPhysReg PhysReg : *Order) {
- if (!Matrix->checkInterference(PrevIdx, UseIdx, PhysReg))
- return true;
- }
-
- return false;
-}
-
/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
// Analyze instruction
@@ -683,7 +661,6 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
// Before rematerializing into a register for a single instruction, try to
// fold a load into the instruction. That avoids allocating a new register.
if (RM.OrigMI->canFoldAsLoad() &&
- (RM.OrigMI->mayLoad() || !hasPhysRegAvailable(MI)) &&
foldMemoryOperand(Ops, RM.OrigMI)) {
Edit->markRematerialized(RM.ParentVNI);
++NumFoldedLoads;
@@ -1305,10 +1282,9 @@ void InlineSpiller::spillAll() {
Edit->eraseVirtReg(Reg);
}
-void InlineSpiller::spill(LiveRangeEdit &edit, AllocationOrder *order) {
+void InlineSpiller::spill(LiveRangeEdit &edit) {
++NumSpilledRanges;
Edit = &edit;
- Order = order;
assert(!edit.getReg().isStack() && "Trying to spill a stack slot.");
// Share a stack slot among all descendants of Original.
Original = VRM.getOriginal(edit.getReg());
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 250776e8bf7b1..56d3bd953f57d 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -2664,7 +2664,7 @@ MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg,
NamedRegionTimer T("spill", "Spiller", TimerGroupName,
TimerGroupDescription, TimePassesIsEnabled);
LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
- spiller().spill(LRE, &Order);
+ spiller().spill(LRE);
ExtraInfo->setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
// Tell LiveDebugVariables about the new ranges. Ranges not being covered by
@@ -2908,8 +2908,8 @@ bool RAGreedy::run(MachineFunction &mf) {
PriorityAdvisor = PriorityProvider->getAdvisor(*MF, *this, *Indexes);
VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
- SpillerInstance.reset(createInlineSpiller({*LIS, *LSS, *DomTree, *MBFI}, *MF,
- *VRM, *VRAI, Matrix));
+ SpillerInstance.reset(
+ createInlineSpiller({*LIS, *LSS, *DomTree, *MBFI}, *MF, *VRM, *VRAI));
VRAI->calculateSpillWeightsAndHints();
diff --git a/llvm/test/CodeGen/X86/avx-cmp.ll b/llvm/test/CodeGen/X86/avx-cmp.ll
index 3ced4f71bad8c..d31107bfeb7bb 100644
--- a/llvm/test/CodeGen/X86/avx-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx-cmp.ll
@@ -43,8 +43,7 @@ define void @render(double %a0) nounwind {
; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=1
; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vucomisd %xmm1, %xmm0
+; CHECK-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: jne .LBB2_4
; CHECK-NEXT: jnp .LBB2_2
; CHECK-NEXT: .LBB2_4: # %if.then
diff --git a/llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll b/llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll
index 3243d950740ca..95a7a10d50f59 100644
--- a/llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll
+++ b/llvm/test/CodeGen/X86/eq-or-eq-range-of-2.ll
@@ -111,8 +111,7 @@ define <4 x i32> @eq_or_eq_ult_2_fail_multiuse(<4 x i32> %x) {
; AVX512-NEXT: callq use.v4.i32 at PLT
; AVX512-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
; AVX512-NEXT: vpcmpltud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %k1
-; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vmovdqa32 {{.*#+}} xmm0 {%k1} {z} = [4294967295,4294967295,4294967295,4294967295]
; AVX512-NEXT: addq $24, %rsp
; AVX512-NEXT: .cfi_def_cfa_offset 8
; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
index 59a61722927de..67c9e7cc22236 100644
--- a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
+++ b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
@@ -195,8 +195,7 @@ define <8 x half> @fmul_pow2_8xhalf(<8 x i16> %i) {
; CHECK-SSE-NEXT: callq __truncsfhf2 at PLT
; CHECK-SSE-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-SSE-NEXT: pxor %xmm1, %xmm1
-; CHECK-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; CHECK-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7]
; CHECK-SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; CHECK-SSE-NEXT: callq __truncsfhf2 at PLT
; CHECK-SSE-NEXT: callq __extendhfsf2 at PLT
diff --git a/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll b/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
index 7f6d64c21724a..536a1ae3b918d 100644
--- a/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
+++ b/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
@@ -567,8 +567,7 @@ define <8 x i1> @test_signed_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebp, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebx, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
; CHECK-NEXT: cmovpl %ebx, %eax
@@ -582,8 +581,7 @@ define <8 x i1> @test_signed_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebp, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebx, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
; CHECK-NEXT: cmovpl %ebx, %eax
@@ -595,8 +593,7 @@ define <8 x i1> @test_signed_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebp, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebx, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
; CHECK-NEXT: cmovpl %ebx, %eax
@@ -612,8 +609,7 @@ define <8 x i1> @test_signed_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebp, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebx, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
; CHECK-NEXT: cmovpl %ebx, %eax
@@ -625,8 +621,7 @@ define <8 x i1> @test_signed_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebp, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebx, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
; CHECK-NEXT: cmovpl %ebx, %eax
@@ -639,8 +634,7 @@ define <8 x i1> @test_signed_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebp, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebx, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
; CHECK-NEXT: cmovpl %ebx, %eax
@@ -652,8 +646,7 @@ define <8 x i1> @test_signed_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: cvttss2si %xmm0, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebp, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebx, %eax
; CHECK-NEXT: ucomiss %xmm0, %xmm0
; CHECK-NEXT: cmovpl %ebx, %eax
diff --git a/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll b/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll
index ffbdd66529f5c..4305886168abe 100644
--- a/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll
+++ b/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll
@@ -280,12 +280,11 @@ define <4 x i128> @test_unsigned_v4i128_v4f32(<4 x float> %f) nounwind {
; CHECK-NEXT: callq __fixunssfti at PLT
; CHECK-NEXT: movq %rax, %r12
; CHECK-NEXT: movq %rdx, %r13
-; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
-; CHECK-NEXT: ucomiss %xmm0, %xmm1
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %r14, %r13
; CHECK-NEXT: cmovbq %r14, %r12
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %rbp, %r12
; CHECK-NEXT: cmovaq %rbp, %r13
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
@@ -294,21 +293,19 @@ define <4 x i128> @test_unsigned_v4i128_v4f32(<4 x float> %f) nounwind {
; CHECK-NEXT: callq __fixunssfti at PLT
; CHECK-NEXT: movq %rax, %rbp
; CHECK-NEXT: movq %rdx, %r14
-; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
-; CHECK-NEXT: ucomiss %xmm0, %xmm1
+; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: movl $0, %eax
; CHECK-NEXT: cmovbq %rax, %r14
; CHECK-NEXT: cmovbq %rax, %rbp
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: movq $-1, %rax
; CHECK-NEXT: cmovaq %rax, %rbp
; CHECK-NEXT: cmovaq %rax, %r14
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq __fixunssfti at PLT
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: movl $0, %ecx
; CHECK-NEXT: cmovbq %rcx, %rdx
; CHECK-NEXT: cmovbq %rcx, %rax
@@ -509,8 +506,7 @@ define <2 x i128> @test_unsigned_v2i128_v2f64(<2 x double> %f) nounwind {
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq __fixunsdfti at PLT
; CHECK-NEXT: movapd (%rsp), %xmm0 # 16-byte Reload
-; CHECK-NEXT: xorpd %xmm1, %xmm1
-; CHECK-NEXT: ucomisd %xmm1, %xmm0
+; CHECK-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %r12, %rdx
; CHECK-NEXT: cmovbq %r12, %rax
; CHECK-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
@@ -566,8 +562,7 @@ define <8 x i1> @test_unsigned_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -579,8 +574,7 @@ define <8 x i1> @test_unsigned_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -590,8 +584,7 @@ define <8 x i1> @test_unsigned_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -605,8 +598,7 @@ define <8 x i1> @test_unsigned_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: psrlq $48, %xmm0
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -616,8 +608,7 @@ define <8 x i1> @test_unsigned_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -628,8 +619,7 @@ define <8 x i1> @test_unsigned_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -639,8 +629,7 @@ define <8 x i1> @test_unsigned_v8i1_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: psrld $16, %xmm0
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -684,8 +673,7 @@ define <8 x i8> @test_unsigned_v8i8_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -694,8 +682,7 @@ define <8 x i8> @test_unsigned_v8i8_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -704,8 +691,7 @@ define <8 x i8> @test_unsigned_v8i8_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: psrld $16, %xmm0
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -716,8 +702,7 @@ define <8 x i8> @test_unsigned_v8i8_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -728,8 +713,7 @@ define <8 x i8> @test_unsigned_v8i8_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: psrlq $48, %xmm0
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -742,8 +726,7 @@ define <8 x i8> @test_unsigned_v8i8_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %r14d
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %r14d
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %r14d
@@ -752,8 +735,7 @@ define <8 x i8> @test_unsigned_v8i8_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -795,8 +777,7 @@ define <8 x i16> @test_unsigned_v8i16_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -808,8 +789,7 @@ define <8 x i16> @test_unsigned_v8i16_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -819,8 +799,7 @@ define <8 x i16> @test_unsigned_v8i16_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -834,8 +813,7 @@ define <8 x i16> @test_unsigned_v8i16_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: psrlq $48, %xmm0
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -845,8 +823,7 @@ define <8 x i16> @test_unsigned_v8i16_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -857,8 +834,7 @@ define <8 x i16> @test_unsigned_v8i16_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -868,8 +844,7 @@ define <8 x i16> @test_unsigned_v8i16_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: psrld $16, %xmm0
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %eax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -911,8 +886,7 @@ define <8 x i32> @test_unsigned_v8i32_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %rax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -923,8 +897,7 @@ define <8 x i32> @test_unsigned_v8i32_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %rax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -934,8 +907,7 @@ define <8 x i32> @test_unsigned_v8i32_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: psrld $16, %xmm0
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %rax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -949,8 +921,7 @@ define <8 x i32> @test_unsigned_v8i32_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %rax
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -960,8 +931,7 @@ define <8 x i32> @test_unsigned_v8i32_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %rax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -973,8 +943,7 @@ define <8 x i32> @test_unsigned_v8i32_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %rax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -984,8 +953,7 @@ define <8 x i32> @test_unsigned_v8i32_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: cvttss2si %xmm0, %rax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbl %ebx, %eax
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmoval %ebp, %eax
@@ -1039,8 +1007,7 @@ define <8 x i64> @test_unsigned_v8i64_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: sarq $63, %rdx
; CHECK-NEXT: andq %rax, %rdx
; CHECK-NEXT: orq %rcx, %rdx
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %rbx, %rdx
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %r14, %rdx
@@ -1059,8 +1026,7 @@ define <8 x i64> @test_unsigned_v8i64_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: sarq $63, %rdx
; CHECK-NEXT: andq %rax, %rdx
; CHECK-NEXT: orq %rcx, %rdx
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %rbx, %rdx
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %r14, %rdx
@@ -1077,8 +1043,7 @@ define <8 x i64> @test_unsigned_v8i64_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: sarq $63, %rdx
; CHECK-NEXT: andq %rax, %rdx
; CHECK-NEXT: orq %rcx, %rdx
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %rbx, %rdx
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %r14, %rdx
@@ -1097,8 +1062,7 @@ define <8 x i64> @test_unsigned_v8i64_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: sarq $63, %rdx
; CHECK-NEXT: andq %rax, %rdx
; CHECK-NEXT: orq %rcx, %rdx
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %rbx, %rdx
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %r14, %rdx
@@ -1115,8 +1079,7 @@ define <8 x i64> @test_unsigned_v8i64_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: sarq $63, %rdx
; CHECK-NEXT: andq %rax, %rdx
; CHECK-NEXT: orq %rcx, %rdx
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %rbx, %rdx
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %r14, %rdx
@@ -1135,8 +1098,7 @@ define <8 x i64> @test_unsigned_v8i64_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: sarq $63, %rdx
; CHECK-NEXT: andq %rax, %rdx
; CHECK-NEXT: orq %rcx, %rdx
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %rbx, %rdx
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %r14, %rdx
@@ -1153,8 +1115,7 @@ define <8 x i64> @test_unsigned_v8i64_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: sarq $63, %rdx
; CHECK-NEXT: andq %rax, %rdx
; CHECK-NEXT: orq %rcx, %rdx
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: ucomiss %xmm1, %xmm0
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %rbx, %rdx
; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %r14, %rdx
@@ -1206,13 +1167,12 @@ define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: callq __fixunssfti at PLT
-; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
-; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: ucomiss %xmm0, %xmm1
+; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %r12, %rdx
; CHECK-NEXT: cmovbq %r12, %rax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %r13, %rax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: cmovaq %r13, %rdx
@@ -1222,13 +1182,12 @@ define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
; CHECK-NEXT: callq __fixunssfti at PLT
-; CHECK-NEXT: pxor %xmm0, %xmm0
-; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
-; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: ucomiss %xmm0, %xmm1
+; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %r12, %rdx
; CHECK-NEXT: cmovbq %r12, %rax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %r13, %rax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: cmovaq %r13, %rdx
@@ -1238,13 +1197,12 @@ define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: callq __fixunssfti at PLT
-; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
-; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: ucomiss %xmm0, %xmm1
+; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %r12, %rdx
; CHECK-NEXT: cmovbq %r12, %rax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %r13, %rax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: cmovaq %r13, %rdx
@@ -1255,13 +1213,12 @@ define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
; CHECK-NEXT: callq __fixunssfti at PLT
; CHECK-NEXT: movq %rdx, %rbp
-; CHECK-NEXT: pxor %xmm0, %xmm0
-; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
-; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: ucomiss %xmm0, %xmm1
+; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %r12, %rbp
; CHECK-NEXT: cmovbq %r12, %rax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %r13, %rax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: cmovaq %r13, %rbp
@@ -1272,13 +1229,12 @@ define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: callq __fixunssfti at PLT
; CHECK-NEXT: movq %rax, %r14
; CHECK-NEXT: movq %rdx, %r15
-; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
-; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: ucomiss %xmm0, %xmm1
+; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovbq %r12, %r15
; CHECK-NEXT: cmovbq %r12, %r14
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cmovaq %r13, %r14
; CHECK-NEXT: cmovaq %r13, %r15
; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
@@ -1288,14 +1244,13 @@ define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: callq __fixunssfti at PLT
; CHECK-NEXT: movq %rax, %r12
; CHECK-NEXT: movq %rdx, %r13
-; CHECK-NEXT: pxor %xmm0, %xmm0
-; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
-; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: ucomiss %xmm0, %xmm1
+; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: movl $0, %eax
; CHECK-NEXT: cmovbq %rax, %r13
; CHECK-NEXT: cmovbq %rax, %r12
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: movq $-1, %rax
; CHECK-NEXT: cmovaq %rax, %r12
; CHECK-NEXT: cmovaq %rax, %r13
@@ -1303,14 +1258,13 @@ define <8 x i128> @test_unsigned_v8i128_v8f16(<8 x half> %f) nounwind {
; CHECK-NEXT: callq __extendhfsf2 at PLT
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: callq __fixunssfti at PLT
-; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 4-byte Reload
-; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: ucomiss %xmm0, %xmm1
+; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: movl $0, %ecx
; CHECK-NEXT: cmovbq %rcx, %rdx
; CHECK-NEXT: cmovbq %rcx, %rax
-; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: movq $-1, %rcx
; CHECK-NEXT: cmovaq %rcx, %rax
; CHECK-NEXT: cmovaq %rcx, %rdx
diff --git a/llvm/test/CodeGen/X86/mmx-fold-zero.ll b/llvm/test/CodeGen/X86/mmx-fold-zero.ll
index a3ef81daef337..a6e1275875dbc 100644
--- a/llvm/test/CodeGen/X86/mmx-fold-zero.ll
+++ b/llvm/test/CodeGen/X86/mmx-fold-zero.ll
@@ -34,8 +34,7 @@ define double @mmx_zero(double, double, double, double) nounwind {
; X86-NEXT: paddw %mm2, %mm0
; X86-NEXT: paddw %mm6, %mm0
; X86-NEXT: pmuludq %mm3, %mm0
-; X86-NEXT: pxor %mm3, %mm3
-; X86-NEXT: paddw %mm3, %mm0
+; X86-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}, %mm0
; X86-NEXT: paddw %mm1, %mm0
; X86-NEXT: pmuludq %mm7, %mm0
; X86-NEXT: pmuludq (%esp), %mm0 # 8-byte Folded Reload
@@ -73,8 +72,7 @@ define double @mmx_zero(double, double, double, double) nounwind {
; X64-NEXT: paddw %mm2, %mm0
; X64-NEXT: paddw %mm6, %mm0
; X64-NEXT: pmuludq %mm3, %mm0
-; X64-NEXT: pxor %mm3, %mm3
-; X64-NEXT: paddw %mm3, %mm0
+; X64-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %mm0
; X64-NEXT: paddw %mm1, %mm0
; X64-NEXT: pmuludq %mm7, %mm0
; X64-NEXT: pmuludq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-7.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-7.ll
index 86c932a5bb1f9..5ab09194c5b83 100644
--- a/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-7.ll
+++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-7.ll
@@ -9870,8 +9870,7 @@ define void @load_i8_stride7_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX-NEXT: vpshufb %xmm3, %xmm1, %xmm3
; AVX-NEXT: vpshufb %xmm4, %xmm0, %xmm4
; AVX-NEXT: vpor %xmm3, %xmm4, %xmm3
-; AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,5,6],xmm0[7]
+; AVX-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,5,6],mem[7]
; AVX-NEXT: vpshufb %xmm5, %xmm13, %xmm4
; AVX-NEXT: vpor %xmm4, %xmm3, %xmm3
; AVX-NEXT: vpshufb %xmm8, %xmm14, %xmm4
@@ -9906,8 +9905,7 @@ define void @load_i8_stride7_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX-NEXT: # xmm5 = mem[0,0]
; AVX-NEXT: vpshufb %xmm5, %xmm10, %xmm9
; AVX-NEXT: vpor %xmm7, %xmm9, %xmm7
-; AVX-NEXT: vpxor %xmm9, %xmm9, %xmm9
-; AVX-NEXT: vpblendw {{.*#+}} xmm7 = xmm7[0,1,2,3,4,5,6],xmm9[7]
+; AVX-NEXT: vpblendw {{.*#+}} xmm7 = xmm7[0,1,2,3,4,5,6],mem[7]
; AVX-NEXT: vmovddup {{.*#+}} xmm9 = [0,128,128,128,128,128,6,13,0,128,128,128,128,128,6,13]
; AVX-NEXT: # xmm9 = mem[0,0]
; AVX-NEXT: vpshufb %xmm9, %xmm11, %xmm10
@@ -9941,8 +9939,7 @@ define void @load_i8_stride7_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1, ptr
; AVX-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; AVX-NEXT: vpshufb %xmm5, %xmm0, %xmm5
; AVX-NEXT: vpor %xmm4, %xmm5, %xmm4
-; AVX-NEXT: vpxor %xmm5, %xmm5, %xmm5
-; AVX-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,5,6],xmm5[7]
+; AVX-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,5,6],mem[7]
; AVX-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm5 # 16-byte Reload
; AVX-NEXT: vpshufb %xmm9, %xmm5, %xmm5
; AVX-NEXT: vpor %xmm5, %xmm4, %xmm4