[llvm] 943f3e5 - [X86] Remove x86-experimental-unordered-atomic-isel option and associated code
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 4 20:04:06 PST 2023
Author: Philip Reames
Date: 2023-12-04T20:03:38-08:00
New Revision: 943f3e52a0532d1d2b5c743635e1aed15033154b
URL: https://github.com/llvm/llvm-project/commit/943f3e52a0532d1d2b5c743635e1aed15033154b
DIFF: https://github.com/llvm/llvm-project/commit/943f3e52a0532d1d2b5c743635e1aed15033154b.diff
LOG: [X86] Remove x86-experimental-unordered-atomic-isel option and associated code
This option enables an experimental lowering for unordered atomics I worked
on a few years back. It never reached production quality, and hasn't been
worked on in years. So let's rip it out.
This wasn't a crazy idea, but I hit some stumbling blocks which prevented me
from pushing it across the finish line. From the look of 027aa27, that
change's description is probably a good summary. I don't remember the
details any longer.
Added:
Modified:
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/lib/Target/X86/X86ISelLowering.h
llvm/test/CodeGen/X86/atomic-unordered.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 3cd89c71f7164..490125164ab34 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4715,25 +4715,6 @@ class TargetLowering : public TargetLoweringBase {
return Chain;
}
- /// Should SelectionDAG lower an atomic store of the given kind as a normal
- /// StoreSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to
- /// eventually migrate all targets to the using StoreSDNodes, but porting is
- /// being done target at a time.
- virtual bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const {
- assert(SI.isAtomic() && "violated precondition");
- return false;
- }
-
- /// Should SelectionDAG lower an atomic load of the given kind as a normal
- /// LoadSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to
- /// eventually migrate all targets to the using LoadSDNodes, but porting is
- /// being done target at a time.
- virtual bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const {
- assert(LI.isAtomic() && "violated precondition");
- return false;
- }
-
-
/// This callback is invoked by the type legalizer to legalize nodes with an
/// illegal operand type but legal result types. It replaces the
/// LowerOperation callback in the type Legalizer. The reason we can not do
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index da7d9ace4114a..ed1c96a873748 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4857,23 +4857,6 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
SDValue Ptr = getValue(I.getPointerOperand());
-
- if (TLI.lowerAtomicLoadAsLoadSDNode(I)) {
- // TODO: Once this is better exercised by tests, it should be merged with
- // the normal path for loads to prevent future divergence.
- SDValue L = DAG.getLoad(MemVT, dl, InChain, Ptr, MMO);
- if (MemVT != VT)
- L = DAG.getPtrExtOrTrunc(L, dl, VT);
-
- setValue(&I, L);
- SDValue OutChain = L.getValue(1);
- if (!I.isUnordered())
- DAG.setRoot(OutChain);
- else
- PendingLoads.push_back(OutChain);
- return;
- }
-
SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
Ptr, MMO);
@@ -4913,14 +4896,6 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT);
SDValue Ptr = getValue(I.getPointerOperand());
- if (TLI.lowerAtomicStoreAsStoreSDNode(I)) {
- // TODO: Once this is better exercised by tests, it should be merged with
- // the normal path for stores to prevent future divergence.
- SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO);
- setValue(&I, S);
- DAG.setRoot(S);
- return;
- }
SDValue OutChain =
DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain, Val, Ptr, MMO);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6167be7bdf84e..1edf758a278d4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -83,13 +83,6 @@ static cl::opt<bool> MulConstantOptimization(
"SHIFT, LEA, etc."),
cl::Hidden);
-static cl::opt<bool> ExperimentalUnorderedISEL(
- "x86-experimental-unordered-atomic-isel", cl::init(false),
- cl::desc("Use LoadSDNode and StoreSDNode instead of "
- "AtomicSDNode for unordered atomic loads and "
- "stores respectively."),
- cl::Hidden);
-
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
const X86Subtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
@@ -30598,18 +30591,6 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
return Loaded;
}
-bool X86TargetLowering::lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const {
- if (!SI.isUnordered())
- return false;
- return ExperimentalUnorderedISEL;
-}
-bool X86TargetLowering::lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const {
- if (!LI.isUnordered())
- return false;
- return ExperimentalUnorderedISEL;
-}
-
-
/// Emit a locked operation on a stack location which does not change any
/// memory location, but does involve a lock prefix. Location is chosen to be
/// a) very likely accessed only by a single thread to minimize cache traffic,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 3b1b2603fd8fc..9bd1622cb0d3a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1753,9 +1753,6 @@ namespace llvm {
LoadInst *
lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
- bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
- bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;
-
bool needsCmpXchgNb(Type *MemType) const;
void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unordered.ll
index b66988c8bd24b..df123be53474f 100644
--- a/llvm/test/CodeGen/X86/atomic-unordered.ll
+++ b/llvm/test/CodeGen/X86/atomic-unordered.ll
@@ -1,8 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=0 | FileCheck --check-prefixes=CHECK,CHECK-O0,CHECK-O0-CUR %s
-; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=0 | FileCheck --check-prefixes=CHECK,CHECK-O3,CHECK-O3-CUR %s
-; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=1 | FileCheck --check-prefixes=CHECK,CHECK-O0,CHECK-O0-EX %s
-; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=1 | FileCheck --check-prefixes=CHECK,CHECK-O3,CHECK-O3-EX %s
+; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake | FileCheck --check-prefixes=CHECK,CHECK-O0 %s
+; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake | FileCheck --check-prefixes=CHECK,CHECK-O3 %s
define i8 @load_i8(ptr %ptr) {
; CHECK-O0-LABEL: load_i8:
@@ -408,33 +406,21 @@ define void @store_i256(ptr %ptr, i256 %v) {
; Legal if wider type is also atomic (TODO)
define void @vec_store(ptr %p0, <2 x i32> %vec) {
-; CHECK-O0-CUR-LABEL: vec_store:
-; CHECK-O0-CUR: # %bb.0:
-; CHECK-O0-CUR-NEXT: vmovd %xmm0, %ecx
-; CHECK-O0-CUR-NEXT: vpextrd $1, %xmm0, %eax
-; CHECK-O0-CUR-NEXT: movl %ecx, (%rdi)
-; CHECK-O0-CUR-NEXT: movl %eax, 4(%rdi)
-; CHECK-O0-CUR-NEXT: retq
-;
-; CHECK-O3-CUR-LABEL: vec_store:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: vmovd %xmm0, %eax
-; CHECK-O3-CUR-NEXT: vpextrd $1, %xmm0, %ecx
-; CHECK-O3-CUR-NEXT: movl %eax, (%rdi)
-; CHECK-O3-CUR-NEXT: movl %ecx, 4(%rdi)
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O0-EX-LABEL: vec_store:
-; CHECK-O0-EX: # %bb.0:
-; CHECK-O0-EX-NEXT: vmovd %xmm0, (%rdi)
-; CHECK-O0-EX-NEXT: vpextrd $1, %xmm0, 4(%rdi)
-; CHECK-O0-EX-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: vec_store:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: vmovss %xmm0, (%rdi)
-; CHECK-O3-EX-NEXT: vextractps $1, %xmm0, 4(%rdi)
-; CHECK-O3-EX-NEXT: retq
+; CHECK-O0-LABEL: vec_store:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: vmovd %xmm0, %ecx
+; CHECK-O0-NEXT: vpextrd $1, %xmm0, %eax
+; CHECK-O0-NEXT: movl %ecx, (%rdi)
+; CHECK-O0-NEXT: movl %eax, 4(%rdi)
+; CHECK-O0-NEXT: retq
+;
+; CHECK-O3-LABEL: vec_store:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: vmovd %xmm0, %eax
+; CHECK-O3-NEXT: vpextrd $1, %xmm0, %ecx
+; CHECK-O3-NEXT: movl %eax, (%rdi)
+; CHECK-O3-NEXT: movl %ecx, 4(%rdi)
+; CHECK-O3-NEXT: retq
%v1 = extractelement <2 x i32> %vec, i32 0
%v2 = extractelement <2 x i32> %vec, i32 1
%p1 = getelementptr i32, ptr %p0, i64 1
@@ -445,33 +431,21 @@ define void @vec_store(ptr %p0, <2 x i32> %vec) {
; Not legal to widen due to alignment restriction
define void @vec_store_unaligned(ptr %p0, <2 x i32> %vec) {
-; CHECK-O0-CUR-LABEL: vec_store_unaligned:
-; CHECK-O0-CUR: # %bb.0:
-; CHECK-O0-CUR-NEXT: vmovd %xmm0, %ecx
-; CHECK-O0-CUR-NEXT: vpextrd $1, %xmm0, %eax
-; CHECK-O0-CUR-NEXT: movl %ecx, (%rdi)
-; CHECK-O0-CUR-NEXT: movl %eax, 4(%rdi)
-; CHECK-O0-CUR-NEXT: retq
-;
-; CHECK-O3-CUR-LABEL: vec_store_unaligned:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: vmovd %xmm0, %eax
-; CHECK-O3-CUR-NEXT: vpextrd $1, %xmm0, %ecx
-; CHECK-O3-CUR-NEXT: movl %eax, (%rdi)
-; CHECK-O3-CUR-NEXT: movl %ecx, 4(%rdi)
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O0-EX-LABEL: vec_store_unaligned:
-; CHECK-O0-EX: # %bb.0:
-; CHECK-O0-EX-NEXT: vmovd %xmm0, (%rdi)
-; CHECK-O0-EX-NEXT: vpextrd $1, %xmm0, 4(%rdi)
-; CHECK-O0-EX-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: vec_store_unaligned:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: vmovss %xmm0, (%rdi)
-; CHECK-O3-EX-NEXT: vextractps $1, %xmm0, 4(%rdi)
-; CHECK-O3-EX-NEXT: retq
+; CHECK-O0-LABEL: vec_store_unaligned:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: vmovd %xmm0, %ecx
+; CHECK-O0-NEXT: vpextrd $1, %xmm0, %eax
+; CHECK-O0-NEXT: movl %ecx, (%rdi)
+; CHECK-O0-NEXT: movl %eax, 4(%rdi)
+; CHECK-O0-NEXT: retq
+;
+; CHECK-O3-LABEL: vec_store_unaligned:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: vmovd %xmm0, %eax
+; CHECK-O3-NEXT: vpextrd $1, %xmm0, %ecx
+; CHECK-O3-NEXT: movl %eax, (%rdi)
+; CHECK-O3-NEXT: movl %ecx, 4(%rdi)
+; CHECK-O3-NEXT: retq
%v1 = extractelement <2 x i32> %vec, i32 0
%v2 = extractelement <2 x i32> %vec, i32 1
%p1 = getelementptr i32, ptr %p0, i64 1
@@ -485,31 +459,12 @@ define void @vec_store_unaligned(ptr %p0, <2 x i32> %vec) {
; Legal if wider type is also atomic (TODO)
; Also, can avoid register move from xmm to eax (TODO)
define void @widen_broadcast2(ptr %p0, <2 x i32> %vec) {
-; CHECK-O0-CUR-LABEL: widen_broadcast2:
-; CHECK-O0-CUR: # %bb.0:
-; CHECK-O0-CUR-NEXT: vmovd %xmm0, %eax
-; CHECK-O0-CUR-NEXT: movl %eax, (%rdi)
-; CHECK-O0-CUR-NEXT: movl %eax, 4(%rdi)
-; CHECK-O0-CUR-NEXT: retq
-;
-; CHECK-O3-CUR-LABEL: widen_broadcast2:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: vmovd %xmm0, %eax
-; CHECK-O3-CUR-NEXT: movl %eax, (%rdi)
-; CHECK-O3-CUR-NEXT: movl %eax, 4(%rdi)
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O0-EX-LABEL: widen_broadcast2:
-; CHECK-O0-EX: # %bb.0:
-; CHECK-O0-EX-NEXT: vmovd %xmm0, (%rdi)
-; CHECK-O0-EX-NEXT: vmovd %xmm0, 4(%rdi)
-; CHECK-O0-EX-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: widen_broadcast2:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: vmovss %xmm0, (%rdi)
-; CHECK-O3-EX-NEXT: vmovss %xmm0, 4(%rdi)
-; CHECK-O3-EX-NEXT: retq
+; CHECK-LABEL: widen_broadcast2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovd %xmm0, %eax
+; CHECK-NEXT: movl %eax, (%rdi)
+; CHECK-NEXT: movl %eax, 4(%rdi)
+; CHECK-NEXT: retq
%v1 = extractelement <2 x i32> %vec, i32 0
%p1 = getelementptr i32, ptr %p0, i64 1
store atomic i32 %v1, ptr %p0 unordered, align 8
@@ -519,31 +474,12 @@ define void @widen_broadcast2(ptr %p0, <2 x i32> %vec) {
; Not legal to widen due to alignment restriction
define void @widen_broadcast2_unaligned(ptr %p0, <2 x i32> %vec) {
-; CHECK-O0-CUR-LABEL: widen_broadcast2_unaligned:
-; CHECK-O0-CUR: # %bb.0:
-; CHECK-O0-CUR-NEXT: vmovd %xmm0, %eax
-; CHECK-O0-CUR-NEXT: movl %eax, (%rdi)
-; CHECK-O0-CUR-NEXT: movl %eax, 4(%rdi)
-; CHECK-O0-CUR-NEXT: retq
-;
-; CHECK-O3-CUR-LABEL: widen_broadcast2_unaligned:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: vmovd %xmm0, %eax
-; CHECK-O3-CUR-NEXT: movl %eax, (%rdi)
-; CHECK-O3-CUR-NEXT: movl %eax, 4(%rdi)
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O0-EX-LABEL: widen_broadcast2_unaligned:
-; CHECK-O0-EX: # %bb.0:
-; CHECK-O0-EX-NEXT: vmovd %xmm0, (%rdi)
-; CHECK-O0-EX-NEXT: vmovd %xmm0, 4(%rdi)
-; CHECK-O0-EX-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: widen_broadcast2_unaligned:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: vmovss %xmm0, (%rdi)
-; CHECK-O3-EX-NEXT: vmovss %xmm0, 4(%rdi)
-; CHECK-O3-EX-NEXT: retq
+; CHECK-LABEL: widen_broadcast2_unaligned:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovd %xmm0, %eax
+; CHECK-NEXT: movl %eax, (%rdi)
+; CHECK-NEXT: movl %eax, 4(%rdi)
+; CHECK-NEXT: retq
%v1 = extractelement <2 x i32> %vec, i32 0
%p1 = getelementptr i32, ptr %p0, i64 1
store atomic i32 %v1, ptr %p0 unordered, align 4
@@ -610,17 +546,11 @@ define i64 @load_fold_add3(ptr %p1, ptr %p2) {
; CHECK-O0-NEXT: addq (%rsi), %rax
; CHECK-O0-NEXT: retq
;
-; CHECK-O3-CUR-LABEL: load_fold_add3:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: movq (%rsi), %rax
-; CHECK-O3-CUR-NEXT: addq (%rdi), %rax
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: load_fold_add3:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: movq (%rdi), %rax
-; CHECK-O3-EX-NEXT: addq (%rsi), %rax
-; CHECK-O3-EX-NEXT: retq
+; CHECK-O3-LABEL: load_fold_add3:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movq (%rsi), %rax
+; CHECK-O3-NEXT: addq (%rdi), %rax
+; CHECK-O3-NEXT: retq
%v = load atomic i64, ptr %p1 unordered, align 8
%v2 = load atomic i64, ptr %p2 unordered, align 8
%ret = add i64 %v, %v2
@@ -704,17 +634,11 @@ define i64 @load_fold_mul3(ptr %p1, ptr %p2) {
; CHECK-O0-NEXT: imulq (%rsi), %rax
; CHECK-O0-NEXT: retq
;
-; CHECK-O3-CUR-LABEL: load_fold_mul3:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: movq (%rsi), %rax
-; CHECK-O3-CUR-NEXT: imulq (%rdi), %rax
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: load_fold_mul3:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: movq (%rdi), %rax
-; CHECK-O3-EX-NEXT: imulq (%rsi), %rax
-; CHECK-O3-EX-NEXT: retq
+; CHECK-O3-LABEL: load_fold_mul3:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movq (%rsi), %rax
+; CHECK-O3-NEXT: imulq (%rdi), %rax
+; CHECK-O3-NEXT: retq
%v = load atomic i64, ptr %p1 unordered, align 8
%v2 = load atomic i64, ptr %p2 unordered, align 8
%ret = mul i64 %v, %v2
@@ -823,20 +747,13 @@ define i64 @load_fold_udiv1(ptr %p) {
; CHECK-O0-NEXT: divq %rcx
; CHECK-O0-NEXT: retq
;
-; CHECK-O3-CUR-LABEL: load_fold_udiv1:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: movq (%rdi), %rdx
-; CHECK-O3-CUR-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
-; CHECK-O3-CUR-NEXT: mulxq %rax, %rax, %rax
-; CHECK-O3-CUR-NEXT: shrq $3, %rax
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: load_fold_udiv1:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
-; CHECK-O3-EX-NEXT: mulxq (%rdi), %rax, %rax
-; CHECK-O3-EX-NEXT: shrq $3, %rax
-; CHECK-O3-EX-NEXT: retq
+; CHECK-O3-LABEL: load_fold_udiv1:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movq (%rdi), %rdx
+; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
+; CHECK-O3-NEXT: mulxq %rax, %rax, %rax
+; CHECK-O3-NEXT: shrq $3, %rax
+; CHECK-O3-NEXT: retq
%v = load atomic i64, ptr %p unordered, align 8
%ret = udiv i64 %v, 15
ret i64 %ret
@@ -1288,17 +1205,11 @@ define i64 @load_fold_and3(ptr %p1, ptr %p2) {
; CHECK-O0-NEXT: andq (%rsi), %rax
; CHECK-O0-NEXT: retq
;
-; CHECK-O3-CUR-LABEL: load_fold_and3:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: movq (%rsi), %rax
-; CHECK-O3-CUR-NEXT: andq (%rdi), %rax
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: load_fold_and3:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: movq (%rdi), %rax
-; CHECK-O3-EX-NEXT: andq (%rsi), %rax
-; CHECK-O3-EX-NEXT: retq
+; CHECK-O3-LABEL: load_fold_and3:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movq (%rsi), %rax
+; CHECK-O3-NEXT: andq (%rdi), %rax
+; CHECK-O3-NEXT: retq
%v = load atomic i64, ptr %p1 unordered, align 8
%v2 = load atomic i64, ptr %p2 unordered, align 8
%ret = and i64 %v, %v2
@@ -1335,17 +1246,11 @@ define i64 @load_fold_or3(ptr %p1, ptr %p2) {
; CHECK-O0-NEXT: orq (%rsi), %rax
; CHECK-O0-NEXT: retq
;
-; CHECK-O3-CUR-LABEL: load_fold_or3:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: movq (%rsi), %rax
-; CHECK-O3-CUR-NEXT: orq (%rdi), %rax
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: load_fold_or3:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: movq (%rdi), %rax
-; CHECK-O3-EX-NEXT: orq (%rsi), %rax
-; CHECK-O3-EX-NEXT: retq
+; CHECK-O3-LABEL: load_fold_or3:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movq (%rsi), %rax
+; CHECK-O3-NEXT: orq (%rdi), %rax
+; CHECK-O3-NEXT: retq
%v = load atomic i64, ptr %p1 unordered, align 8
%v2 = load atomic i64, ptr %p2 unordered, align 8
%ret = or i64 %v, %v2
@@ -1382,17 +1287,11 @@ define i64 @load_fold_xor3(ptr %p1, ptr %p2) {
; CHECK-O0-NEXT: xorq (%rsi), %rax
; CHECK-O0-NEXT: retq
;
-; CHECK-O3-CUR-LABEL: load_fold_xor3:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: movq (%rsi), %rax
-; CHECK-O3-CUR-NEXT: xorq (%rdi), %rax
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: load_fold_xor3:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: movq (%rdi), %rax
-; CHECK-O3-EX-NEXT: xorq (%rsi), %rax
-; CHECK-O3-EX-NEXT: retq
+; CHECK-O3-LABEL: load_fold_xor3:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movq (%rsi), %rax
+; CHECK-O3-NEXT: xorq (%rdi), %rax
+; CHECK-O3-NEXT: retq
%v = load atomic i64, ptr %p1 unordered, align 8
%v2 = load atomic i64, ptr %p2 unordered, align 8
%ret = xor i64 %v, %v2
@@ -1444,19 +1343,12 @@ define i1 @load_fold_icmp3(ptr %p1, ptr %p2) {
; CHECK-O0-NEXT: sete %al
; CHECK-O0-NEXT: retq
;
-; CHECK-O3-CUR-LABEL: load_fold_icmp3:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: movq (%rsi), %rax
-; CHECK-O3-CUR-NEXT: cmpq %rax, (%rdi)
-; CHECK-O3-CUR-NEXT: sete %al
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: load_fold_icmp3:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: movq (%rdi), %rax
-; CHECK-O3-EX-NEXT: cmpq (%rsi), %rax
-; CHECK-O3-EX-NEXT: sete %al
-; CHECK-O3-EX-NEXT: retq
+; CHECK-O3-LABEL: load_fold_icmp3:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movq (%rsi), %rax
+; CHECK-O3-NEXT: cmpq %rax, (%rdi)
+; CHECK-O3-NEXT: sete %al
+; CHECK-O3-NEXT: retq
%v = load atomic i64, ptr %p1 unordered, align 8
%v2 = load atomic i64, ptr %p2 unordered, align 8
%ret = icmp eq i64 %v, %v2
@@ -1653,31 +1545,14 @@ define void @rmw_fold_sdiv2(ptr %p, i64 %v) {
; Legal, as expected
define void @rmw_fold_udiv1(ptr %p, i64 %v) {
-; CHECK-O0-LABEL: rmw_fold_udiv1:
-; CHECK-O0: # %bb.0:
-; CHECK-O0-NEXT: movq (%rdi), %rdx
-; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
-; CHECK-O0-NEXT: mulxq %rax, %rax, %rax
-; CHECK-O0-NEXT: shrq $3, %rax
-; CHECK-O0-NEXT: movq %rax, (%rdi)
-; CHECK-O0-NEXT: retq
-;
-; CHECK-O3-CUR-LABEL: rmw_fold_udiv1:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: movq (%rdi), %rdx
-; CHECK-O3-CUR-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
-; CHECK-O3-CUR-NEXT: mulxq %rax, %rax, %rax
-; CHECK-O3-CUR-NEXT: shrq $3, %rax
-; CHECK-O3-CUR-NEXT: movq %rax, (%rdi)
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: rmw_fold_udiv1:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
-; CHECK-O3-EX-NEXT: mulxq (%rdi), %rax, %rax
-; CHECK-O3-EX-NEXT: shrq $3, %rax
-; CHECK-O3-EX-NEXT: movq %rax, (%rdi)
-; CHECK-O3-EX-NEXT: retq
+; CHECK-LABEL: rmw_fold_udiv1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq (%rdi), %rdx
+; CHECK-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
+; CHECK-NEXT: mulxq %rax, %rax, %rax
+; CHECK-NEXT: shrq $3, %rax
+; CHECK-NEXT: movq %rax, (%rdi)
+; CHECK-NEXT: retq
%prev = load atomic i64, ptr %p unordered, align 8
%val = udiv i64 %prev, 15
store atomic i64 %val, ptr %p unordered, align 8
@@ -1868,24 +1743,12 @@ define void @rmw_fold_urem2(ptr %p, i64 %v) {
; Legal to fold (TODO)
define void @rmw_fold_shl1(ptr %p, i64 %v) {
-; CHECK-O0-LABEL: rmw_fold_shl1:
-; CHECK-O0: # %bb.0:
-; CHECK-O0-NEXT: movq (%rdi), %rax
-; CHECK-O0-NEXT: shlq $15, %rax
-; CHECK-O0-NEXT: movq %rax, (%rdi)
-; CHECK-O0-NEXT: retq
-;
-; CHECK-O3-CUR-LABEL: rmw_fold_shl1:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: movq (%rdi), %rax
-; CHECK-O3-CUR-NEXT: shlq $15, %rax
-; CHECK-O3-CUR-NEXT: movq %rax, (%rdi)
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: rmw_fold_shl1:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: shlq $15, (%rdi)
-; CHECK-O3-EX-NEXT: retq
+; CHECK-LABEL: rmw_fold_shl1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq (%rdi), %rax
+; CHECK-NEXT: shlq $15, %rax
+; CHECK-NEXT: movq %rax, (%rdi)
+; CHECK-NEXT: retq
%prev = load atomic i64, ptr %p unordered, align 8
%val = shl i64 %prev, 15
store atomic i64 %val, ptr %p unordered, align 8
@@ -1904,18 +1767,11 @@ define void @rmw_fold_shl2(ptr %p, i64 %v) {
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
-; CHECK-O3-CUR-LABEL: rmw_fold_shl2:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: shlxq %rsi, (%rdi), %rax
-; CHECK-O3-CUR-NEXT: movq %rax, (%rdi)
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: rmw_fold_shl2:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: movq %rsi, %rcx
-; CHECK-O3-EX-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-O3-EX-NEXT: shlq %cl, (%rdi)
-; CHECK-O3-EX-NEXT: retq
+; CHECK-O3-LABEL: rmw_fold_shl2:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: shlxq %rsi, (%rdi), %rax
+; CHECK-O3-NEXT: movq %rax, (%rdi)
+; CHECK-O3-NEXT: retq
%prev = load atomic i64, ptr %p unordered, align 8
%val = shl i64 %prev, %v
store atomic i64 %val, ptr %p unordered, align 8
@@ -1924,24 +1780,12 @@ define void @rmw_fold_shl2(ptr %p, i64 %v) {
; Legal to fold (TODO)
define void @rmw_fold_lshr1(ptr %p, i64 %v) {
-; CHECK-O0-LABEL: rmw_fold_lshr1:
-; CHECK-O0: # %bb.0:
-; CHECK-O0-NEXT: movq (%rdi), %rax
-; CHECK-O0-NEXT: shrq $15, %rax
-; CHECK-O0-NEXT: movq %rax, (%rdi)
-; CHECK-O0-NEXT: retq
-;
-; CHECK-O3-CUR-LABEL: rmw_fold_lshr1:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: movq (%rdi), %rax
-; CHECK-O3-CUR-NEXT: shrq $15, %rax
-; CHECK-O3-CUR-NEXT: movq %rax, (%rdi)
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: rmw_fold_lshr1:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: shrq $15, (%rdi)
-; CHECK-O3-EX-NEXT: retq
+; CHECK-LABEL: rmw_fold_lshr1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq (%rdi), %rax
+; CHECK-NEXT: shrq $15, %rax
+; CHECK-NEXT: movq %rax, (%rdi)
+; CHECK-NEXT: retq
%prev = load atomic i64, ptr %p unordered, align 8
%val = lshr i64 %prev, 15
store atomic i64 %val, ptr %p unordered, align 8
@@ -1960,18 +1804,11 @@ define void @rmw_fold_lshr2(ptr %p, i64 %v) {
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
-; CHECK-O3-CUR-LABEL: rmw_fold_lshr2:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: shrxq %rsi, (%rdi), %rax
-; CHECK-O3-CUR-NEXT: movq %rax, (%rdi)
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: rmw_fold_lshr2:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: movq %rsi, %rcx
-; CHECK-O3-EX-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-O3-EX-NEXT: shrq %cl, (%rdi)
-; CHECK-O3-EX-NEXT: retq
+; CHECK-O3-LABEL: rmw_fold_lshr2:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: shrxq %rsi, (%rdi), %rax
+; CHECK-O3-NEXT: movq %rax, (%rdi)
+; CHECK-O3-NEXT: retq
%prev = load atomic i64, ptr %p unordered, align 8
%val = lshr i64 %prev, %v
store atomic i64 %val, ptr %p unordered, align 8
@@ -1980,24 +1817,12 @@ define void @rmw_fold_lshr2(ptr %p, i64 %v) {
; Legal to fold (TODO)
define void @rmw_fold_ashr1(ptr %p, i64 %v) {
-; CHECK-O0-LABEL: rmw_fold_ashr1:
-; CHECK-O0: # %bb.0:
-; CHECK-O0-NEXT: movq (%rdi), %rax
-; CHECK-O0-NEXT: sarq $15, %rax
-; CHECK-O0-NEXT: movq %rax, (%rdi)
-; CHECK-O0-NEXT: retq
-;
-; CHECK-O3-CUR-LABEL: rmw_fold_ashr1:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: movq (%rdi), %rax
-; CHECK-O3-CUR-NEXT: sarq $15, %rax
-; CHECK-O3-CUR-NEXT: movq %rax, (%rdi)
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: rmw_fold_ashr1:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: sarq $15, (%rdi)
-; CHECK-O3-EX-NEXT: retq
+; CHECK-LABEL: rmw_fold_ashr1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq (%rdi), %rax
+; CHECK-NEXT: sarq $15, %rax
+; CHECK-NEXT: movq %rax, (%rdi)
+; CHECK-NEXT: retq
%prev = load atomic i64, ptr %p unordered, align 8
%val = ashr i64 %prev, 15
store atomic i64 %val, ptr %p unordered, align 8
@@ -2016,18 +1841,11 @@ define void @rmw_fold_ashr2(ptr %p, i64 %v) {
; CHECK-O0-NEXT: movq %rax, (%rdi)
; CHECK-O0-NEXT: retq
;
-; CHECK-O3-CUR-LABEL: rmw_fold_ashr2:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: sarxq %rsi, (%rdi), %rax
-; CHECK-O3-CUR-NEXT: movq %rax, (%rdi)
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: rmw_fold_ashr2:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: movq %rsi, %rcx
-; CHECK-O3-EX-NEXT: # kill: def $cl killed $cl killed $rcx
-; CHECK-O3-EX-NEXT: sarq %cl, (%rdi)
-; CHECK-O3-EX-NEXT: retq
+; CHECK-O3-LABEL: rmw_fold_ashr2:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: sarxq %rsi, (%rdi), %rax
+; CHECK-O3-NEXT: movq %rax, (%rdi)
+; CHECK-O3-NEXT: retq
%prev = load atomic i64, ptr %p unordered, align 8
%val = ashr i64 %prev, %v
store atomic i64 %val, ptr %p unordered, align 8
@@ -2391,26 +2209,12 @@ define i64 @fold_constant(i64 %arg) {
}
define i64 @fold_constant_clobber(ptr %p, i64 %arg) {
-; CHECK-O0-LABEL: fold_constant_clobber:
-; CHECK-O0: # %bb.0:
-; CHECK-O0-NEXT: movq Constant(%rip), %rax
-; CHECK-O0-NEXT: movq $5, (%rdi)
-; CHECK-O0-NEXT: addq %rsi, %rax
-; CHECK-O0-NEXT: retq
-;
-; CHECK-O3-CUR-LABEL: fold_constant_clobber:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: movq Constant(%rip), %rax
-; CHECK-O3-CUR-NEXT: movq $5, (%rdi)
-; CHECK-O3-CUR-NEXT: addq %rsi, %rax
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: fold_constant_clobber:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: movq %rsi, %rax
-; CHECK-O3-EX-NEXT: addq Constant(%rip), %rax
-; CHECK-O3-EX-NEXT: movq $5, (%rdi)
-; CHECK-O3-EX-NEXT: retq
+; CHECK-LABEL: fold_constant_clobber:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq Constant(%rip), %rax
+; CHECK-NEXT: movq $5, (%rdi)
+; CHECK-NEXT: addq %rsi, %rax
+; CHECK-NEXT: retq
%v = load atomic i64, ptr @Constant unordered, align 8
store i64 5, ptr %p
%ret = add i64 %v, %arg
@@ -2418,26 +2222,12 @@ define i64 @fold_constant_clobber(ptr %p, i64 %arg) {
}
define i64 @fold_constant_fence(i64 %arg) {
-; CHECK-O0-LABEL: fold_constant_fence:
-; CHECK-O0: # %bb.0:
-; CHECK-O0-NEXT: movq Constant(%rip), %rax
-; CHECK-O0-NEXT: mfence
-; CHECK-O0-NEXT: addq %rdi, %rax
-; CHECK-O0-NEXT: retq
-;
-; CHECK-O3-CUR-LABEL: fold_constant_fence:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: movq Constant(%rip), %rax
-; CHECK-O3-CUR-NEXT: mfence
-; CHECK-O3-CUR-NEXT: addq %rdi, %rax
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: fold_constant_fence:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: movq %rdi, %rax
-; CHECK-O3-EX-NEXT: addq Constant(%rip), %rax
-; CHECK-O3-EX-NEXT: mfence
-; CHECK-O3-EX-NEXT: retq
+; CHECK-LABEL: fold_constant_fence:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq Constant(%rip), %rax
+; CHECK-NEXT: mfence
+; CHECK-NEXT: addq %rdi, %rax
+; CHECK-NEXT: retq
%v = load atomic i64, ptr @Constant unordered, align 8
fence seq_cst
%ret = add i64 %v, %arg
@@ -2445,26 +2235,12 @@ define i64 @fold_constant_fence(i64 %arg) {
}
define i64 @fold_invariant_clobber(ptr dereferenceable(8) %p, i64 %arg) {
-; CHECK-O0-LABEL: fold_invariant_clobber:
-; CHECK-O0: # %bb.0:
-; CHECK-O0-NEXT: movq (%rdi), %rax
-; CHECK-O0-NEXT: movq $5, (%rdi)
-; CHECK-O0-NEXT: addq %rsi, %rax
-; CHECK-O0-NEXT: retq
-;
-; CHECK-O3-CUR-LABEL: fold_invariant_clobber:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: movq (%rdi), %rax
-; CHECK-O3-CUR-NEXT: movq $5, (%rdi)
-; CHECK-O3-CUR-NEXT: addq %rsi, %rax
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: fold_invariant_clobber:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: movq %rsi, %rax
-; CHECK-O3-EX-NEXT: addq (%rdi), %rax
-; CHECK-O3-EX-NEXT: movq $5, (%rdi)
-; CHECK-O3-EX-NEXT: retq
+; CHECK-LABEL: fold_invariant_clobber:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq (%rdi), %rax
+; CHECK-NEXT: movq $5, (%rdi)
+; CHECK-NEXT: addq %rsi, %rax
+; CHECK-NEXT: retq
%v = load atomic i64, ptr %p unordered, align 8, !invariant.load !{}
store i64 5, ptr %p
%ret = add i64 %v, %arg
@@ -2473,26 +2249,12 @@ define i64 @fold_invariant_clobber(ptr dereferenceable(8) %p, i64 %arg) {
define i64 @fold_invariant_fence(ptr dereferenceable(8) %p, i64 %arg) {
-; CHECK-O0-LABEL: fold_invariant_fence:
-; CHECK-O0: # %bb.0:
-; CHECK-O0-NEXT: movq (%rdi), %rax
-; CHECK-O0-NEXT: mfence
-; CHECK-O0-NEXT: addq %rsi, %rax
-; CHECK-O0-NEXT: retq
-;
-; CHECK-O3-CUR-LABEL: fold_invariant_fence:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: movq (%rdi), %rax
-; CHECK-O3-CUR-NEXT: mfence
-; CHECK-O3-CUR-NEXT: addq %rsi, %rax
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: fold_invariant_fence:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: movq %rsi, %rax
-; CHECK-O3-EX-NEXT: addq (%rdi), %rax
-; CHECK-O3-EX-NEXT: mfence
-; CHECK-O3-EX-NEXT: retq
+; CHECK-LABEL: fold_invariant_fence:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq (%rdi), %rax
+; CHECK-NEXT: mfence
+; CHECK-NEXT: addq %rsi, %rax
+; CHECK-NEXT: retq
%v = load atomic i64, ptr %p unordered, align 8, !invariant.load !{}
fence seq_cst
%ret = add i64 %v, %arg
@@ -2503,32 +2265,18 @@ define i64 @fold_invariant_fence(ptr dereferenceable(8) %p, i64 %arg) {
; Exercise a few cases involving any extend idioms
define i16 @load_i8_anyext_i16(ptr %ptr) {
-; CHECK-O0-CUR-LABEL: load_i8_anyext_i16:
-; CHECK-O0-CUR: # %bb.0:
-; CHECK-O0-CUR-NEXT: movb (%rdi), %al
-; CHECK-O0-CUR-NEXT: movzbl %al, %eax
-; CHECK-O0-CUR-NEXT: # kill: def $ax killed $ax killed $eax
-; CHECK-O0-CUR-NEXT: retq
-;
-; CHECK-O3-CUR-LABEL: load_i8_anyext_i16:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: movzbl (%rdi), %eax
-; CHECK-O3-CUR-NEXT: # kill: def $ax killed $ax killed $eax
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O0-EX-LABEL: load_i8_anyext_i16:
-; CHECK-O0-EX: # %bb.0:
-; CHECK-O0-EX-NEXT: vpbroadcastb (%rdi), %xmm0
-; CHECK-O0-EX-NEXT: vmovd %xmm0, %eax
-; CHECK-O0-EX-NEXT: # kill: def $ax killed $ax killed $eax
-; CHECK-O0-EX-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: load_i8_anyext_i16:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: vpbroadcastb (%rdi), %xmm0
-; CHECK-O3-EX-NEXT: vmovd %xmm0, %eax
-; CHECK-O3-EX-NEXT: # kill: def $ax killed $ax killed $eax
-; CHECK-O3-EX-NEXT: retq
+; CHECK-O0-LABEL: load_i8_anyext_i16:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movb (%rdi), %al
+; CHECK-O0-NEXT: movzbl %al, %eax
+; CHECK-O0-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-O0-NEXT: retq
+;
+; CHECK-O3-LABEL: load_i8_anyext_i16:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movzbl (%rdi), %eax
+; CHECK-O3-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-O3-NEXT: retq
%v = load atomic i8, ptr %ptr unordered, align 2
%vec = insertelement <2 x i8> undef, i8 %v, i32 0
%res = bitcast <2 x i8> %vec to i16
@@ -2536,28 +2284,16 @@ define i16 @load_i8_anyext_i16(ptr %ptr) {
}
define i32 @load_i8_anyext_i32(ptr %ptr) {
-; CHECK-O0-CUR-LABEL: load_i8_anyext_i32:
-; CHECK-O0-CUR: # %bb.0:
-; CHECK-O0-CUR-NEXT: movb (%rdi), %al
-; CHECK-O0-CUR-NEXT: movzbl %al, %eax
-; CHECK-O0-CUR-NEXT: retq
-;
-; CHECK-O3-CUR-LABEL: load_i8_anyext_i32:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: movzbl (%rdi), %eax
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O0-EX-LABEL: load_i8_anyext_i32:
-; CHECK-O0-EX: # %bb.0:
-; CHECK-O0-EX-NEXT: vpbroadcastb (%rdi), %xmm0
-; CHECK-O0-EX-NEXT: vmovd %xmm0, %eax
-; CHECK-O0-EX-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: load_i8_anyext_i32:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: vpbroadcastb (%rdi), %xmm0
-; CHECK-O3-EX-NEXT: vmovd %xmm0, %eax
-; CHECK-O3-EX-NEXT: retq
+; CHECK-O0-LABEL: load_i8_anyext_i32:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movb (%rdi), %al
+; CHECK-O0-NEXT: movzbl %al, %eax
+; CHECK-O0-NEXT: retq
+;
+; CHECK-O3-LABEL: load_i8_anyext_i32:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movzbl (%rdi), %eax
+; CHECK-O3-NEXT: retq
%v = load atomic i8, ptr %ptr unordered, align 4
%vec = insertelement <4 x i8> undef, i8 %v, i32 0
%res = bitcast <4 x i8> %vec to i32
@@ -2565,29 +2301,17 @@ define i32 @load_i8_anyext_i32(ptr %ptr) {
}
define i32 @load_i16_anyext_i32(ptr %ptr) {
-; CHECK-O0-CUR-LABEL: load_i16_anyext_i32:
-; CHECK-O0-CUR: # %bb.0:
-; CHECK-O0-CUR-NEXT: movw (%rdi), %cx
-; CHECK-O0-CUR-NEXT: # implicit-def: $eax
-; CHECK-O0-CUR-NEXT: movw %cx, %ax
-; CHECK-O0-CUR-NEXT: retq
-;
-; CHECK-O3-CUR-LABEL: load_i16_anyext_i32:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: movzwl (%rdi), %eax
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O0-EX-LABEL: load_i16_anyext_i32:
-; CHECK-O0-EX: # %bb.0:
-; CHECK-O0-EX-NEXT: vpbroadcastw (%rdi), %xmm0
-; CHECK-O0-EX-NEXT: vmovd %xmm0, %eax
-; CHECK-O0-EX-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: load_i16_anyext_i32:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: vpbroadcastw (%rdi), %xmm0
-; CHECK-O3-EX-NEXT: vmovd %xmm0, %eax
-; CHECK-O3-EX-NEXT: retq
+; CHECK-O0-LABEL: load_i16_anyext_i32:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movw (%rdi), %cx
+; CHECK-O0-NEXT: # implicit-def: $eax
+; CHECK-O0-NEXT: movw %cx, %ax
+; CHECK-O0-NEXT: retq
+;
+; CHECK-O3-LABEL: load_i16_anyext_i32:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-O3-NEXT: retq
%v = load atomic i16, ptr %ptr unordered, align 4
%vec = insertelement <2 x i16> undef, i16 %v, i64 0
%res = bitcast <2 x i16> %vec to i32
@@ -2595,33 +2319,21 @@ define i32 @load_i16_anyext_i32(ptr %ptr) {
}
define i64 @load_i16_anyext_i64(ptr %ptr) {
-; CHECK-O0-CUR-LABEL: load_i16_anyext_i64:
-; CHECK-O0-CUR: # %bb.0:
-; CHECK-O0-CUR-NEXT: movw (%rdi), %cx
-; CHECK-O0-CUR-NEXT: # implicit-def: $eax
-; CHECK-O0-CUR-NEXT: movw %cx, %ax
-; CHECK-O0-CUR-NEXT: vmovd %eax, %xmm0
-; CHECK-O0-CUR-NEXT: vmovq %xmm0, %rax
-; CHECK-O0-CUR-NEXT: retq
-;
-; CHECK-O3-CUR-LABEL: load_i16_anyext_i64:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: movzwl (%rdi), %eax
-; CHECK-O3-CUR-NEXT: vmovd %eax, %xmm0
-; CHECK-O3-CUR-NEXT: vmovq %xmm0, %rax
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O0-EX-LABEL: load_i16_anyext_i64:
-; CHECK-O0-EX: # %bb.0:
-; CHECK-O0-EX-NEXT: vpbroadcastw (%rdi), %xmm0
-; CHECK-O0-EX-NEXT: vmovq %xmm0, %rax
-; CHECK-O0-EX-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: load_i16_anyext_i64:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: vpbroadcastw (%rdi), %xmm0
-; CHECK-O3-EX-NEXT: vmovq %xmm0, %rax
-; CHECK-O3-EX-NEXT: retq
+; CHECK-O0-LABEL: load_i16_anyext_i64:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movw (%rdi), %cx
+; CHECK-O0-NEXT: # implicit-def: $eax
+; CHECK-O0-NEXT: movw %cx, %ax
+; CHECK-O0-NEXT: vmovd %eax, %xmm0
+; CHECK-O0-NEXT: vmovq %xmm0, %rax
+; CHECK-O0-NEXT: retq
+;
+; CHECK-O3-LABEL: load_i16_anyext_i64:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movzwl (%rdi), %eax
+; CHECK-O3-NEXT: vmovd %eax, %xmm0
+; CHECK-O3-NEXT: vmovq %xmm0, %rax
+; CHECK-O3-NEXT: retq
%v = load atomic i16, ptr %ptr unordered, align 8
%vec = insertelement <4 x i16> undef, i16 %v, i64 0
%res = bitcast <4 x i16> %vec to i64
@@ -2675,30 +2387,18 @@ define i1 @fold_cmp_over_fence(ptr %p, i32 %v1) {
; CHECK-O0-NEXT: # kill: def $al killed $al killed $eax
; CHECK-O0-NEXT: retq
;
-; CHECK-O3-CUR-LABEL: fold_cmp_over_fence:
-; CHECK-O3-CUR: # %bb.0:
-; CHECK-O3-CUR-NEXT: movl (%rdi), %eax
-; CHECK-O3-CUR-NEXT: mfence
-; CHECK-O3-CUR-NEXT: cmpl %eax, %esi
-; CHECK-O3-CUR-NEXT: jne .LBB116_2
-; CHECK-O3-CUR-NEXT: # %bb.1: # %taken
-; CHECK-O3-CUR-NEXT: movb $1, %al
-; CHECK-O3-CUR-NEXT: retq
-; CHECK-O3-CUR-NEXT: .LBB116_2: # %untaken
-; CHECK-O3-CUR-NEXT: xorl %eax, %eax
-; CHECK-O3-CUR-NEXT: retq
-;
-; CHECK-O3-EX-LABEL: fold_cmp_over_fence:
-; CHECK-O3-EX: # %bb.0:
-; CHECK-O3-EX-NEXT: cmpl (%rdi), %esi
-; CHECK-O3-EX-NEXT: mfence
-; CHECK-O3-EX-NEXT: jne .LBB116_2
-; CHECK-O3-EX-NEXT: # %bb.1: # %taken
-; CHECK-O3-EX-NEXT: movb $1, %al
-; CHECK-O3-EX-NEXT: retq
-; CHECK-O3-EX-NEXT: .LBB116_2: # %untaken
-; CHECK-O3-EX-NEXT: xorl %eax, %eax
-; CHECK-O3-EX-NEXT: retq
+; CHECK-O3-LABEL: fold_cmp_over_fence:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movl (%rdi), %eax
+; CHECK-O3-NEXT: mfence
+; CHECK-O3-NEXT: cmpl %eax, %esi
+; CHECK-O3-NEXT: jne .LBB116_2
+; CHECK-O3-NEXT: # %bb.1: # %taken
+; CHECK-O3-NEXT: movb $1, %al
+; CHECK-O3-NEXT: retq
+; CHECK-O3-NEXT: .LBB116_2: # %untaken
+; CHECK-O3-NEXT: xorl %eax, %eax
+; CHECK-O3-NEXT: retq
%v2 = load atomic i32, ptr %p unordered, align 4
fence seq_cst
%cmp = icmp eq i32 %v1, %v2
More information about the llvm-commits
mailing list