[llvm] 65cd2c7 - Revert "[CodeGen] Add support for multiple memory operands in MachineInstr::mayAlias"
Jean-Michel Gorius via llvm-commits
llvm-commits at lists.llvm.org
Fri May 22 12:30:22 PDT 2020
Author: Jean-Michel Gorius
Date: 2020-05-22T21:26:46+02:00
New Revision: 65cd2c7a8015577fea15c861f41d2e4b5768961f
URL: https://github.com/llvm/llvm-project/commit/65cd2c7a8015577fea15c861f41d2e4b5768961f
DIFF: https://github.com/llvm/llvm-project/commit/65cd2c7a8015577fea15c861f41d2e4b5768961f.diff
LOG: Revert "[CodeGen] Add support for multiple memory operands in MachineInstr::mayAlias"
This temporarily reverts commit 7019cea26dfef5882c96f278c32d0f9c49a5e516.
It seems that, for some targets, there are instructions with a large number of memory operands (probably more than would be expected). This causes many buildbots to time out and report failed builds. While investigations are ongoing to find out why this happens, revert the changes.
Added:
Modified:
llvm/lib/CodeGen/MachineInstr.cpp
llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
llvm/test/CodeGen/AArch64/merge-store-dependency.ll
llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
llvm/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll
llvm/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll
llvm/test/CodeGen/ARM/cortex-a57-misched-vstm.ll
llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
llvm/test/CodeGen/Thumb2/mve-phireg.ll
llvm/test/CodeGen/Thumb2/mve-vst3.ll
llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
llvm/test/CodeGen/X86/store_op_load_fold2.ll
Removed:
llvm/test/CodeGen/X86/instr-sched-multiple-memops.mir
################################################################################
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index 67c243897dbc..7afa61f2c4db 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -1228,88 +1228,81 @@ bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other,
if (TII->areMemAccessesTriviallyDisjoint(*this, Other))
return false;
- if (memoperands_empty() || Other.memoperands_empty())
+ // FIXME: Need to handle multiple memory operands to support all targets.
+ if (!hasOneMemOperand() || !Other.hasOneMemOperand())
return true;
- auto HasAlias = [&](const MachineMemOperand &MMOa,
- const MachineMemOperand &MMOb) {
- // The following interface to AA is fashioned after DAGCombiner::isAlias
- // and operates with MachineMemOperand offset with some important
- // assumptions:
- // - LLVM fundamentally assumes flat address spaces.
- // - MachineOperand offset can *only* result from legalization and
- // cannot affect queries other than the trivial case of overlap
- // checking.
- // - These offsets never wrap and never step outside
- // of allocated objects.
- // - There should never be any negative offsets here.
- //
- // FIXME: Modify API to hide this math from "user"
- // Even before we go to AA we can reason locally about some
- // memory objects. It can save compile time, and possibly catch some
- // corner cases not currently covered.
-
- int64_t OffsetA = MMOa.getOffset();
- int64_t OffsetB = MMOb.getOffset();
- int64_t MinOffset = std::min(OffsetA, OffsetB);
-
- uint64_t WidthA = MMOa.getSize();
- uint64_t WidthB = MMOb.getSize();
- bool KnownWidthA = WidthA != MemoryLocation::UnknownSize;
- bool KnownWidthB = WidthB != MemoryLocation::UnknownSize;
-
- const Value *ValA = MMOa.getValue();
- const Value *ValB = MMOb.getValue();
- bool SameVal = (ValA && ValB && (ValA == ValB));
- if (!SameVal) {
- const PseudoSourceValue *PSVa = MMOa.getPseudoValue();
- const PseudoSourceValue *PSVb = MMOb.getPseudoValue();
- if (PSVa && ValB && !PSVa->mayAlias(&MFI))
- return false;
- if (PSVb && ValA && !PSVb->mayAlias(&MFI))
- return false;
- if (PSVa && PSVb && (PSVa == PSVb))
- SameVal = true;
- }
-
- if (SameVal) {
- if (!KnownWidthA || !KnownWidthB)
- return true;
- int64_t MaxOffset = std::max(OffsetA, OffsetB);
- int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
- return (MinOffset + LowWidth > MaxOffset);
- }
+ MachineMemOperand *MMOa = *memoperands_begin();
+ MachineMemOperand *MMOb = *Other.memoperands_begin();
+
+ // The following interface to AA is fashioned after DAGCombiner::isAlias
+ // and operates with MachineMemOperand offset with some important
+ // assumptions:
+ // - LLVM fundamentally assumes flat address spaces.
+ // - MachineOperand offset can *only* result from legalization and
+ // cannot affect queries other than the trivial case of overlap
+ // checking.
+ // - These offsets never wrap and never step outside
+ // of allocated objects.
+ // - There should never be any negative offsets here.
+ //
+ // FIXME: Modify API to hide this math from "user"
+ // Even before we go to AA we can reason locally about some
+ // memory objects. It can save compile time, and possibly catch some
+ // corner cases not currently covered.
+
+ int64_t OffsetA = MMOa->getOffset();
+ int64_t OffsetB = MMOb->getOffset();
+ int64_t MinOffset = std::min(OffsetA, OffsetB);
+
+ uint64_t WidthA = MMOa->getSize();
+ uint64_t WidthB = MMOb->getSize();
+ bool KnownWidthA = WidthA != MemoryLocation::UnknownSize;
+ bool KnownWidthB = WidthB != MemoryLocation::UnknownSize;
+
+ const Value *ValA = MMOa->getValue();
+ const Value *ValB = MMOb->getValue();
+ bool SameVal = (ValA && ValB && (ValA == ValB));
+ if (!SameVal) {
+ const PseudoSourceValue *PSVa = MMOa->getPseudoValue();
+ const PseudoSourceValue *PSVb = MMOb->getPseudoValue();
+ if (PSVa && ValB && !PSVa->mayAlias(&MFI))
+ return false;
+ if (PSVb && ValA && !PSVb->mayAlias(&MFI))
+ return false;
+ if (PSVa && PSVb && (PSVa == PSVb))
+ SameVal = true;
+ }
- if (!AA)
+ if (SameVal) {
+ if (!KnownWidthA || !KnownWidthB)
return true;
+ int64_t MaxOffset = std::max(OffsetA, OffsetB);
+ int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
+ return (MinOffset + LowWidth > MaxOffset);
+ }
- if (!ValA || !ValB)
- return true;
+ if (!AA)
+ return true;
- assert((OffsetA >= 0) && "Negative MachineMemOperand offset");
- assert((OffsetB >= 0) && "Negative MachineMemOperand offset");
+ if (!ValA || !ValB)
+ return true;
- int64_t OverlapA = KnownWidthA ? WidthA + OffsetA - MinOffset
- : MemoryLocation::UnknownSize;
- int64_t OverlapB = KnownWidthB ? WidthB + OffsetB - MinOffset
- : MemoryLocation::UnknownSize;
+ assert((OffsetA >= 0) && "Negative MachineMemOperand offset");
+ assert((OffsetB >= 0) && "Negative MachineMemOperand offset");
- AliasResult AAResult =
- AA->alias(MemoryLocation(ValA, OverlapA,
- UseTBAA ? MMOa.getAAInfo() : AAMDNodes()),
- MemoryLocation(ValB, OverlapB,
- UseTBAA ? MMOb.getAAInfo() : AAMDNodes()));
+ int64_t OverlapA = KnownWidthA ? WidthA + OffsetA - MinOffset
+ : MemoryLocation::UnknownSize;
+ int64_t OverlapB = KnownWidthB ? WidthB + OffsetB - MinOffset
+ : MemoryLocation::UnknownSize;
- return (AAResult != NoAlias);
- };
+ AliasResult AAResult = AA->alias(
+ MemoryLocation(ValA, OverlapA,
+ UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
+ MemoryLocation(ValB, OverlapB,
+ UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
- for (auto &&MMOa : memoperands()) {
- for (auto &&MMOb : Other.memoperands()) {
- if (HasAlias(*MMOa, *MMOb))
- return true;
- }
- }
- return false;
+ return (AAResult != NoAlias);
}
/// hasOrderedMemoryRef - Return true if this instruction may have an ordered
diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index fb3c9a781e86..edc5c4bdda3d 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -544,14 +544,9 @@ static inline bool isGlobalMemoryObject(AAResults *AA, MachineInstr *MI) {
void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb,
unsigned Latency) {
if (SUa->getInstr()->mayAlias(AAForDep, *SUb->getInstr(), UseTBAA)) {
- LLVM_DEBUG(dbgs() << "Adding chain dependency\n from: " << *SUb->getInstr()
- << " to: " << *SUa->getInstr());
SDep Dep(SUa, SDep::MayAliasMem);
Dep.setLatency(Latency);
SUb->addPred(Dep);
- } else {
- LLVM_DEBUG(dbgs() << "Not adding chain dependency\n from: "
- << *SUb->getInstr() << " to: " << *SUa->getInstr());
}
}
diff --git a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
index d4196294758d..5613db1e5214 100644
--- a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
+++ b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
@@ -19,11 +19,11 @@ define void @test(%struct1* %fde, i32 %fd, void (i32, i32, i8*)* %func, i8* %arg
; A53-NEXT: mov x19, x8
; A53-NEXT: mov w0, w1
; A53-NEXT: mov w9, #256
-; A53-NEXT: stp x2, x3, [x8, #32]
-; A53-NEXT: mov x2, x8
; A53-NEXT: str q0, [x19, #16]!
; A53-NEXT: str w1, [x19]
; A53-NEXT: mov w1, #4
+; A53-NEXT: stp x2, x3, [x8, #32]
+; A53-NEXT: mov x2, x8
; A53-NEXT: str q0, [x8]
; A53-NEXT: strh w9, [x8, #24]
; A53-NEXT: str wzr, [x8, #20]
diff --git a/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll b/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
index 693f33553591..9942d6df99a4 100644
--- a/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
+++ b/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
@@ -503,12 +503,12 @@ define void @conv_v8f16_to_i128( <8 x half> %a, i128* %store ) {
; CHECK-NEXT: vmov.32 r3, d16[1]
; CHECK-NEXT: vmov.32 r1, d16[0]
; CHECK-NEXT: subs r12, r12, #1
-; CHECK-NEXT: str r12, [r0, #12]
; CHECK-NEXT: sbcs r2, r2, #0
-; CHECK-NEXT: str r2, [r0, #8]
; CHECK-NEXT: sbcs r3, r3, #0
; CHECK-NEXT: sbc r1, r1, #0
; CHECK-NEXT: stm r0, {r1, r3}
+; CHECK-NEXT: str r2, [r0, #8]
+; CHECK-NEXT: str r12, [r0, #12]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
diff --git a/llvm/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll b/llvm/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll
index 1a2ad4d9e848..88b772cc294e 100644
--- a/llvm/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll
+++ b/llvm/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll
@@ -9,7 +9,7 @@
; CHECK: ********** MI Scheduling **********
; We need second, post-ra scheduling to have VLDM instruction combined from single-loads
; CHECK: ********** MI Scheduling **********
-; CHECK: SU(1):{{.*}}VLDMDIA_UPD
+; CHECK: VLDMDIA_UPD
; CHECK: rdefs left
; CHECK-NEXT: Latency : 6
; CHECK: Successors:
diff --git a/llvm/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll b/llvm/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll
index 3007630bef63..c517f46e5614 100644
--- a/llvm/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll
+++ b/llvm/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll
@@ -5,7 +5,7 @@
; We need second, post-ra scheduling to have VSTM instruction combined from single-stores
; CHECK: ********** MI Scheduling **********
; CHECK: schedule starting
-; CHECK: SU(2):{{.*}}VSTMDIA_UPD
+; CHECK: VSTMDIA_UPD
; CHECK: rdefs left
; CHECK-NEXT: Latency : 4
; CHECK: Successors:
diff --git a/llvm/test/CodeGen/ARM/cortex-a57-misched-vstm.ll b/llvm/test/CodeGen/ARM/cortex-a57-misched-vstm.ll
index f88bb473dc9a..5e9041ce0842 100644
--- a/llvm/test/CodeGen/ARM/cortex-a57-misched-vstm.ll
+++ b/llvm/test/CodeGen/ARM/cortex-a57-misched-vstm.ll
@@ -5,7 +5,7 @@
; We need second, post-ra scheduling to have VSTM instruction combined from single-stores
; CHECK: ********** MI Scheduling **********
; CHECK: schedule starting
-; CHECK: SU(3):{{.*}}VSTMDIA
+; CHECK: VSTMDIA
; CHECK: rdefs left
; CHECK-NEXT: Latency : 2
diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
index 02bd955c2336..111a5871a17b 100644
--- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
@@ -1092,7 +1092,6 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc
; CHECK-NEXT: ldrd lr, r10, [r12, #24]
; CHECK-NEXT: vstrb.8 q0, [r11], #16
; CHECK-NEXT: vldrw.u32 q0, [r8], #32
-; CHECK-NEXT: strd r11, r1, [sp, #24] @ 8-byte Folded Spill
; CHECK-NEXT: vldrw.u32 q1, [r8, #-28]
; CHECK-NEXT: vmul.f32 q0, q0, r0
; CHECK-NEXT: vldrw.u32 q6, [r8, #-24]
@@ -1104,12 +1103,13 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc
; CHECK-NEXT: vfma.f32 q0, q4, r6
; CHECK-NEXT: vldrw.u32 q3, [r8, #-8]
; CHECK-NEXT: vfma.f32 q0, q5, r5
-; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: vfma.f32 q0, q2, r3
; CHECK-NEXT: vldrw.u32 q1, [r8, #-4]
+; CHECK-NEXT: vfma.f32 q0, q2, r3
+; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: vfma.f32 q0, q3, lr
-; CHECK-NEXT: cmp r0, #16
+; CHECK-NEXT: strd r11, r1, [sp, #24] @ 8-byte Folded Spill
; CHECK-NEXT: vfma.f32 q0, q1, r10
+; CHECK-NEXT: cmp r0, #16
; CHECK-NEXT: blo .LBB16_7
; CHECK-NEXT: @ %bb.5: @ %for.body.preheader
; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
diff --git a/llvm/test/CodeGen/Thumb2/mve-phireg.ll b/llvm/test/CodeGen/Thumb2/mve-phireg.ll
index 0fe26fbc4753..e7d6a7323bc1 100644
--- a/llvm/test/CodeGen/Thumb2/mve-phireg.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-phireg.ll
@@ -168,14 +168,16 @@ define dso_local i32 @e() #0 {
; CHECK-NEXT: vmov q1, q4
; CHECK-NEXT: vmov s1, r7
; CHECK-NEXT: vmov.32 q1[1], r6
-; CHECK-NEXT: vmov.32 q5[0], r7
+; CHECK-NEXT: mov.w r10, #0
; CHECK-NEXT: vmov.32 q1[2], r5
-; CHECK-NEXT: vmov s9, r4
+; CHECK-NEXT: vmov.32 q5[0], r7
; CHECK-NEXT: vmov.32 q1[3], r4
-; CHECK-NEXT: vdup.32 q6, r7
+; CHECK-NEXT: strd r0, r10, [sp, #24]
; CHECK-NEXT: vstrw.32 q1, [sp, #76]
; CHECK-NEXT: vmov q1, q5
+; CHECK-NEXT: vmov s9, r4
; CHECK-NEXT: vmov.32 q1[1], r7
+; CHECK-NEXT: vdup.32 q6, r7
; CHECK-NEXT: vmov.f32 s2, s1
; CHECK-NEXT: vmov.f32 s8, s0
; CHECK-NEXT: vmov.32 q1[2], r6
@@ -183,7 +185,6 @@ define dso_local i32 @e() #0 {
; CHECK-NEXT: vmov q7, q6
; CHECK-NEXT: vmov.f32 s10, s1
; CHECK-NEXT: mov.w r8, #4
-; CHECK-NEXT: mov.w r10, #0
; CHECK-NEXT: vmov.32 q1[3], r4
; CHECK-NEXT: vmov.32 q3[0], r4
; CHECK-NEXT: vmov.32 q7[1], r4
@@ -191,7 +192,6 @@ define dso_local i32 @e() #0 {
; CHECK-NEXT: vmov.f32 s11, s3
; CHECK-NEXT: movs r1, #64
; CHECK-NEXT: strh.w r8, [sp, #390]
-; CHECK-NEXT: strd r0, r10, [sp, #24]
; CHECK-NEXT: vstrw.32 q0, [sp, #44]
; CHECK-NEXT: str r0, [r0]
; CHECK-NEXT: vstrw.32 q2, [r0]
diff --git a/llvm/test/CodeGen/Thumb2/mve-vst3.ll b/llvm/test/CodeGen/Thumb2/mve-vst3.ll
index 1f3502987d2d..52de7a45e85b 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vst3.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vst3.ll
@@ -24,8 +24,8 @@ define void @vst3_v2i32(<2 x i32> *%src, <6 x i32> *%dst) {
; CHECK-NEXT: vmov.f32 s9, s6
; CHECK-NEXT: vmov.f32 s10, s0
; CHECK-NEXT: vmov.f32 s11, s5
-; CHECK-NEXT: vstrw.32 q2, [r1]
; CHECK-NEXT: strd r2, r0, [r1, #16]
+; CHECK-NEXT: vstrw.32 q2, [r1]
; CHECK-NEXT: pop {r4, pc}
entry:
%s1 = getelementptr <2 x i32>, <2 x i32>* %src, i32 0
diff --git a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
index f57c9226179b..ac1c814b838e 100644
--- a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
@@ -8,17 +8,17 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV7-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; THUMBV7-NEXT: .pad #44
; THUMBV7-NEXT: sub sp, #44
-; THUMBV7-NEXT: str r0, [sp, #40] @ 4-byte Spill
-; THUMBV7-NEXT: movs r0, #0
; THUMBV7-NEXT: ldrd r4, r7, [sp, #88]
; THUMBV7-NEXT: mov r5, r3
-; THUMBV7-NEXT: strd r0, r0, [sp, #8]
+; THUMBV7-NEXT: str r0, [sp, #40] @ 4-byte Spill
+; THUMBV7-NEXT: movs r0, #0
+; THUMBV7-NEXT: strd r4, r7, [sp]
; THUMBV7-NEXT: mov r1, r3
+; THUMBV7-NEXT: strd r0, r0, [sp, #8]
; THUMBV7-NEXT: mov r6, r2
; THUMBV7-NEXT: mov r0, r2
; THUMBV7-NEXT: movs r2, #0
; THUMBV7-NEXT: movs r3, #0
-; THUMBV7-NEXT: strd r4, r7, [sp]
; THUMBV7-NEXT: bl __multi3
; THUMBV7-NEXT: strd r1, r0, [sp, #32] @ 8-byte Folded Spill
; THUMBV7-NEXT: strd r3, r2, [sp, #24] @ 8-byte Folded Spill
diff --git a/llvm/test/CodeGen/X86/instr-sched-multiple-memops.mir b/llvm/test/CodeGen/X86/instr-sched-multiple-memops.mir
deleted file mode 100644
index 0259f4261c38..000000000000
--- a/llvm/test/CodeGen/X86/instr-sched-multiple-memops.mir
+++ /dev/null
@@ -1,144 +0,0 @@
-# RUN: llc -mtriple=i686-- -o - -run-pass=machine-scheduler -debug %s 2>&1 | FileCheck %s
-# REQUIRES: asserts
-
---- |
- %struct.Macroblock.0.1.2.3.6.17 = type { i32, i32, i32, i32, i32, [8 x i32], %struct.Macroblock.0.1.2.3.6.17*, %struct.Macroblock.0.1.2.3.6.17*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
-
- define void @stepsystem(i32 %x) {
- entry:
- %0 = load i32, i32* undef, align 8
- %inc = add i32 %x, 1
- store i32 %inc, i32* undef, align 8
- store <2 x double> <double 0xD47D42AEA2879F2E, double 0xD47D42AEA2879F2E>, <2 x double>* undef, align 8
- ret void
- }
-
- define void @dct_chroma() {
- cond_true2732.preheader:
- %tmp2666 = getelementptr %struct.Macroblock.0.1.2.3.6.17, %struct.Macroblock.0.1.2.3.6.17* null, i32 0, i32 13
- %tmp2667.us.us = load i64, i64* %tmp2666, align 4
- %tmp2670.us.us = load i64, i64* null, align 4
- %tmp2675.us.us = shl i64 %tmp2670.us.us, 0
- %tmp2675not.us.us = xor i64 %tmp2675.us.us, -1
- %tmp2676.us.us = and i64 %tmp2667.us.us, %tmp2675not.us.us
- store i64 %tmp2676.us.us, i64* %tmp2666, align 4
- ret void
- }
-
-...
----
-name: stepsystem
-alignment: 16
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-failedISel: false
-tracksRegLiveness: true
-hasWinCFI: false
-registers:
- - { id: 0, class: gr32, preferred-register: '' }
- - { id: 1, class: gr32, preferred-register: '' }
- - { id: 2, class: gr32, preferred-register: '' }
- - { id: 3, class: gr32, preferred-register: '' }
- - { id: 4, class: gr32, preferred-register: '' }
-liveins: []
-frameInfo:
- isFrameAddressTaken: false
- isReturnAddressTaken: false
- hasStackMap: false
- hasPatchPoint: false
- stackSize: 0
- offsetAdjustment: 0
- maxAlignment: 4
- adjustsStack: false
- hasCalls: false
- stackProtector: ''
- maxCallFrameSize: 4294967295
- cvBytesOfCalleeSavedRegisters: 0
- hasOpaqueSPAdjustment: false
- hasVAStart: false
- hasMustTailInVarArgFunc: false
- localFrameSize: 0
- savePoint: ''
- restorePoint: ''
-fixedStack:
- - { id: 0, type: default, offset: 0, size: 4, alignment: 4, stack-id: default,
- isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
- debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-stack: []
-callSites: []
-constants: []
-machineFunctionInfo: {}
-body: |
- bb.0.entry:
- %1:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 4 from %fixed-stack.0)
- %1:gr32 = INC32r %1, implicit-def dead $eflags
- MOV32mr undef %2:gr32, 1, $noreg, 0, $noreg, %1 :: (store 4 into `i32* undef`, align 8)
- MOV32mi undef %3:gr32, 1, $noreg, 0, $noreg, -729988434 :: (store 4 into `<2 x double>* undef` + 12)
- MOV32mi undef %4:gr32, 1, $noreg, 0, $noreg, -1568170194 :: (store 4 into `<2 x double>* undef` + 8, align 8)
- RET 0
-
-# CHECK-LABEL: stepsystem
-# CHECK: Not adding chain dependency{{[[:space:]]*}}from: MOV32mi {{.*}} :: (store 4 {{.*}}){{[[:space:]]*}}to: MOV32mi {{.*}} :: (store 4 {{.*}})
-# CHECK: Adding chain dependency{{[[:space:]]*}}from: MOV32mi {{.*}} :: (store 4 {{.*}}){{[[:space:]]*}}to: MOV32mr {{.*}} :: (store 4 {{.*}})
-...
----
-name: dct_chroma
-alignment: 16
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-failedISel: false
-tracksRegLiveness: true
-hasWinCFI: false
-registers:
- - { id: 0, class: gr32, preferred-register: '' }
- - { id: 1, class: gr32, preferred-register: '' }
- - { id: 2, class: gr32, preferred-register: '' }
- - { id: 3, class: gr32, preferred-register: '' }
- - { id: 4, class: gr32, preferred-register: '' }
-liveins: []
-frameInfo:
- isFrameAddressTaken: false
- isReturnAddressTaken: false
- hasStackMap: false
- hasPatchPoint: false
- stackSize: 0
- offsetAdjustment: 0
- maxAlignment: 1
- adjustsStack: false
- hasCalls: false
- stackProtector: ''
- maxCallFrameSize: 4294967295
- cvBytesOfCalleeSavedRegisters: 0
- hasOpaqueSPAdjustment: false
- hasVAStart: false
- hasMustTailInVarArgFunc: false
- localFrameSize: 0
- savePoint: ''
- restorePoint: ''
-fixedStack: []
-stack: []
-callSites: []
-constants: []
-machineFunctionInfo: {}
-body: |
- bb.0.cond_true2732.preheader:
- %4:gr32 = MOV32rm $noreg, 1, $noreg, 0, $noreg :: (load 4 from `i64* null`)
- %2:gr32 = MOV32rm $noreg, 1, $noreg, 4, $noreg :: (load 4 from `i64* null` + 4)
- %2:gr32 = NOT32r %2
- %4:gr32 = NOT32r %4
- %4:gr32 = AND32rm %4, $noreg, 1, $noreg, 356, $noreg, implicit-def dead $eflags :: (load 4 from %ir.tmp2666)
- AND32mr $noreg, 1, $noreg, 360, $noreg, %2, implicit-def dead $eflags :: (store 4 into %ir.tmp2666 + 4), (load 4 from %ir.tmp2666 + 4)
- MOV32mr $noreg, 1, $noreg, 356, $noreg, %4 :: (store 4 into %ir.tmp2666)
- RET 0
-
-# Chain dependencies should not be systematically added when at least one of
-# the instructions has more than one memory operand. It should only be added
-# where it would be needed.
-# CHECK-LABEL: dct_chroma
-# CHECK: Not adding chain dependency{{[[:space:]]*}}from: MOV32mr {{.*}} :: (store 4 {{.*}}){{[[:space:]]*}}to: AND32mr {{.*}} :: (store 4 {{.*}}), (load 4 {{.*}})
-# CHECK: Adding chain dependency{{[[:space:]]*}}from: AND32mr {{.*}} :: (store 4 {{.*}}), (load 4 {{.*}}){{[[:space:]]*}}to: %{{.*}} = MOV32rm {{.*}} :: (load 4 {{.*}})
-
diff --git a/llvm/test/CodeGen/X86/store_op_load_fold2.ll b/llvm/test/CodeGen/X86/store_op_load_fold2.ll
index 00db0797f097..674b8d8f9384 100644
--- a/llvm/test/CodeGen/X86/store_op_load_fold2.ll
+++ b/llvm/test/CodeGen/X86/store_op_load_fold2.ll
@@ -17,12 +17,13 @@ cond_true2732.preheader: ; preds = %entry
store i64 %tmp2676.us.us, i64* %tmp2666
ret i32 0
-; INTEL: and dword ptr [360], {{e..}}
; INTEL: and {{e..}}, dword ptr [356]
-; INTEL: mov dword ptr [356], {{e..}}
+; INTEL: and dword ptr [360], {{e..}}
+; FIXME: mov dword ptr [356], {{e..}}
+; The above line comes out as 'mov 360, eax', but when the register is ecx it works?
-; ATT: andl %{{e..}}, 360
; ATT: andl 356, %{{e..}}
+; ATT: andl %{{e..}}, 360
; ATT: movl %{{e..}}, 356
}
More information about the llvm-commits
mailing list