[llvm] 4b11201 - [MachineInstr] Add support for instructions with multiple memory operands.
Michael Liao via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 3 17:49:15 PST 2020
Author: Michael Liao
Date: 2020-11-03T20:44:40-05:00
New Revision: 4b1120159274ca98b4a58064b116aba88372ac98
URL: https://github.com/llvm/llvm-project/commit/4b1120159274ca98b4a58064b116aba88372ac98
DIFF: https://github.com/llvm/llvm-project/commit/4b1120159274ca98b4a58064b116aba88372ac98.diff
LOG: [MachineInstr] Add support for instructions with multiple memory operands.
- Basically, iterate over each pair of memory operands from both instructions
and return true if any of them may alias.
- The exception is memory instructions without any memory operand. They
may touch everything and could alias any memory instruction.
Differential Revision: https://reviews.llvm.org/D89447
Added:
Modified:
llvm/include/llvm/CodeGen/TargetInstrInfo.h
llvm/lib/CodeGen/MachineInstr.cpp
llvm/test/CodeGen/AArch64/merge-store-dependency.ll
llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
llvm/test/CodeGen/Thumb2/mve-phireg.ll
llvm/test/CodeGen/Thumb2/mve-vst3.ll
llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
llvm/test/CodeGen/X86/store_op_load_fold2.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 5d5a6efab220..68fc129cc0ed 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1737,6 +1737,21 @@ class TargetInstrInfo : public MCInstrInfo {
return 5;
}
+ /// Return the maximal number of alias checks on memory operands. For
+ /// instructions with more than one memory operands, the alias check on a
+ /// single MachineInstr pair has quadratic overhead and results in
+ /// unacceptable performance in the worst case. The limit here is to clamp
+ /// that maximal checks performed. Usually, that's the product of memory
+ /// operand numbers from that pair of MachineInstr to be checked. For
+ /// instance, with two MachineInstrs with 4 and 5 memory operands
+ /// correspondingly, a total of 20 checks are required. With this limit set to
+ /// 16, their alias check is skipped. We choose to limit the product instead
+ /// of the individual instruction as targets may have special MachineInstrs
+ /// with a considerably high number of memory operands, such as `ldm` in ARM.
+ /// Setting this limit per MachineInstr would result in either too high
+ /// overhead or too rigid restriction.
+ virtual unsigned getMemOperandAACheckLimit() const { return 16; }
+
/// Return an array that contains the ids of the target indices (used for the
/// TargetIndex machine operand) and their names.
///
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index d45c53b81816..fd658cdb41b9 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -1276,81 +1276,96 @@ bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other,
if (TII->areMemAccessesTriviallyDisjoint(*this, Other))
return false;
- // FIXME: Need to handle multiple memory operands to support all targets.
- if (!hasOneMemOperand() || !Other.hasOneMemOperand())
+ // Memory operations without memory operands may access anything. Be
+ // conservative and assume `MayAlias`.
+ if (memoperands_empty() || Other.memoperands_empty())
return true;
- MachineMemOperand *MMOa = *memoperands_begin();
- MachineMemOperand *MMOb = *Other.memoperands_begin();
-
- // The following interface to AA is fashioned after DAGCombiner::isAlias
- // and operates with MachineMemOperand offset with some important
- // assumptions:
- // - LLVM fundamentally assumes flat address spaces.
- // - MachineOperand offset can *only* result from legalization and
- // cannot affect queries other than the trivial case of overlap
- // checking.
- // - These offsets never wrap and never step outside
- // of allocated objects.
- // - There should never be any negative offsets here.
- //
- // FIXME: Modify API to hide this math from "user"
- // Even before we go to AA we can reason locally about some
- // memory objects. It can save compile time, and possibly catch some
- // corner cases not currently covered.
-
- int64_t OffsetA = MMOa->getOffset();
- int64_t OffsetB = MMOb->getOffset();
- int64_t MinOffset = std::min(OffsetA, OffsetB);
-
- uint64_t WidthA = MMOa->getSize();
- uint64_t WidthB = MMOb->getSize();
- bool KnownWidthA = WidthA != MemoryLocation::UnknownSize;
- bool KnownWidthB = WidthB != MemoryLocation::UnknownSize;
-
- const Value *ValA = MMOa->getValue();
- const Value *ValB = MMOb->getValue();
- bool SameVal = (ValA && ValB && (ValA == ValB));
- if (!SameVal) {
- const PseudoSourceValue *PSVa = MMOa->getPseudoValue();
- const PseudoSourceValue *PSVb = MMOb->getPseudoValue();
- if (PSVa && ValB && !PSVa->mayAlias(&MFI))
- return false;
- if (PSVb && ValA && !PSVb->mayAlias(&MFI))
- return false;
- if (PSVa && PSVb && (PSVa == PSVb))
- SameVal = true;
- }
+ // Skip if there are too many memory operands.
+ auto NumChecks = getNumMemOperands() * Other.getNumMemOperands();
+ if (NumChecks > TII->getMemOperandAACheckLimit())
+ return true;
+
+ auto HasAlias = [MFI, AA, UseTBAA](const MachineMemOperand *MMOa,
+ const MachineMemOperand *MMOb) {
+ // The following interface to AA is fashioned after DAGCombiner::isAlias
+ // and operates with MachineMemOperand offset with some important
+ // assumptions:
+ // - LLVM fundamentally assumes flat address spaces.
+ // - MachineOperand offset can *only* result from legalization and
+ // cannot affect queries other than the trivial case of overlap
+ // checking.
+ // - These offsets never wrap and never step outside
+ // of allocated objects.
+ // - There should never be any negative offsets here.
+ //
+ // FIXME: Modify API to hide this math from "user"
+ // Even before we go to AA we can reason locally about some
+ // memory objects. It can save compile time, and possibly catch some
+ // corner cases not currently covered.
+
+ int64_t OffsetA = MMOa->getOffset();
+ int64_t OffsetB = MMOb->getOffset();
+ int64_t MinOffset = std::min(OffsetA, OffsetB);
+
+ uint64_t WidthA = MMOa->getSize();
+ uint64_t WidthB = MMOb->getSize();
+ bool KnownWidthA = WidthA != MemoryLocation::UnknownSize;
+ bool KnownWidthB = WidthB != MemoryLocation::UnknownSize;
+
+ const Value *ValA = MMOa->getValue();
+ const Value *ValB = MMOb->getValue();
+ bool SameVal = (ValA && ValB && (ValA == ValB));
+ if (!SameVal) {
+ const PseudoSourceValue *PSVa = MMOa->getPseudoValue();
+ const PseudoSourceValue *PSVb = MMOb->getPseudoValue();
+ if (PSVa && ValB && !PSVa->mayAlias(&MFI))
+ return false;
+ if (PSVb && ValA && !PSVb->mayAlias(&MFI))
+ return false;
+ if (PSVa && PSVb && (PSVa == PSVb))
+ SameVal = true;
+ }
+
+ if (SameVal) {
+ if (!KnownWidthA || !KnownWidthB)
+ return true;
+ int64_t MaxOffset = std::max(OffsetA, OffsetB);
+ int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
+ return (MinOffset + LowWidth > MaxOffset);
+ }
- if (SameVal) {
- if (!KnownWidthA || !KnownWidthB)
+ if (!AA)
return true;
- int64_t MaxOffset = std::max(OffsetA, OffsetB);
- int64_t LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
- return (MinOffset + LowWidth > MaxOffset);
- }
- if (!AA)
- return true;
+ if (!ValA || !ValB)
+ return true;
- if (!ValA || !ValB)
- return true;
+ assert((OffsetA >= 0) && "Negative MachineMemOperand offset");
+ assert((OffsetB >= 0) && "Negative MachineMemOperand offset");
- assert((OffsetA >= 0) && "Negative MachineMemOperand offset");
- assert((OffsetB >= 0) && "Negative MachineMemOperand offset");
+ int64_t OverlapA = KnownWidthA ? WidthA + OffsetA - MinOffset
+ : MemoryLocation::UnknownSize;
+ int64_t OverlapB = KnownWidthB ? WidthB + OffsetB - MinOffset
+ : MemoryLocation::UnknownSize;
- int64_t OverlapA = KnownWidthA ? WidthA + OffsetA - MinOffset
- : MemoryLocation::UnknownSize;
- int64_t OverlapB = KnownWidthB ? WidthB + OffsetB - MinOffset
- : MemoryLocation::UnknownSize;
+ AliasResult AAResult =
+ AA->alias(MemoryLocation(ValA, OverlapA,
+ UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
+ MemoryLocation(ValB, OverlapB,
+ UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
- AliasResult AAResult = AA->alias(
- MemoryLocation(ValA, OverlapA,
- UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
- MemoryLocation(ValB, OverlapB,
- UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
+ return (AAResult != NoAlias);
+ };
- return (AAResult != NoAlias);
+ // Check each pair of memory operands from both instructions, which can't
+ // alias only if all pairs won't alias.
+ for (auto *MMOa : memoperands())
+ for (auto *MMOb : Other.memoperands())
+ if (HasAlias(MMOa, MMOb))
+ return true;
+
+ return false;
}
/// hasOrderedMemoryRef - Return true if this instruction may have an ordered
diff --git a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
index 6850846fec06..3a768d0e3f9b 100644
--- a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
+++ b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
@@ -19,11 +19,11 @@ define void @test(%struct1* %fde, i32 %fd, void (i32, i32, i8*)* %func, i8* %arg
; A53-NEXT: mov x19, x8
; A53-NEXT: mov w0, w1
; A53-NEXT: mov w9, #256
+; A53-NEXT: stp x2, x3, [x8, #32]
+; A53-NEXT: mov x2, x8
; A53-NEXT: str q0, [x19, #16]!
; A53-NEXT: str w1, [x19]
; A53-NEXT: mov w1, #4
-; A53-NEXT: stp x2, x3, [x8, #32]
-; A53-NEXT: mov x2, x8
; A53-NEXT: str q0, [x8]
; A53-NEXT: strh w9, [x8, #24]
; A53-NEXT: str wzr, [x8, #20]
diff --git a/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll b/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
index 9942d6df99a4..693f33553591 100644
--- a/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
+++ b/llvm/test/CodeGen/ARM/big-endian-neon-fp16-bitconv.ll
@@ -503,12 +503,12 @@ define void @conv_v8f16_to_i128( <8 x half> %a, i128* %store ) {
; CHECK-NEXT: vmov.32 r3, d16[1]
; CHECK-NEXT: vmov.32 r1, d16[0]
; CHECK-NEXT: subs r12, r12, #1
+; CHECK-NEXT: str r12, [r0, #12]
; CHECK-NEXT: sbcs r2, r2, #0
+; CHECK-NEXT: str r2, [r0, #8]
; CHECK-NEXT: sbcs r3, r3, #0
; CHECK-NEXT: sbc r1, r1, #0
; CHECK-NEXT: stm r0, {r1, r3}
-; CHECK-NEXT: str r2, [r0, #8]
-; CHECK-NEXT: str r12, [r0, #12]
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
index a43f564951e9..4fe8877aa8bd 100644
--- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
@@ -1094,6 +1094,7 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc
; CHECK-NEXT: ldrd r11, r8, [r12, #24]
; CHECK-NEXT: vstrb.8 q0, [r9], #16
; CHECK-NEXT: vldrw.u32 q0, [r5], #32
+; CHECK-NEXT: strd r9, r1, [sp, #24] @ 8-byte Folded Spill
; CHECK-NEXT: vldrw.u32 q1, [r5, #-28]
; CHECK-NEXT: vmul.f32 q0, q0, r7
; CHECK-NEXT: vldrw.u32 q6, [r5, #-24]
@@ -1105,13 +1106,12 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc
; CHECK-NEXT: vfma.f32 q0, q4, r6
; CHECK-NEXT: vldrw.u32 q3, [r5, #-8]
; CHECK-NEXT: vfma.f32 q0, q5, r3
-; CHECK-NEXT: vldrw.u32 q1, [r5, #-4]
-; CHECK-NEXT: vfma.f32 q0, q2, lr
; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: vfma.f32 q0, q2, lr
+; CHECK-NEXT: vldrw.u32 q1, [r5, #-4]
; CHECK-NEXT: vfma.f32 q0, q3, r11
-; CHECK-NEXT: strd r9, r1, [sp, #24] @ 8-byte Folded Spill
-; CHECK-NEXT: vfma.f32 q0, q1, r8
; CHECK-NEXT: cmp r0, #16
+; CHECK-NEXT: vfma.f32 q0, q1, r8
; CHECK-NEXT: blo .LBB16_7
; CHECK-NEXT: @ %bb.5: @ %for.body.preheader
; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
diff --git a/llvm/test/CodeGen/Thumb2/mve-phireg.ll b/llvm/test/CodeGen/Thumb2/mve-phireg.ll
index e7d6a7323bc1..0fe26fbc4753 100644
--- a/llvm/test/CodeGen/Thumb2/mve-phireg.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-phireg.ll
@@ -168,16 +168,14 @@ define dso_local i32 @e() #0 {
; CHECK-NEXT: vmov q1, q4
; CHECK-NEXT: vmov s1, r7
; CHECK-NEXT: vmov.32 q1[1], r6
-; CHECK-NEXT: mov.w r10, #0
-; CHECK-NEXT: vmov.32 q1[2], r5
; CHECK-NEXT: vmov.32 q5[0], r7
+; CHECK-NEXT: vmov.32 q1[2], r5
+; CHECK-NEXT: vmov s9, r4
; CHECK-NEXT: vmov.32 q1[3], r4
-; CHECK-NEXT: strd r0, r10, [sp, #24]
+; CHECK-NEXT: vdup.32 q6, r7
; CHECK-NEXT: vstrw.32 q1, [sp, #76]
; CHECK-NEXT: vmov q1, q5
-; CHECK-NEXT: vmov s9, r4
; CHECK-NEXT: vmov.32 q1[1], r7
-; CHECK-NEXT: vdup.32 q6, r7
; CHECK-NEXT: vmov.f32 s2, s1
; CHECK-NEXT: vmov.f32 s8, s0
; CHECK-NEXT: vmov.32 q1[2], r6
@@ -185,6 +183,7 @@ define dso_local i32 @e() #0 {
; CHECK-NEXT: vmov q7, q6
; CHECK-NEXT: vmov.f32 s10, s1
; CHECK-NEXT: mov.w r8, #4
+; CHECK-NEXT: mov.w r10, #0
; CHECK-NEXT: vmov.32 q1[3], r4
; CHECK-NEXT: vmov.32 q3[0], r4
; CHECK-NEXT: vmov.32 q7[1], r4
@@ -192,6 +191,7 @@ define dso_local i32 @e() #0 {
; CHECK-NEXT: vmov.f32 s11, s3
; CHECK-NEXT: movs r1, #64
; CHECK-NEXT: strh.w r8, [sp, #390]
+; CHECK-NEXT: strd r0, r10, [sp, #24]
; CHECK-NEXT: vstrw.32 q0, [sp, #44]
; CHECK-NEXT: str r0, [r0]
; CHECK-NEXT: vstrw.32 q2, [r0]
diff --git a/llvm/test/CodeGen/Thumb2/mve-vst3.ll b/llvm/test/CodeGen/Thumb2/mve-vst3.ll
index 600c5279ca91..1ae74c1738c7 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vst3.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vst3.ll
@@ -24,8 +24,8 @@ define void @vst3_v2i32(<2 x i32> *%src, <6 x i32> *%dst) {
; CHECK-NEXT: vmov.f32 s9, s6
; CHECK-NEXT: vmov.f32 s10, s0
; CHECK-NEXT: vmov.f32 s11, s5
-; CHECK-NEXT: strd r2, r0, [r1, #16]
; CHECK-NEXT: vstrw.32 q2, [r1]
+; CHECK-NEXT: strd r2, r0, [r1, #16]
; CHECK-NEXT: pop {r4, pc}
entry:
%s1 = getelementptr <2 x i32>, <2 x i32>* %src, i32 0
diff --git a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
index ac1c814b838e..f57c9226179b 100644
--- a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
@@ -8,17 +8,17 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV7-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; THUMBV7-NEXT: .pad #44
; THUMBV7-NEXT: sub sp, #44
-; THUMBV7-NEXT: ldrd r4, r7, [sp, #88]
-; THUMBV7-NEXT: mov r5, r3
; THUMBV7-NEXT: str r0, [sp, #40] @ 4-byte Spill
; THUMBV7-NEXT: movs r0, #0
-; THUMBV7-NEXT: strd r4, r7, [sp]
-; THUMBV7-NEXT: mov r1, r3
+; THUMBV7-NEXT: ldrd r4, r7, [sp, #88]
+; THUMBV7-NEXT: mov r5, r3
; THUMBV7-NEXT: strd r0, r0, [sp, #8]
+; THUMBV7-NEXT: mov r1, r3
; THUMBV7-NEXT: mov r6, r2
; THUMBV7-NEXT: mov r0, r2
; THUMBV7-NEXT: movs r2, #0
; THUMBV7-NEXT: movs r3, #0
+; THUMBV7-NEXT: strd r4, r7, [sp]
; THUMBV7-NEXT: bl __multi3
; THUMBV7-NEXT: strd r1, r0, [sp, #32] @ 8-byte Folded Spill
; THUMBV7-NEXT: strd r3, r2, [sp, #24] @ 8-byte Folded Spill
diff --git a/llvm/test/CodeGen/X86/store_op_load_fold2.ll b/llvm/test/CodeGen/X86/store_op_load_fold2.ll
index 674b8d8f9384..6f088772436e 100644
--- a/llvm/test/CodeGen/X86/store_op_load_fold2.ll
+++ b/llvm/test/CodeGen/X86/store_op_load_fold2.ll
@@ -17,13 +17,12 @@ cond_true2732.preheader: ; preds = %entry
store i64 %tmp2676.us.us, i64* %tmp2666
ret i32 0
-; INTEL: and {{e..}}, dword ptr [356]
-; INTEL: and dword ptr [360], {{e..}}
-; FIXME: mov dword ptr [356], {{e..}}
-; The above line comes out as 'mov 360, eax', but when the register is ecx it works?
+; INTEL-DAG: and {{e..}}, dword ptr [356]
+; INTEL-DAG: and dword ptr [360], {{e..}}
+; INTEL: mov dword ptr [356], {{e..}}
-; ATT: andl 356, %{{e..}}
-; ATT: andl %{{e..}}, 360
+; ATT-DAG: andl 356, %{{e..}}
+; ATT-DAG: andl %{{e..}}, 360
; ATT: movl %{{e..}}, 356
}
More information about the llvm-commits
mailing list