[llvm] [SelectionDAG] Fix D66309: Allow unordered atomics to have some optimizations done for them (PR #85589)

via llvm-commits llvm-commits at lists.llvm.org
Sun Mar 17 19:23:09 PDT 2024


https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/85589

>From 38c8ea7c69f50939c6d2ca837eacd1e6de18d65d Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sun, 17 Mar 2024 21:56:43 -0400
Subject: [PATCH 1/2] [SelectionDAG] Add Pre-commit tests (NFC)

---
 llvm/test/CodeGen/PowerPC/store-combine.ll | 44 ++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/llvm/test/CodeGen/PowerPC/store-combine.ll b/llvm/test/CodeGen/PowerPC/store-combine.ll
index 9063335bf6619c..114625859d6f5c 100644
--- a/llvm/test/CodeGen/PowerPC/store-combine.ll
+++ b/llvm/test/CodeGen/PowerPC/store-combine.ll
@@ -280,6 +280,50 @@ entry:
   ret void
 }
 
+; One of the stores is atomic unordered
+; ptr p;
+; _Atomic char *p0 = (_Atomic char *)p;
+; p[3] = (m >> 0) & 0xFF;
+; p[2] = (m >> 8) & 0xFF;
+; p[1] = (m >> 16) & 0xFF;
+; *p0 = (m >> 24) & 0xFF;
+define void @store_i32_by_i8_bswap_atomic(i32 signext %m, ptr %p) {
+; CHECK-PPC64LE-LABEL: store_i32_by_i8_bswap_atomic:
+; CHECK-PPC64LE:       # %bb.0: # %entry
+; CHECK-PPC64LE-NEXT:    li 5, 2
+; CHECK-PPC64LE-NEXT:    sthbrx 3, 4, 5
+; CHECK-PPC64LE-NEXT:    srwi 5, 3, 16
+; CHECK-PPC64LE-NEXT:    srwi 3, 3, 24
+; CHECK-PPC64LE-NEXT:    stb 5, 1(4)
+; CHECK-PPC64LE-NEXT:    stb 3, 0(4)
+; CHECK-PPC64LE-NEXT:    blr
+;
+; CHECK-PPC64-LABEL: store_i32_by_i8_bswap_atomic:
+; CHECK-PPC64:       # %bb.0: # %entry
+; CHECK-PPC64-NEXT:    sth 3, 2(4)
+; CHECK-PPC64-NEXT:    srwi 5, 3, 16
+; CHECK-PPC64-NEXT:    srwi 3, 3, 24
+; CHECK-PPC64-NEXT:    stb 5, 1(4)
+; CHECK-PPC64-NEXT:    stb 3, 0(4)
+; CHECK-PPC64-NEXT:    blr
+entry:
+  %conv = trunc i32 %m to i8
+  %arrayidx = getelementptr inbounds i8, ptr %p, i64 3
+  store i8 %conv, ptr %arrayidx, align 1
+  %0 = lshr i32 %m, 8
+  %conv3 = trunc i32 %0 to i8
+  %arrayidx4 = getelementptr inbounds i8, ptr %p, i64 2
+  store i8 %conv3, ptr %arrayidx4, align 1
+  %1 = lshr i32 %m, 16
+  %conv7 = trunc i32 %1 to i8
+  %arrayidx8 = getelementptr inbounds i8, ptr %p, i64 1
+  store i8 %conv7, ptr %arrayidx8, align 1
+  %2 = lshr i32 %m, 24
+  %conv11 = trunc i32 %2 to i8
+  store atomic i8 %conv11, ptr %p unordered, align 1
+  ret void
+}
+
 ; There is a store in between individual stores
 ; ptr p, q;
 ; p[3] = (m >> 0) & 0xFF;

>From 0b8b87c56afb5729b92fab6cfcb15820533be160 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sun, 17 Mar 2024 20:19:48 -0400
Subject: [PATCH 2/2] [SelectionDAG] Fix D66309: Allow unordered atomics to
 have some optimizations done for them

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 36 ++++++++-----------
 1 file changed, 14 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5eb53d57c9c2bf..42b97a104e0d44 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8700,11 +8700,11 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
   if (LegalOperations || OptLevel == CodeGenOptLevel::None)
     return SDValue();
 
-  // We only handle merging simple stores of 1-4 bytes.
-  // TODO: Allow unordered atomics when wider type is legal (see D66309)
+  // We only handle merging unordered stores of 1-4 bytes.
   EVT MemVT = N->getMemoryVT();
   if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
-      !N->isSimple() || N->isIndexed())
+      !N->isUnordered() || (N->isAtomic() && !isTypeLegal(MemVT)) ||
+      N->isIndexed())
     return SDValue();
 
   // Collect all of the stores in the chain, upto the maximum store width (i64).
@@ -18518,8 +18518,7 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
   int64_t Offset;
 
   StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
-  // TODO: Relax this restriction for unordered atomics (see D66309)
-  if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
+  if (!ST || !ST->isUnordered() || ST->getAddressSpace() != LD->getAddressSpace())
     return SDValue();
 
   EVT LDType = LD->getValueType(0);
@@ -18651,8 +18650,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
   // If load is not volatile and there are no uses of the loaded value (and
   // the updated indexed value in case of indexed loads), change uses of the
   // chain value into uses of the chain input (i.e. delete the dead load).
-  // TODO: Allow this for unordered atomics (see D66309)
-  if (LD->isSimple()) {
+  if (LD->isUnordered()) {
     if (N->getValueType(1) == MVT::Other) {
       // Unindexed loads.
       if (!N->hasAnyUseOfValue(0)) {
@@ -21105,13 +21103,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
   // If this is a load followed by a store to the same location, then the store
   // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
   // TODO: Add big-endian truncate support with test coverage.
-  // TODO: Can relax for unordered atomics (see D66309)
   SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
                          ? peekThroughTruncates(Value)
                          : Value;
   if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
-        ST->isUnindexed() && ST->isSimple() &&
+        ST->isUnindexed() && ST->isUnordered() &&
         Ld->getAddressSpace() == ST->getAddressSpace() &&
         // There can't be any side effects between the load and store, such as
         // a call or store.
@@ -21125,10 +21122,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
   if (SDValue NewST = replaceStoreOfInsertLoad(ST))
     return NewST;
 
-  // TODO: Can relax for unordered atomics (see D66309)
   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
-    if (ST->isUnindexed() && ST->isSimple() &&
-        ST1->isUnindexed() && ST1->isSimple()) {
+    if (ST->isUnindexed() && ST->isUnordered() && ST1->isUnindexed() &&
+        ST1->isUnordered()) {
       if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
           ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
           ST->getAddressSpace() == ST1->getAddressSpace()) {
@@ -21246,8 +21242,7 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
       break;
     case ISD::STORE: {
       StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
-      // TODO: Can relax for unordered atomics (see D66309)
-      if (!ST->isSimple() || ST->isIndexed())
+      if (!ST->isUnordered() || ST->isIndexed())
         continue;
       const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
       // The bounds of a scalable store are not known until runtime, so this
@@ -26837,8 +26832,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
     if (LHS.getOperand(0) != RHS.getOperand(0) ||
         // Do not let this transformation reduce the number of volatile loads.
         // Be conservative for atomics for the moment
-        // TODO: This does appear to be legal for unordered atomics (see D66309)
-        !LLD->isSimple() || !RLD->isSimple() ||
+        !LLD->isUnordered() || !RLD->isUnordered() ||
         // FIXME: If either is a pre/post inc/dec load,
         // we'd need to split out the address adjustment.
         LLD->isIndexed() || RLD->isIndexed() ||
@@ -27934,8 +27928,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
 
   // Get alias information for node.
-  // TODO: relax aliasing for unordered atomics (see D66309)
-  const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
+  const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isUnordered();
 
   // Starting off.
   Chains.push_back(OriginalChain);
@@ -27951,9 +27944,8 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
     case ISD::LOAD:
     case ISD::STORE: {
       // Get alias information for C.
-      // TODO: Relax aliasing for unordered atomics (see D66309)
       bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
-                      cast<LSBaseSDNode>(C.getNode())->isSimple();
+                      cast<LSBaseSDNode>(C.getNode())->isUnordered();
       if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
         // Look further up the chain.
         C = C.getOperand(0);
@@ -28115,8 +28107,8 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
     // If the chain has more than one use, then we can't reorder the mem ops.
     if (!SDValue(Chain, 0)->hasOneUse())
       break;
-    // TODO: Relax for unordered atomics (see D66309)
-    if (!Chain->isSimple() || Chain->isIndexed())
+
+    if (!Chain->isUnordered() || Chain->isIndexed())
       break;
 
     // Find the base pointer and offset for this memory node.



More information about the llvm-commits mailing list