[llvm] 5e03c0a - [DAGCombiner] Fix mayAlias not accounting for scalable MMOs with offsets (#90573)

Tue Apr 30 05:20:45 PDT 2024

Author: Luke Lau
Date: 2024-04-30T20:20:40+08:00
New Revision: 5e03c0af4745b97f93c06b43e0f2a02abc881292

URL: https://github.com/llvm/llvm-project/commit/5e03c0af4745b97f93c06b43e0f2a02abc881292
DIFF: https://github.com/llvm/llvm-project/commit/5e03c0af4745b97f93c06b43e0f2a02abc881292.diff

LOG: [DAGCombiner] Fix mayAlias not accounting for scalable MMOs with offsets (#90573)

In #70452 DAGCombiner::mayAlias was taught to handle scalable sizes, but
when it checks via AA->isNoAlias it didn't take into account the case
where the size is scalable but there was an offset too.

For the fixed length case the offset was just accounted for by adding to
the LocationSize, but for the scalable case there doesn't seem to be a
way to represent both a scalable and fixed part in it. So this patch
works around it by bailing if there is an offset.

Fixes #90559

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/RISCV/rvv/pr90559.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4b81185c6e311f..3fa8a3578b4fb4 100644

--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -28113,7 +28113,10 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
 #endif
 
   if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
-      Size0.hasValue() && Size1.hasValue()) {
+      Size0.hasValue() && Size1.hasValue() &&
+      // Can't represent a scalable size + fixed offset in LocationSize
+      (!Size0.isScalable() || SrcValOffset0 == 0) &&
+      (!Size1.isScalable() || SrcValOffset1 == 0)) {
     // Use alias analysis information.
     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
     int64_t Overlap0 =

diff  --git a/llvm/test/CodeGen/RISCV/rvv/pr90559.ll b/llvm/test/CodeGen/RISCV/rvv/pr90559.ll
index 93a251286cf73f..8d330b12055ae9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr90559.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr90559.ll
@@ -1,19 +1,43 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 ; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
 
-; FIXME: The i32 load and store pair isn't dead and shouldn't be omitted.
+; This showcases a miscompile that was fixed in #90573:
+; - The memset will be type-legalized to a 512 bit store + 2 x 128 bit stores.
+; - the load and store of q aliases the upper 128 bits store of p.
+; - The aliasing 128 bit store will be between the chain of the scalar
+;   load/store:
+;
+;   t54: ch = store<(store (s512) into %ir.p, align 1)> t0, ...
+;   t51: ch = store<(store (s128) into %ir.p + 64, align 1)> t0, ...
+;
+;   t44: i64,ch = load<(load (s32) from %ir.q), sext from i32> t0, ...
+;   t50: ch = store<(store (s128) into %ir.p + 80, align 1)> t44:1, ...
+;   t46: ch = store<(store (s32) into %ir.q), trunc to i32> t50, ...
+;
+; Previously, the scalar load/store was incorrectly combined away:
+;
+;   t54: ch = store<(store (s512) into %ir.p, align 1)> t0, ...
+;   t51: ch = store<(store (s128) into %ir.p + 64, align 1)> t0, ...
+;
+;   // MISSING
+;   t50: ch = store<(store (s128) into %ir.p + 80, align 1)> t44:1, ...
+;   // MISSING
+; See also pr83017.ll: This is the same code, but relies on vscale_range instead
+; of -riscv-v-vector-bits-max=128.
 define void @f(ptr %p) vscale_range(2,2) {
 ; CHECK-LABEL: f:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vs4r.v v8, (a0)
-; CHECK-NEXT:    addi a1, a0, 80
+; CHECK-NEXT:    lw a1, 84(a0)
+; CHECK-NEXT:    addi a2, a0, 80
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vs1r.v v8, (a1)
-; CHECK-NEXT:    addi a0, a0, 64
-; CHECK-NEXT:    vs1r.v v8, (a0)
+; CHECK-NEXT:    vs1r.v v8, (a2)
+; CHECK-NEXT:    vsetvli a2, zero, e8, m4, ta, ma
+; CHECK-NEXT:    vmv.v.i v12, 0
+; CHECK-NEXT:    vs4r.v v12, (a0)
+; CHECK-NEXT:    addi a2, a0, 64
+; CHECK-NEXT:    vs1r.v v8, (a2)
+; CHECK-NEXT:    sw a1, 84(a0)
 ; CHECK-NEXT:    ret
   %q = getelementptr inbounds i8, ptr %p, i64 84
   %x = load i32, ptr %q