[llvm] Reland "DAG: Preserve range metadata when load is narrowed" (#128144) (PR #130609)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 12 05:53:16 PDT 2025
https://github.com/LU-JOHN updated https://github.com/llvm/llvm-project/pull/130609
>From 400d08d79ed36eaa2cc1ad50db766dcea280facc Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Fri, 7 Mar 2025 21:26:41 -0600
Subject: [PATCH 1/5] Reland "DAG: Preserve range metadata when load is
narrowed" (#128144)
Changes: Add guard to ensure truncation is strictly smaller than original size.
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 35 ++++++++--
llvm/test/CodeGen/AMDGPU/shl64_reduce.ll | 64 ++++++++++++++++---
2 files changed, 85 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index bceaf25ee14e4..ca32417773514 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14957,12 +14957,35 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
AddToWorklist(NewPtr.getNode());
SDValue Load;
- if (ExtType == ISD::NON_EXTLOAD)
- Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
- LN0->getPointerInfo().getWithOffset(PtrOff),
- LN0->getOriginalAlign(),
- LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
- else
+ if (ExtType == ISD::NON_EXTLOAD) {
+ const MDNode *OldRanges = LN0->getRanges();
+ const MDNode *NewRanges = nullptr;
+ // If LSBs are loaded and the truncated ConstantRange for the OldRanges
+ // metadata is not the full-set for the NewWidth then create a NewRanges
+ // metadata for the truncated load
+ if (ShAmt == 0 && OldRanges) {
+ ConstantRange CR = getConstantRangeFromMetadata(*OldRanges);
+
+ // FIXME: OldRanges should match the width of the old load, so
+ // CR.getBitWidth() should be wider than the new narrower load. This
+ // check should be unnecessary.
+ if (CR.getBitWidth() > VT.getScalarSizeInBits()) {
+ ConstantRange TruncatedCR = CR.truncate(VT.getScalarSizeInBits());
+ if (!TruncatedCR.isFullSet()) {
+ Metadata *Bounds[2] = {
+ ConstantAsMetadata::get(
+ ConstantInt::get(*DAG.getContext(), TruncatedCR.getLower())),
+ ConstantAsMetadata::get(
+ ConstantInt::get(*DAG.getContext(), TruncatedCR.getUpper()))};
+ NewRanges = MDNode::get(*DAG.getContext(), Bounds);
+ }
+ }
+ }
+ Load = DAG.getLoad(
+ VT, DL, LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff), LN0->getOriginalAlign(),
+ LN0->getMemOperand()->getFlags(), LN0->getAAInfo(), NewRanges);
+ } else
Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
LN0->getOriginalAlign(),
diff --git a/llvm/test/CodeGen/AMDGPU/shl64_reduce.ll b/llvm/test/CodeGen/AMDGPU/shl64_reduce.ll
index 05430213c17d2..69242f4e44840 100644
--- a/llvm/test/CodeGen/AMDGPU/shl64_reduce.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl64_reduce.ll
@@ -13,23 +13,66 @@
; Test range with metadata
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; FIXME: This case should be reduced, but SelectionDAG::computeKnownBits() cannot
-; determine the minimum from metadata in this case. Match current results
-; for now.
-
define i64 @shl_metadata(i64 %arg0, ptr %arg1.ptr) {
; CHECK-LABEL: shl_metadata:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: flat_load_dword v1, v[2:3]
+; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_lshlrev_b32_e32 v1, v1, v0
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %shift.amt = load i64, ptr %arg1.ptr, !range !0, !noundef !{}
+ %shl = shl i64 %arg0, %shift.amt
+ ret i64 %shl
+}
+
+define i64 @shl_metadata_two_ranges(i64 %arg0, ptr %arg1.ptr) {
+; CHECK-LABEL: shl_metadata_two_ranges:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: flat_load_dword v1, v[2:3]
+; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_lshlrev_b32_e32 v1, v1, v0
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %shift.amt = load i64, ptr %arg1.ptr, !range !1, !noundef !{}
+ %shl = shl i64 %arg0, %shift.amt
+ ret i64 %shl
+}
+
+; Known minimum is too low. Reduction must not be done.
+define i64 @shl_metadata_out_of_range(i64 %arg0, ptr %arg1.ptr) {
+; CHECK-LABEL: shl_metadata_out_of_range:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: flat_load_dword v2, v[2:3]
+; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %shift.amt = load i64, ptr %arg1.ptr, !range !2, !noundef !{}
+ %shl = shl i64 %arg0, %shift.amt
+ ret i64 %shl
+}
+
+; Bounds cannot be truncated to i32 when load is narrowed to i32.
+; Reduction must not be done.
+; Bounds were chosen so that if bounds were truncated to i32 the
+; known minimum would be 32 and the shl would be erroneously reduced.
+define i64 @shl_metadata_cant_be_narrowed_to_i32(i64 %arg0, ptr %arg1.ptr) {
+; CHECK-LABEL: shl_metadata_cant_be_narrowed_to_i32:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: flat_load_dword v2, v[2:3]
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
; CHECK-NEXT: s_setpc_b64 s[30:31]
- %shift.amt = load i64, ptr %arg1.ptr, !range !0
+ %shift.amt = load i64, ptr %arg1.ptr, !range !3, !noundef !{}
%shl = shl i64 %arg0, %shift.amt
ret i64 %shl
}
+; FIXME: This case should be reduced
define <2 x i64> @shl_v2_metadata(<2 x i64> %arg0, ptr %arg1.ptr) {
; CHECK-LABEL: shl_v2_metadata:
; CHECK: ; %bb.0:
@@ -39,11 +82,12 @@ define <2 x i64> @shl_v2_metadata(<2 x i64> %arg0, ptr %arg1.ptr) {
; CHECK-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1]
; CHECK-NEXT: v_lshlrev_b64 v[2:3], v6, v[2:3]
; CHECK-NEXT: s_setpc_b64 s[30:31]
- %shift.amt = load <2 x i64>, ptr %arg1.ptr, !range !0
+ %shift.amt = load <2 x i64>, ptr %arg1.ptr, !range !0, !noundef !{}
%shl = shl <2 x i64> %arg0, %shift.amt
ret <2 x i64> %shl
}
+; FIXME: This case should be reduced
define <3 x i64> @shl_v3_metadata(<3 x i64> %arg0, ptr %arg1.ptr) {
; CHECK-LABEL: shl_v3_metadata:
; CHECK: ; %bb.0:
@@ -55,11 +99,12 @@ define <3 x i64> @shl_v3_metadata(<3 x i64> %arg0, ptr %arg1.ptr) {
; CHECK-NEXT: v_lshlrev_b64 v[0:1], v8, v[0:1]
; CHECK-NEXT: v_lshlrev_b64 v[2:3], v10, v[2:3]
; CHECK-NEXT: s_setpc_b64 s[30:31]
- %shift.amt = load <3 x i64>, ptr %arg1.ptr, !range !0
+ %shift.amt = load <3 x i64>, ptr %arg1.ptr, !range !0, !noundef !{}
%shl = shl <3 x i64> %arg0, %shift.amt
ret <3 x i64> %shl
}
+; FIXME: This case should be reduced
define <4 x i64> @shl_v4_metadata(<4 x i64> %arg0, ptr %arg1.ptr) {
; CHECK-LABEL: shl_v4_metadata:
; CHECK: ; %bb.0:
@@ -74,12 +119,15 @@ define <4 x i64> @shl_v4_metadata(<4 x i64> %arg0, ptr %arg1.ptr) {
; CHECK-NEXT: v_lshlrev_b64 v[4:5], v13, v[4:5]
; CHECK-NEXT: v_lshlrev_b64 v[6:7], v15, v[6:7]
; CHECK-NEXT: s_setpc_b64 s[30:31]
- %shift.amt = load <4 x i64>, ptr %arg1.ptr, !range !0
+ %shift.amt = load <4 x i64>, ptr %arg1.ptr, !range !0, !noundef !{}
%shl = shl <4 x i64> %arg0, %shift.amt
ret <4 x i64> %shl
}
!0 = !{i64 32, i64 64}
+!1 = !{i64 32, i64 38, i64 42, i64 48}
+!2 = !{i64 31, i64 38, i64 42, i64 48}
+!3 = !{i64 32, i64 38, i64 2147483680, i64 2147483681}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Test range with an "or X, 16"
>From a8fc8d139ad3e2a7bce48ebb6f467f42271e4ac5 Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Tue, 11 Mar 2025 09:54:23 -0500
Subject: [PATCH 2/5] Add testcase
Signed-off-by: John Lu <John.Lu at amd.com>
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 11 +++++-----
llvm/test/CodeGen/X86/narrow-load-metadata.ll | 21 +++++++++++++++++++
2 files changed, 27 insertions(+), 5 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/narrow-load-metadata.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ca32417773514..9658252ba47b4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14961,14 +14961,14 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
const MDNode *OldRanges = LN0->getRanges();
const MDNode *NewRanges = nullptr;
// If LSBs are loaded and the truncated ConstantRange for the OldRanges
- // metadata is not the full-set for the NewWidth then create a NewRanges
+ // metadata is not the full-set for the new width then create a NewRanges
// metadata for the truncated load
if (ShAmt == 0 && OldRanges) {
ConstantRange CR = getConstantRangeFromMetadata(*OldRanges);
- // FIXME: OldRanges should match the width of the old load, so
- // CR.getBitWidth() should be wider than the new narrower load. This
- // check should be unnecessary.
+ // It is possible for an 8-bit extending load with 8-bit range
+ // metadata to be narrowed to an 8-bit load. This guard is necessary to
+ // ensure that truncation is strictly smaller.
if (CR.getBitWidth() > VT.getScalarSizeInBits()) {
ConstantRange TruncatedCR = CR.truncate(VT.getScalarSizeInBits());
if (!TruncatedCR.isFullSet()) {
@@ -14979,7 +14979,8 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
ConstantInt::get(*DAG.getContext(), TruncatedCR.getUpper()))};
NewRanges = MDNode::get(*DAG.getContext(), Bounds);
}
- }
+ } else if (CR.getBitWidth() == VT.getScalarSizeInBits())
+ NewRanges = OldRanges;
}
Load = DAG.getLoad(
VT, DL, LN0->getChain(), NewPtr,
diff --git a/llvm/test/CodeGen/X86/narrow-load-metadata.ll b/llvm/test/CodeGen/X86/narrow-load-metadata.ll
new file mode 100644
index 0000000000000..04af4cb74a512
--- /dev/null
+++ b/llvm/test/CodeGen/X86/narrow-load-metadata.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s
+;
+; This test case is reduced from RangeConstraintManager.cpp in a ASan build.
+; It crashes reduceLoadWidth in DAGCombiner.cpp. Preservation of range
+; metdata must ensure that ConstantRange truncation is strictly smaller.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define ptr @_ZN12_GLOBAL__N_121SymbolicRangeInferrer19VisitBinaryOperatorILN5clang18BinaryOperatorKindE15EEENS2_4ento8RangeSetES5_S5_NS2_8QualTypeE() {
+entry:
+ %0 = load i8, ptr null, align 4, !range !0, !noundef !1
+ %retval.sroa.1.0.insert.ext.i = zext i8 %0 to i64
+ %retval.sroa.1.0.insert.shift.i = shl i64 %retval.sroa.1.0.insert.ext.i, 32
+ %coerce.val.ii = trunc i64 %retval.sroa.1.0.insert.shift.i to i40
+ store i40 %coerce.val.ii, ptr null, align 4
+ ret ptr null
+}
+
+!0 = !{i8 0, i8 2}
+!1 = !{}
>From 13d4d56a82566110ed99eadde2743041db2cd6bc Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Tue, 11 Mar 2025 14:03:52 -0500
Subject: [PATCH 3/5] Clean up testcase
Signed-off-by: John Lu <John.Lu at amd.com>
---
llvm/test/CodeGen/X86/narrow-load-metadata.ll | 15 ++++++---------
1 file changed, 6 insertions(+), 9 deletions(-)
diff --git a/llvm/test/CodeGen/X86/narrow-load-metadata.ll b/llvm/test/CodeGen/X86/narrow-load-metadata.ll
index 04af4cb74a512..d9ccb4526af73 100644
--- a/llvm/test/CodeGen/X86/narrow-load-metadata.ll
+++ b/llvm/test/CodeGen/X86/narrow-load-metadata.ll
@@ -1,20 +1,17 @@
-; RUN: llc < %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s
;
; This test case is reduced from RangeConstraintManager.cpp in a ASan build.
; It crashes reduceLoadWidth in DAGCombiner.cpp. Preservation of range
; metdata must ensure that ConstantRange truncation is strictly smaller.
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define ptr @_ZN12_GLOBAL__N_121SymbolicRangeInferrer19VisitBinaryOperatorILN5clang18BinaryOperatorKindE15EEENS2_4ento8RangeSetES5_S5_NS2_8QualTypeE() {
+define i8 @_ZN12_GLOBAL__N_121SymbolicRangeInferrer19VisitBinaryOperatorILN5clang18BinaryOperatorKindE15EEENS2_4ento8RangeSetES5_S5_NS2_8QualTypeE(ptr %valptr) {
entry:
- %0 = load i8, ptr null, align 4, !range !0, !noundef !1
- %retval.sroa.1.0.insert.ext.i = zext i8 %0 to i64
+ %val = load i8, ptr %valptr, align 4, !range !0, !noundef !1
+ %retval.sroa.1.0.insert.ext.i = zext i8 %val to i64
%retval.sroa.1.0.insert.shift.i = shl i64 %retval.sroa.1.0.insert.ext.i, 32
%coerce.val.ii = trunc i64 %retval.sroa.1.0.insert.shift.i to i40
- store i40 %coerce.val.ii, ptr null, align 4
- ret ptr null
+ store i40 %coerce.val.ii, ptr %valptr, align 4
+ ret i8 %val
}
!0 = !{i8 0, i8 2}
>From 0cf9afbddf3e8ac3cd57db8a105a247cf145d224 Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Wed, 12 Mar 2025 07:44:24 -0500
Subject: [PATCH 4/5] Use common variable
Signed-off-by: John Lu <John.Lu at amd.com>
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9658252ba47b4..c35838601cc9c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14965,12 +14965,13 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
// metadata for the truncated load
if (ShAmt == 0 && OldRanges) {
ConstantRange CR = getConstantRangeFromMetadata(*OldRanges);
+ unsigned BitSize = VT.getScalarSizeInBits();
// It is possible for an 8-bit extending load with 8-bit range
// metadata to be narrowed to an 8-bit load. This guard is necessary to
// ensure that truncation is strictly smaller.
- if (CR.getBitWidth() > VT.getScalarSizeInBits()) {
- ConstantRange TruncatedCR = CR.truncate(VT.getScalarSizeInBits());
+ if (CR.getBitWidth() > BitSize) {
+ ConstantRange TruncatedCR = CR.truncate(BitSize);
if (!TruncatedCR.isFullSet()) {
Metadata *Bounds[2] = {
ConstantAsMetadata::get(
@@ -14979,7 +14980,7 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
ConstantInt::get(*DAG.getContext(), TruncatedCR.getUpper()))};
NewRanges = MDNode::get(*DAG.getContext(), Bounds);
}
- } else if (CR.getBitWidth() == VT.getScalarSizeInBits())
+ } else if (CR.getBitWidth() == BitSize)
NewRanges = OldRanges;
}
Load = DAG.getLoad(
>From 179f83bea0cbe447fee45fd96e484df46f785a1a Mon Sep 17 00:00:00 2001
From: John Lu <John.Lu at amd.com>
Date: Wed, 12 Mar 2025 07:45:12 -0500
Subject: [PATCH 5/5] Simplify function name. Add checks
Signed-off-by: John Lu <John.Lu at amd.com>
---
llvm/test/CodeGen/X86/narrow-load-metadata.ll | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/llvm/test/CodeGen/X86/narrow-load-metadata.ll b/llvm/test/CodeGen/X86/narrow-load-metadata.ll
index d9ccb4526af73..10aa5b8530a95 100644
--- a/llvm/test/CodeGen/X86/narrow-load-metadata.ll
+++ b/llvm/test/CodeGen/X86/narrow-load-metadata.ll
@@ -1,10 +1,17 @@
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
;
; This test case is reduced from RangeConstraintManager.cpp in a ASan build.
; It crashes reduceLoadWidth in DAGCombiner.cpp. Preservation of range
-; metdata must ensure that ConstantRange truncation is strictly smaller.
+; metadata must ensure that ConstantRange truncation is strictly smaller.
-define i8 @_ZN12_GLOBAL__N_121SymbolicRangeInferrer19VisitBinaryOperatorILN5clang18BinaryOperatorKindE15EEENS2_4ento8RangeSetES5_S5_NS2_8QualTypeE(ptr %valptr) {
+define i8 @narrow_load_metadata(ptr %valptr) {
+; CHECK-LABEL: narrow_load_metadata:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movzbl (%rdi), %eax
+; CHECK-NEXT: movb %al, 4(%rdi)
+; CHECK-NEXT: movl $0, (%rdi)
+; CHECK-NEXT: retq
entry:
%val = load i8, ptr %valptr, align 4, !range !0, !noundef !1
%retval.sroa.1.0.insert.ext.i = zext i8 %val to i64
More information about the llvm-commits
mailing list