[llvm] Improvements to RS4GC BDV Algorithm (PR #69795)
Petr Maj via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 24 07:45:24 PDT 2023
https://github.com/zduka updated https://github.com/llvm/llvm-project/pull/69795
>From 6968662ec8d7886cd2125e955b2d2e5eacd915b0 Mon Sep 17 00:00:00 2001
From: Petr Maj <pmaj at azul.com>
Date: Sat, 21 Oct 2023 01:11:57 +0200
Subject: [PATCH 1/4] Improvements to RS4GC BDV Algorithm
---
.../Scalar/RewriteStatepointsForGC.cpp | 69 ++++++++++++-------
.../RewriteStatepointsForGC/base-vector.ll | 33 +++++++++
.../RewriteStatepointsForGC/constants.ll | 12 ++++
.../RewriteStatepointsForGC/insert-extract.ll | 56 +++++++++++++++
.../RewriteStatepointsForGC/vector-bitcast.ll | 2 +-
5 files changed, 146 insertions(+), 26 deletions(-)
create mode 100644 llvm/test/Transforms/RewriteStatepointsForGC/insert-extract.ll
diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 06c81f53de706cf..4308711fdb32f42 100644
--- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -993,7 +993,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
NewState.meet(OpState);
});
- BDVState OldState = States[BDV];
+ BDVState OldState = Pair.second;
if (OldState != NewState) {
Progress = true;
States[BDV] = NewState;
@@ -1012,8 +1012,44 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
}
#endif
- // Handle all instructions that have a vector BDV, but the instruction itself
- // is of scalar type.
+ // Even though we have identified a concrete base (or a conflict) for all live
+ // pointers at this point, there are cases where the base is of an
+ // incompatible type compared to the original instruction. We conservatively
+ // mark those as conflicts to ensure that corresponding BDVs will be generated
+ // in the next steps
+
+ // this is a rather explicit check for all cases where we should mark the
+ // state as a conflict to force the latter stages of the algorithm to emit
+ // the BDVs. Note that the IE and EE instruction check is not fully subsumed
+ // by the vector<->scalar check at the end, this is due to the BDV algorithm
+ // being ignorant of BDV types at this junction
+ // TODO: in many cases the instructions emited for the conflicting states
+ // will be identical to the I itself (if the I's operate on their BDVs
+ // themselves). We should expoit this, but can't do it here since it would
+ // break the invariant about the BDVs not being known to be a base.
+ // TODO: the code also does not handle constants at all - the algithm relies
+ // on all constants having the same BDV and therefore constant-only insns
+ // will never be in conflict, but this check is ignored here. If the
+ // constant conflicts will be to BDVs themselves, they will be identical
+ // instructions and will get optimized away (as in the above TODO)
+ auto MarkConflict = [&](Instruction *I, Value *BaseValue) {
+ // II mixes vector & scalar so is always a conflict
+ if (isa<InsertElementInst>(I))
+ return true;
+ // EE mixes vector and scalar so is always conflict
+ if (isa<ExtractElementInst>(I))
+ return true;
+ // shuffle vector is always conflict as it creates new vector from existing
+ // ones
+ if (isa<ShuffleVectorInst>(I))
+ return true;
+ // any other insns that change from vector to scalar are marked to be on
+ // safe side
+ if (!areBothVectorOrScalar(BaseValue, I))
+ return true;
+ return false;
+ };
+
for (auto Pair : States) {
Instruction *I = cast<Instruction>(Pair.first);
BDVState State = Pair.second;
@@ -1026,30 +1062,13 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
"why did it get added?");
assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
- if (!State.isBase() || !isa<VectorType>(BaseValue->getType()))
+ // since we only mark vec-scalar insns as conflicts in the pass, our work is
+ // done if the instruction already conflicts
+ if (State.isConflict())
continue;
- // extractelement instructions are a bit special in that we may need to
- // insert an extract even when we know an exact base for the instruction.
- // The problem is that we need to convert from a vector base to a scalar
- // base for the particular indice we're interested in.
- if (isa<ExtractElementInst>(I)) {
- auto *EE = cast<ExtractElementInst>(I);
- // TODO: In many cases, the new instruction is just EE itself. We should
- // exploit this, but can't do it here since it would break the invariant
- // about the BDV not being known to be a base.
- auto *BaseInst = ExtractElementInst::Create(
- State.getBaseValue(), EE->getIndexOperand(), "base_ee", EE);
- BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
- States[I] = BDVState(I, BDVState::Base, BaseInst);
- setKnownBase(BaseInst, /* IsKnownBase */true, KnownBases);
- } else if (!isa<VectorType>(I->getType())) {
- // We need to handle cases that have a vector base but the instruction is
- // a scalar type (these could be phis or selects or any instruction that
- // are of scalar type, but the base can be a vector type). We
- // conservatively set this as conflict. Setting the base value for these
- // conflicts is handled in the next loop which traverses States.
+
+ if (MarkConflict(I, BaseValue))
States[I] = BDVState(I, BDVState::Conflict);
- }
}
#ifndef NDEBUG
diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/base-vector.ll b/llvm/test/Transforms/RewriteStatepointsForGC/base-vector.ll
index a29d60e129cb34f..b3ac71ac18db967 100644
--- a/llvm/test/Transforms/RewriteStatepointsForGC/base-vector.ll
+++ b/llvm/test/Transforms/RewriteStatepointsForGC/base-vector.ll
@@ -352,3 +352,36 @@ entry:
call void @use_vec(<4 x ptr addrspace(1)> %vec)
ret void
}
+
+define i32 @test13() gc "statepoint-example" {
+; CHECK-LABEL: define i32 @test13() gc "statepoint-example" {
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i8, <2 x ptr addrspace(1)> zeroinitializer, <2 x i64> <i64 948, i64 896>
+; CHECK-NEXT: [[SHUFFLEVECTOR_BASE:%.*]] = shufflevector <2 x ptr addrspace(1)> zeroinitializer, <2 x ptr addrspace(1)> zeroinitializer, <4 x i32> <i32 0, i32 3, i32 1, i32 2>, !is_base_value !0
+; CHECK-NEXT: [[SHUFFLEVECTOR:%.*]] = shufflevector <2 x ptr addrspace(1)> zeroinitializer, <2 x ptr addrspace(1)> [[GETELEMENTPTR]], <4 x i32> <i32 0, i32 3, i32 1, i32 2>
+; CHECK-NEXT: br label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[PHI_BASE:%.*]] = phi <4 x ptr addrspace(1)> [ [[SHUFFLEVECTOR_BASE]], [[BB:%.*]] ], [ zeroinitializer, [[BB1]] ], !is_base_value !0
+; CHECK-NEXT: [[PHI:%.*]] = phi <4 x ptr addrspace(1)> [ [[SHUFFLEVECTOR]], [[BB]] ], [ zeroinitializer, [[BB1]] ]
+; CHECK-NEXT: [[EXTRACTELEMENT_BASE:%.*]] = extractelement <4 x ptr addrspace(1)> [[PHI_BASE]], i32 0, !is_base_value !0
+; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(1)> [[PHI]], i32 0
+; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 2882400000, i32 0, ptr elementtype(i32 ()) @spam, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(ptr addrspace(1) [[EXTRACTELEMENT]], ptr addrspace(1) [[EXTRACTELEMENT_BASE]]) ]
+; CHECK-NEXT: [[EXTRACTELEMENT_RELOCATED:%.*]] = call coldcc ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token [[STATEPOINT_TOKEN]], i32 1, i32 0)
+; CHECK-NEXT: [[EXTRACTELEMENT_BASE_RELOCATED:%.*]] = call coldcc ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token [[STATEPOINT_TOKEN]], i32 1, i32 1)
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[EXTRACTELEMENT_RELOCATED]], align 4
+; CHECK-NEXT: br label [[BB1]]
+;
+bb:
+ %getelementptr = getelementptr i8, <2 x ptr addrspace(1)> zeroinitializer, <2 x i64> <i64 948, i64 896>
+ %shufflevector = shufflevector <2 x ptr addrspace(1)> zeroinitializer, <2 x ptr addrspace(1)> %getelementptr, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
+ br label %bb1
+
+bb1: ; preds = %bb1, %bb
+ %phi = phi <4 x ptr addrspace(1)> [ %shufflevector, %bb ], [ zeroinitializer, %bb1 ]
+ %extractelement = extractelement <4 x ptr addrspace(1)> %phi, i32 0
+ %call = call i32 @spam()
+ %load = load i32, ptr addrspace(1) %extractelement, align 4
+ br label %bb1
+}
+
+declare i32 @spam() gc "statepoint-example"
\ No newline at end of file
diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/constants.ll b/llvm/test/Transforms/RewriteStatepointsForGC/constants.ll
index 106d786351eec9c..bdd4034f7cbb288 100644
--- a/llvm/test/Transforms/RewriteStatepointsForGC/constants.ll
+++ b/llvm/test/Transforms/RewriteStatepointsForGC/constants.ll
@@ -262,3 +262,15 @@ entry:
; CHECK-DAG: call {{.*}}gc.relocate{{.*}}(%val.base, %val)
ret <2 x ptr addrspace(1)> %val
}
+
+define ptr addrspace(1) @test14() gc "statepoint-example" {
+; CHECK-LABEL: @test14
+; FIXME: this is a extract element of constant vector and as such should not be relocated, but our current code relocates it since the extractelement will conservatively be marked as conflict during the BDV insertion algorithm. In many cases the extra instruction will be identical to the one already present and will be removed by later phases.
+entry:
+ %val = extractelement <2 x ptr addrspace(1)> <ptr addrspace(1) inttoptr (i64 5 to ptr addrspace(1)),
+ ptr addrspace(1) inttoptr (i64 15 to ptr addrspace(1))>, i32 0
+ ; CHECK: gc.statepoint
+ ; CHECK: call {{.*}}gc.relocate
+ call void @foo() [ "deopt"() ]
+ ret ptr addrspace(1) %val
+}
\ No newline at end of file
diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/insert-extract.ll b/llvm/test/Transforms/RewriteStatepointsForGC/insert-extract.ll
new file mode 100644
index 000000000000000..4f99727e9874de0
--- /dev/null
+++ b/llvm/test/Transforms/RewriteStatepointsForGC/insert-extract.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt < %s -passes=rewrite-statepoints-for-gc -S 2>&1 | FileCheck %s
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @barney() gc "statepoint-example" {
+; CHECK-LABEL: define void @barney() gc "statepoint-example" {
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i8, <2 x ptr addrspace(1)> zeroinitializer, <2 x i64> <i64 16, i64 8>
+; CHECK-NEXT: [[EXTRACTELEMENT_BASE:%.*]] = extractelement <2 x ptr addrspace(1)> zeroinitializer, i32 0, !is_base_value !0
+; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = extractelement <2 x ptr addrspace(1)> [[GETELEMENTPTR]], i32 0
+; CHECK-NEXT: [[INSERTELEMENT_BASE:%.*]] = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[EXTRACTELEMENT_BASE]], i32 0, !is_base_value !0
+; CHECK-NEXT: [[INSERTELEMENT:%.*]] = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[EXTRACTELEMENT]], i32 0
+; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 2882400000, i32 0, ptr elementtype(i8 ()) @foo, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(<2 x ptr addrspace(1)> [[INSERTELEMENT]], <2 x ptr addrspace(1)> [[INSERTELEMENT_BASE]]) ]
+; CHECK-NEXT: [[INSERTELEMENT_RELOCATED:%.*]] = call coldcc <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token [[STATEPOINT_TOKEN]], i32 1, i32 0)
+; CHECK-NEXT: [[INSERTELEMENT_BASE_RELOCATED:%.*]] = call coldcc <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token [[STATEPOINT_TOKEN]], i32 1, i32 1)
+; CHECK-NEXT: [[EXTRACTELEMENT1:%.*]] = extractelement <2 x ptr addrspace(1)> [[INSERTELEMENT_RELOCATED]], i32 0
+; CHECK-NEXT: ret void
+;
+bb:
+ %getelementptr = getelementptr i8, <2 x ptr addrspace(1)> zeroinitializer, <2 x i64> <i64 16, i64 8>
+ %extractelement = extractelement <2 x ptr addrspace(1)> %getelementptr, i32 0
+ %insertelement = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) %extractelement, i32 0
+ %call = call i8 @foo()
+ %extractelement1 = extractelement <2 x ptr addrspace(1)> %insertelement, i32 0
+ ret void
+}
+
+
+define void @bart() gc "statepoint-example" {
+; CHECK-LABEL: define void @bart() gc "statepoint-example" {
+; CHECK-NEXT: always_continue:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, <2 x ptr addrspace(1)> zeroinitializer, <2 x i64> <i64 16, i64 8>
+; CHECK-NEXT: [[BASE_EE:%.*]] = extractelement <2 x ptr addrspace(1)> zeroinitializer, i32 0, !is_base_value !0
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x ptr addrspace(1)> [[TMP0]], i32 0
+; CHECK-NEXT: [[BASE_IE:%.*]] = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[BASE_EE]], i32 0, !is_base_value !0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) [[TMP1]], i32 0
+; CHECK-NEXT: [[OTHER_BASE:%.*]] = insertelement <2 x ptr addrspace(1)> [[BASE_IE]], ptr addrspace(1) [[BASE_EE]], i32 0, !is_base_value !0
+; CHECK-NEXT: [[OTHER:%.*]] = insertelement <2 x ptr addrspace(1)> [[TMP2]], ptr addrspace(1) [[TMP1]], i32 0
+; CHECK-NEXT: [[STATEPOINT_TOKEN:%.*]] = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 2882400000, i32 0, ptr elementtype(i8 ()) @foo, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(<2 x ptr addrspace(1)> [[OTHER]], <2 x ptr addrspace(1)> [[OTHER_BASE]]) ]
+; CHECK-NEXT: [[OTHER_RELOCATED:%.*]] = call coldcc <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token [[STATEPOINT_TOKEN]], i32 1, i32 0)
+; CHECK-NEXT: [[OTHER_BASE_RELOCATED:%.*]] = call coldcc <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token [[STATEPOINT_TOKEN]], i32 1, i32 1)
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x ptr addrspace(1)> [[OTHER_RELOCATED]], i32 0
+; CHECK-NEXT: ret void
+;
+always_continue:
+ %0 = getelementptr i8, <2 x ptr addrspace(1)> zeroinitializer, <2 x i64> <i64 16, i64 8>
+ %1 = extractelement <2 x ptr addrspace(1)> %0, i32 0
+ %2 = insertelement <2 x ptr addrspace(1)> zeroinitializer, ptr addrspace(1) %1, i32 0
+ %other = insertelement <2 x ptr addrspace(1)> %2, ptr addrspace(1) %1, i32 0
+ %3 = call i8 @foo()
+ %4 = extractelement <2 x ptr addrspace(1)> %other, i32 0
+ ret void
+}
+
+declare i8 @foo()
\ No newline at end of file
diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/vector-bitcast.ll b/llvm/test/Transforms/RewriteStatepointsForGC/vector-bitcast.ll
index 4c9e7d6af4ca9d3..7324f5f5a4d217e 100644
--- a/llvm/test/Transforms/RewriteStatepointsForGC/vector-bitcast.ll
+++ b/llvm/test/Transforms/RewriteStatepointsForGC/vector-bitcast.ll
@@ -14,7 +14,7 @@ define i32 @test() gc "statepoint-example" {
entry:
; CHECK-LABEL: entry
; CHECK: %bc = bitcast
-; CHECK: %[[p1:[A-Za-z0-9_]+]] = extractelement
+; CHECK: %[[p1:[A-Za-z0-9_.]+]] = extractelement
; CHECK: %[[p2:[A-Za-z0-9_]+]] = extractelement
; CHECK: llvm.experimental.gc.statepoint
; CHECK: %[[p2]].relocated = {{.+}} @llvm.experimental.gc.relocate
>From c51fdf930031412b0691cf8608af2b68d93d8193 Mon Sep 17 00:00:00 2001
From: Petr Maj <53400784+zduka at users.noreply.github.com>
Date: Tue, 24 Oct 2023 16:44:16 +0200
Subject: [PATCH 2/4] Update
llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
Co-authored-by: annamthomas <anna at azul.com>
---
llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 4308711fdb32f42..417af47fd2d4447 100644
--- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -1027,7 +1027,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
// will be identical to the I itself (if the I's operate on their BDVs
// themselves). We should expoit this, but can't do it here since it would
// break the invariant about the BDVs not being known to be a base.
- // TODO: the code also does not handle constants at all - the algithm relies
+ // TODO: the code also does not handle constants at all - the algorithm relies
// on all constants having the same BDV and therefore constant-only insns
// will never be in conflict, but this check is ignored here. If the
// constant conflicts will be to BDVs themselves, they will be identical
>From 43ec4b274021e0b174ced8bd796c7bf217b7e968 Mon Sep 17 00:00:00 2001
From: Petr Maj <53400784+zduka at users.noreply.github.com>
Date: Tue, 24 Oct 2023 16:44:47 +0200
Subject: [PATCH 3/4] Update
llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
Co-authored-by: annamthomas <anna at azul.com>
---
llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 417af47fd2d4447..061b31ef526fbb4 100644
--- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -1016,7 +1016,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
// pointers at this point, there are cases where the base is of an
// incompatible type compared to the original instruction. We conservatively
// mark those as conflicts to ensure that corresponding BDVs will be generated
- // in the next steps
+ // in the next steps.
// this is a rather explicit check for all cases where we should mark the
// state as a conflict to force the latter stages of the algorithm to emit
>From 126de75a5dea173a1e66c8c1cb62f2a6c057b4ae Mon Sep 17 00:00:00 2001
From: Petr Maj <53400784+zduka at users.noreply.github.com>
Date: Tue, 24 Oct 2023 16:45:16 +0200
Subject: [PATCH 4/4] Update
llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
Co-authored-by: annamthomas <anna at azul.com>
---
llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 061b31ef526fbb4..6217acb850f9090 100644
--- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -1040,7 +1040,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
if (isa<ExtractElementInst>(I))
return true;
// shuffle vector is always conflict as it creates new vector from existing
- // ones
+ // ones.
if (isa<ShuffleVectorInst>(I))
return true;
// any other insns that change from vector to scalar are marked to be on
More information about the llvm-commits
mailing list