[llvm] e5296c5 - [AMDGPU] Relax restrictions on unbreakable PHI users in BreakLargePHis
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 14 00:02:57 PDT 2023
Author: pvanhout
Date: 2023-07-14T09:02:51+02:00
New Revision: e5296c52e51bf214da32734d2344c9380c58a347
URL: https://github.com/llvm/llvm-project/commit/e5296c52e51bf214da32734d2344c9380c58a347
DIFF: https://github.com/llvm/llvm-project/commit/e5296c52e51bf214da32734d2344c9380c58a347.diff
LOG: [AMDGPU] Relax restrictions on unbreakable PHI users in BreakLargePHis
The previous heuristic rejected a PHI if one of its user was an unbreakable PHI, no matter what the other users were.
This worked well in most cases, but there's one case in rocRAND where
it doesn't work. In that case, a PHI node has 2 PHI users where one is
breakable but not the other. When that PHI node isn't broken performance falls by 35%.
Relaxing the restriction to "require that half of the PHI node users are breakable" fixes the issue, and seems like a sensible change.
Solves SWDEV-409648, SWDEV-398393
Reviewed By: #amdgpu, arsenm
Differential Revision: https://reviews.llvm.org/D155184
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 3abd13c1b24c3f..efa0be9f1796c0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -1549,13 +1549,18 @@ bool AMDGPUCodeGenPrepareImpl::canBreakPHINode(const PHINode &I) {
// node as user, we don't want to break this PHI either because it's unlikely
// to be beneficial. We would just explode the vector and reassemble it
// directly, wasting instructions.
+ //
+ // In the case where multiple users are PHI nodes, we want at least half of
+ // them to be breakable.
+ int Score = 0;
for (const Value *U : I.users()) {
- if (const auto *PU = dyn_cast<PHINode>(U)) {
- if (!canBreakPHINode(*PU))
- return false;
- }
+ if (const auto *PU = dyn_cast<PHINode>(U))
+ Score += canBreakPHINode(*PU) ? 1 : -1;
}
+ if (Score < 0)
+ return false;
+
return BreakPhiNodesCache[&I] = true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll
index bae5eac347015b..64d82654a005f9 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll
@@ -680,6 +680,83 @@ end:
ret void
}
+
+define amdgpu_kernel void @used_by_unbreakable_and_breakable_phi(<5 x double> %in, ptr %out, i1 %cond, i1 %cond2) {
+; CHECK-LABEL: @used_by_unbreakable_and_breakable_phi(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK: then:
+; CHECK-NEXT: [[X:%.*]] = insertelement <5 x double> [[IN:%.*]], double 3.140000e+00, i64 3
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE0:%.*]] = extractelement <5 x double> [[X]], i64 0
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE2:%.*]] = extractelement <5 x double> [[X]], i64 1
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE4:%.*]] = extractelement <5 x double> [[X]], i64 2
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE6:%.*]] = extractelement <5 x double> [[X]], i64 3
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE8:%.*]] = extractelement <5 x double> [[X]], i64 4
+; CHECK-NEXT: br label [[FINALLY:%.*]]
+; CHECK: else:
+; CHECK-NEXT: br label [[FINALLY]]
+; CHECK: finally:
+; CHECK-NEXT: [[TMP0:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE0]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE2]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE4]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE6]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE8]], [[THEN]] ], [ 0.000000e+00, [[ELSE]] ]
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE0:%.*]] = insertelement <5 x double> poison, double [[TMP0]], i64 0
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE1:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE0]], double [[TMP1]], i64 1
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE2:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE1]], double [[TMP2]], i64 2
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE3:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE2]], double [[TMP3]], i64 3
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE4:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE3]], double [[TMP4]], i64 4
+; CHECK-NEXT: store <5 x double> [[LARGEPHI_INSERTSLICE4]], ptr [[OUT:%.*]], align 1
+; CHECK-NEXT: br i1 [[COND2:%.*]], label [[THEN1:%.*]], label [[END:%.*]]
+; CHECK: then1:
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE01:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 0
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE22:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 1
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE43:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 2
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE64:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 3
+; CHECK-NEXT: [[LARGEPHI_EXTRACTSLICE85:%.*]] = extractelement <5 x double> [[LARGEPHI_INSERTSLICE4]], i64 4
+; CHECK-NEXT: br label [[END]]
+; CHECK: end:
+; CHECK-NEXT: [[ENDVAL:%.*]] = phi <5 x double> [ [[LARGEPHI_INSERTSLICE4]], [[THEN1]] ], [ [[IN]], [[FINALLY]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE01]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE22]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ]
+; CHECK-NEXT: [[TMP7:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE43]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE64]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ]
+; CHECK-NEXT: [[TMP9:%.*]] = phi double [ [[LARGEPHI_EXTRACTSLICE85]], [[THEN1]] ], [ 0.000000e+00, [[FINALLY]] ]
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE06:%.*]] = insertelement <5 x double> poison, double [[TMP5]], i64 0
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE17:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE06]], double [[TMP6]], i64 1
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE28:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE17]], double [[TMP7]], i64 2
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE39:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE28]], double [[TMP8]], i64 3
+; CHECK-NEXT: [[LARGEPHI_INSERTSLICE410:%.*]] = insertelement <5 x double> [[LARGEPHI_INSERTSLICE39]], double [[TMP9]], i64 4
+; CHECK-NEXT: store <5 x double> [[ENDVAL]], ptr [[OUT]], align 1
+; CHECK-NEXT: store <5 x double> [[LARGEPHI_INSERTSLICE410]], ptr [[OUT]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ br i1 %cond, label %then, label %else
+
+then:
+ %x = insertelement <5 x double> %in, double 3.140000e+00, i64 3
+ br label %finally
+
+else:
+ br label %finally
+
+finally:
+ %val = phi <5 x double> [ %x, %then ], [ zeroinitializer, %else ]
+ store <5 x double> %val, ptr %out, align 1
+ br i1 %cond2, label %then1, label %end
+
+then1:
+ br label %end
+
+end:
+ %endval = phi <5 x double> [ %val, %then1 ], [ %in, %finally ]
+ %endval2 = phi <5 x double> [ %val, %then1 ], [ zeroinitializer, %finally ]
+ store <5 x double> %endval, ptr %out, align 1
+ store <5 x double> %endval2, ptr %out, align 1
+ ret void
+}
+
; check for infinite recursion
define amdgpu_kernel void @used_by_phi_self(<5 x double> %in, ptr %out, i8 %count) {
; CHECK-LABEL: @used_by_phi_self(
More information about the llvm-commits
mailing list