[llvm] [AArch64][GISel] Separate legalize actions for G_FREEZE from G_IMPLICIT_DEF (PR #88469)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 11 20:39:09 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Dhruv Chawla (dc03-work)
<details>
<summary>Changes</summary>
It does not make sense to scalarize G_FREEZE, as doing so generates
pairs of G_UNMERGE_VALUES and G_BUILD_VECTOR that are difficult to
optimize, especially when operations like G_TRUNC sit between the
G_UNMERGE_VALUES and the G_FREEZE.
Instead, it is better to legalize a vector G_FREEZE the same way as any
other vector operation, since it gets lowered to a COPY during
instruction selection anyway.
This is an issue that was encountered when looking at the TSVC
benchmark, where the legalization of G_FREEZE would cause generation of
unnecessary MOVs that adversely affected the performance.
---
Full diff: https://github.com/llvm/llvm-project/pull/88469.diff
3 Files Affected:
- (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+13-2)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir (+90)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir (+2-3)
``````````diff
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 96ded69905f7cc..bae999c8fd61a7 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -87,8 +87,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
const bool HasCSSC = ST.hasCSSC();
const bool HasRCPC3 = ST.hasRCPC3();
- getActionDefinitionsBuilder(
- {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
+ getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_CONSTANT_FOLD_BARRIER})
.legalFor({p0, s8, s16, s32, s64})
.legalFor(PackedVectorAllTypeList)
.widenScalarToNextPow2(0)
@@ -106,6 +105,18 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
return std::make_pair(0, EltTy);
});
+ getActionDefinitionsBuilder(G_FREEZE)
+ .legalFor({p0, s8, s16, s32, s64})
+ .legalFor(PackedVectorAllTypeList)
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, s8, s64)
+ .moreElementsToNextPow2(0)
+ .widenVectorEltsToVectorMinSize(0, 64)
+ .clampNumElements(0, v8s8, v16s8)
+ .clampNumElements(0, v4s16, v8s16)
+ .clampNumElements(0, v2s32, v4s32)
+ .clampNumElements(0, v2s64, v2s64);
+
getActionDefinitionsBuilder(G_PHI)
.legalFor({p0, s16, s32, s64})
.legalFor(PackedVectorAllTypeList)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir
index be674d79b54f1d..0f9086fd8972fa 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-freeze.mir
@@ -130,3 +130,93 @@ body: |
%freeze:_(s2) = G_FREEZE %x
%ext:_(s64) = G_ZEXT %freeze
$x0 = COPY %ext(s64)
+...
+---
+name: test_freeze_v4s1
+body: |
+ bb.0.entry:
+ liveins: $q0
+ ; CHECK-LABEL: name: test_freeze_v4s1
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR]](<8 x s8>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[UV]]
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[FREEZE]](<4 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+ ; CHECK-NEXT: %ext:_(<4 x s32>) = G_AND [[ANYEXT1]], [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: $q0 = COPY %ext(<4 x s32>)
+ %x:_(<4 x s1>) = G_IMPLICIT_DEF
+ %freeze:_(<4 x s1>) = G_FREEZE %x
+ %ext:_(<4 x s32>) = G_ZEXT %freeze
+ $q0 = COPY %ext(<4 x s32>)
+...
+---
+name: test_freeze_v3s8
+body: |
+ bb.0.entry:
+ liveins: $q0
+ ; CHECK-LABEL: name: test_freeze_v3s8
+ ; CHECK: liveins: $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR]](<8 x s8>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[UV]]
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[FREEZE]](<4 x s16>)
+ ; CHECK-NEXT: %undef:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; CHECK-NEXT: %ext0:_(s32) = G_AND [[ANYEXT1]], [[C]]
+ ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s16)
+ ; CHECK-NEXT: %ext1:_(s32) = G_AND [[ANYEXT2]], [[C]]
+ ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s16)
+ ; CHECK-NEXT: %ext2:_(s32) = G_AND [[ANYEXT3]], [[C]]
+ ; CHECK-NEXT: %res:_(<4 x s32>) = G_BUILD_VECTOR %ext0(s32), %ext1(s32), %ext2(s32), %undef(s32)
+ ; CHECK-NEXT: $q0 = COPY %res(<4 x s32>)
+ %x:_(<3 x s8>) = G_IMPLICIT_DEF
+ %freeze:_(<3 x s8>) = G_FREEZE %x
+ %ext:_(<3 x s32>) = G_ZEXT %freeze
+ %undef:_(s32) = G_IMPLICIT_DEF
+ %ext0:_(s32), %ext1:_(s32), %ext2:_(s32) = G_UNMERGE_VALUES %ext
+ %res:_(<4 x s32>) = G_BUILD_VECTOR %ext0, %ext1, %ext2, %undef
+ $q0 = COPY %res(<4 x s32>)
+...
+---
+name: test_freeze_v4s1_select
+body: |
+ bb.0.entry:
+ liveins: $q0, $q1
+ ; CHECK-LABEL: name: test_freeze_v4s1_select
+ ; CHECK: liveins: $q0, $q1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+ ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(<4 x s32>) = nnan ninf nsz arcp contract afn reassoc G_FCMP floatpred(olt), [[COPY]](<4 x s32>), [[BUILD_VECTOR]]
+ ; CHECK-NEXT: [[FCMP1:%[0-9]+]]:_(<4 x s32>) = nnan ninf nsz arcp contract afn reassoc G_FCMP floatpred(ogt), [[COPY1]](<4 x s32>), [[COPY]]
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[FCMP1]](<4 x s32>)
+ ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[TRUNC]]
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[FCMP]](<4 x s32>)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC1]], [[FREEZE]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[AND]](<4 x s16>)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s32>) = G_AND [[ANYEXT]], [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: $q0 = COPY [[AND1]](<4 x s32>)
+ %1:_(<4 x s32>) = COPY $q0
+ %2:_(<4 x s32>) = COPY $q1
+ %3:_(s32) = G_CONSTANT i32 0
+ %4:_(<4 x s32>) = G_BUILD_VECTOR %3, %3, %3, %3
+ %5:_(s1) = G_CONSTANT i1 false
+ %6:_(<4 x s1>) = nnan ninf nsz arcp contract afn reassoc G_FCMP floatpred(olt), %1:_(<4 x s32>), %4:_
+ %7:_(<4 x s1>) = nnan ninf nsz arcp contract afn reassoc G_FCMP floatpred(ogt), %2:_(<4 x s32>), %1:_
+ %8:_(<4 x s1>) = G_FREEZE %7
+ %9:_(<4 x s1>) = G_AND %6, %8
+ %10:_(<4 x s32>) = G_ZEXT %9
+ $q0 = COPY %10
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 200e9d19d58d25..c82052154a147f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -131,9 +131,8 @@
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
#
# DEBUG-NEXT: G_FREEZE (opcode {{[0-9]+}}): 1 type index, 0 imm indices
-# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
-# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
-# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. the first uncovered type index: {{[0-9]+}}, OK
+# DEBUG-NEXT: .. the first uncovered imm index: {{[0-9]+}}, OK
# DEBUG-NEXT: G_CONSTANT_FOLD_BARRIER (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
``````````
</details>
https://github.com/llvm/llvm-project/pull/88469
More information about the llvm-commits
mailing list