[llvm] [GlobalISel] Combine [a, s, z]ext of undef into 0 or undef (PR #117439)
Thorsten Schütt via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 23 07:28:36 PST 2024
https://github.com/tschuett created https://github.com/llvm/llvm-project/pull/117439
Alternative for https://github.com/llvm/llvm-project/pull/113764
It builds on a minimalistic approach: the legality check happens in the match step and the apply is blind. Precise patterns are used for better compile time and modularity, and the pattern check moves into the combiner, whereas unary_undef_to_zero and propagate_undef_any_op rely on custom C++ code for pattern matching.
Is there a limit on the number of patterns?
G_ANYEXT of undef -> undef
G_SEXT of undef -> 0
G_ZEXT of undef -> 0
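G_ANYEXT leaves the high bits of the result unspecified, so an undef source can stay undef, while G_SEXT and G_ZEXT constrain the high bits to sign/zero copies, so their results are folded to the constant 0 instead. Roughly, mirroring the pre-legalizer CHECK lines in the added test, the rewrites look like this in MIR:

  %undef:_(s32) = G_IMPLICIT_DEF
  %aext:_(s64) = G_ANYEXT %undef(s32)   ; becomes %aext:_(s64) = G_IMPLICIT_DEF
  %sext:_(s64) = G_SEXT %undef(s32)     ; becomes %sext:_(s64) = G_CONSTANT i64 0
  %zext:_(s64) = G_ZEXT %undef(s32)     ; becomes %zext:_(s64) = G_CONSTANT i64 0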
The combine is not part of the post-legalizer combiner for AArch64.
Test:
llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
From 38923158c65e9114e7f7cb1c40a176ccea6a9612 Mon Sep 17 00:00:00 2001
From: Thorsten Schütt <schuett at gmail.com>
Date: Sat, 23 Nov 2024 16:27:05 +0100
Subject: [PATCH] [GlobalISel] Combine [a,s,z]ext of undef into 0 or undef
Alternative for https://github.com/llvm/llvm-project/pull/113764
It builds on a minimalistic approach: the legality check happens in
the match step and the apply is blind. Precise patterns are used for
better compile time and modularity, and the pattern check moves into
the combiner, whereas unary_undef_to_zero and propagate_undef_any_op
rely on custom C++ code for pattern matching.
Is there a limit on the number of patterns?
G_ANYEXT of undef -> undef
G_SEXT of undef -> 0
G_ZEXT of undef -> 0
The combine is not part of the post-legalizer combiner for AArch64.
Test:
llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
---
.../llvm/CodeGen/GlobalISel/CombinerHelper.h | 4 ++
.../include/llvm/Target/GlobalISel/Combine.td | 28 +++++++++-
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 4 ++
.../AArch64/GlobalISel/combine-cast.mir | 52 +++++++++++++++++++
.../CodeGen/AArch64/extract-vector-elt.ll | 15 ++----
.../combine-amdgpu-cvt-f32-ubyte.mir | 6 +--
.../CodeGen/AMDGPU/shrink-add-sub-constant.ll | 7 +--
7 files changed, 93 insertions(+), 23 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 55c3b72c8e027f..6662c1055aa17a 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -150,6 +150,10 @@ class CombinerHelper {
/// is a legal integer constant type on the target.
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const;
+ /// \return true if the combine is running prior to legalization, or if \p Ty
+ /// is a legal undef type on the target.
+ bool isUndefLegalOrBeforeLegalizer(const LLT Ty) const;
+
/// MachineRegisterInfo::replaceRegWith() and inform the observer of the changes
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const;
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index b0c63fc7c7b806..fee695c4333d99 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -428,7 +428,7 @@ def unary_undef_to_zero: GICombineRule<
// replaced with undef.
def propagate_undef_any_op: GICombineRule<
(defs root:$root),
- (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR, G_TRUNC, G_BITCAST, G_ANYEXT):$root,
+ (match (wip_match_opcode G_ADD, G_FPTOSI, G_FPTOUI, G_SUB, G_XOR, G_TRUNC, G_BITCAST):$root,
[{ return Helper.matchAnyExplicitUseIsUndef(*${root}); }]),
(apply [{ Helper.replaceInstWithUndef(*${root}); }])>;
@@ -1857,6 +1857,27 @@ class integer_of_opcode<Instruction castOpcode> : GICombineRule <
def integer_of_truncate : integer_of_opcode<G_TRUNC>;
+def anyext_undef: GICombineRule<
+ (defs root:$root),
+ (match (G_IMPLICIT_DEF $undef),
+ (G_ANYEXT $root, $undef):$Aext,
+ [{ return Helper.isUndefLegalOrBeforeLegalizer(MRI.getType(${Aext}->getOperand(0).getReg())); }]),
+ (apply [{ Helper.replaceInstWithUndef(*${Aext}); }])>;
+
+def zext_undef: GICombineRule<
+ (defs root:$root),
+ (match (G_IMPLICIT_DEF $undef),
+ (G_ZEXT $root, $undef):$Zext,
+ [{ return Helper.isConstantLegalOrBeforeLegalizer(MRI.getType(${Zext}->getOperand(0).getReg())); }]),
+ (apply [{ Helper.replaceInstWithConstant(*${Zext}, 0); }])>;
+
+def sext_undef: GICombineRule<
+ (defs root:$root),
+ (match (G_IMPLICIT_DEF $undef),
+ (G_SEXT $root, $undef):$Sext,
+ [{ return Helper.isConstantLegalOrBeforeLegalizer(MRI.getType(${Sext}->getOperand(0).getReg())); }]),
+ (apply [{ Helper.replaceInstWithConstant(*${Sext}, 0); }])>;
+
def cast_of_cast_combines: GICombineGroup<[
truncate_of_zext,
truncate_of_sext,
@@ -1882,7 +1903,10 @@ def cast_combines: GICombineGroup<[
narrow_binop_and,
narrow_binop_or,
narrow_binop_xor,
- integer_of_truncate
+ integer_of_truncate,
+ anyext_undef,
+ sext_undef,
+ zext_undef
]>;
def canonicalize_icmp : GICombineRule<
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index d95fc8cfbcf558..29074103115f59 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -171,6 +171,10 @@ bool CombinerHelper::isConstantLegalOrBeforeLegalizer(const LLT Ty) const {
isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
}
+bool CombinerHelper::isUndefLegalOrBeforeLegalizer(const LLT Ty) const {
+ return isPreLegalize() || isLegal({TargetOpcode::G_IMPLICIT_DEF, {Ty}});
+}
+
void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
Register ToReg) const {
Observer.changingAllUsesOfReg(MRI, FromReg);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
index b045deebc56e03..25161652dafac4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
@@ -217,3 +217,55 @@ body: |
%large:_(<2 x s64>) = G_ANYEXT %bv(<2 x s32>)
$q0 = COPY %large(<2 x s64>)
$d0 = COPY %bv(<2 x s32>)
+...
+---
+name: test_combine_anyext_undef
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-PRE-LABEL: name: test_combine_anyext_undef
+ ; CHECK-PRE: %aext:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-PRE-NEXT: $x0 = COPY %aext(s64)
+ ;
+ ; CHECK-POST-LABEL: name: test_combine_anyext_undef
+ ; CHECK-POST: %undef:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-POST-NEXT: %aext:_(s64) = G_ANYEXT %undef(s32)
+ ; CHECK-POST-NEXT: $x0 = COPY %aext(s64)
+ %undef:_(s32) = G_IMPLICIT_DEF
+ %aext:_(s64) = G_ANYEXT %undef(s32)
+ $x0 = COPY %aext(s64)
+...
+---
+name: test_combine_sext_undef
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-PRE-LABEL: name: test_combine_sext_undef
+ ; CHECK-PRE: %sext:_(s64) = G_CONSTANT i64 0
+ ; CHECK-PRE-NEXT: $x0 = COPY %sext(s64)
+ ;
+ ; CHECK-POST-LABEL: name: test_combine_sext_undef
+ ; CHECK-POST: %undef:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-POST-NEXT: %sext:_(s64) = G_SEXT %undef(s32)
+ ; CHECK-POST-NEXT: $x0 = COPY %sext(s64)
+ %undef:_(s32) = G_IMPLICIT_DEF
+ %sext:_(s64) = G_SEXT %undef(s32)
+ $x0 = COPY %sext(s64)
+...
+---
+name: test_combine_zext_undef
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-PRE-LABEL: name: test_combine_zext_undef
+ ; CHECK-PRE: %zext:_(s64) = G_CONSTANT i64 0
+ ; CHECK-PRE-NEXT: $x0 = COPY %zext(s64)
+ ;
+ ; CHECK-POST-LABEL: name: test_combine_zext_undef
+ ; CHECK-POST: %undef:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-POST-NEXT: %zext:_(s64) = G_ZEXT %undef(s32)
+ ; CHECK-POST-NEXT: $x0 = COPY %zext(s64)
+ %undef:_(s32) = G_IMPLICIT_DEF
+ %zext:_(s64) = G_ZEXT %undef(s32)
+ $x0 = COPY %zext(s64)
+...
diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
index 5e5fdd6d317057..e89e1516fb1f54 100644
--- a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
@@ -8,17 +8,10 @@
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for extract_v4i32_vector_extract_const
define i64 @extract_v2i64_undef_index(<2 x i64> %a, i32 %c) {
-; CHECK-SD-LABEL: extract_v2i64_undef_index:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: extract_v2i64_undef_index:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: str q0, [sp, #-16]!
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GI-NEXT: ldr x0, [sp], #16
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: extract_v2i64_undef_index:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
entry:
%d = extractelement <2 x i64> %a, i32 undef
ret i64 %d
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir
index 7893bfa1d38f08..9b39afd32ac378 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-amdgpu-cvt-f32-ubyte.mir
@@ -261,8 +261,7 @@ body: |
; CHECK-LABEL: name: cvt_f32_ubyte0_zext_lshr_16
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %shift:_(s16) = G_IMPLICIT_DEF
- ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %shift(s16)
+ ; CHECK-NEXT: %zext:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %zext
; CHECK-NEXT: $vgpr0 = COPY %result(s32)
%arg:_(s32) = COPY $vgpr0
@@ -284,8 +283,7 @@ body: |
; CHECK-LABEL: name: cvt_f32_ubyte0_zext_lshr_24
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %shift:_(s16) = G_IMPLICIT_DEF
- ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %shift(s16)
+ ; CHECK-NEXT: %zext:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: %result:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %zext
; CHECK-NEXT: $vgpr0 = COPY %result(s32)
%arg:_(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
index a1a466fb04440d..384a2c63122b85 100644
--- a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
@@ -4074,14 +4074,12 @@ define amdgpu_kernel void @v_test_v2i16_x_add_undef_neg32(ptr addrspace(1) %out,
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: flat_load_dword v3, v[0:1]
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
+; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT: v_not_b32_e32 v2, 31
-; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
-; VI-GISEL-NEXT: s_and_b32 s0, 0xffff, s0
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
; VI-GISEL-NEXT: v_add_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-GISEL-NEXT: v_or_b32_e32 v2, s0, v2
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
@@ -4191,15 +4189,12 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg32_undef(ptr addrspace(1) %out,
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: flat_load_dword v3, v[0:1]
-; VI-GISEL-NEXT: s_and_b32 s2, 0xffff, s0
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2
-; VI-GISEL-NEXT: s_lshl_b32 s0, s2, 16
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
; VI-GISEL-NEXT: v_add_u16_e32 v2, 0xffe0, v3
-; VI-GISEL-NEXT: v_or_b32_e32 v2, s0, v2
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;