[llvm] a65b9dd - [AMDGPU] Divergence-driven instruction selection for bfm patterns
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 15 02:51:32 PST 2022
Author: Jay Foad
Date: 2022-02-15T10:49:18Z
New Revision: a65b9dd049c785e23df63b0ee104df6beb80fbb8
URL: https://github.com/llvm/llvm-project/commit/a65b9dd049c785e23df63b0ee104df6beb80fbb8
DIFF: https://github.com/llvm/llvm-project/commit/a65b9dd049c785e23df63b0ee104df6beb80fbb8.diff
LOG: [AMDGPU] Divergence-driven instruction selection for bfm patterns
Differential Revision: https://reviews.llvm.org/D119706
Added: (none)
Modified:
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/test/CodeGen/AMDGPU/bfm.ll
Removed: (none)
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 1edf17f0a1bb..6e6b44fbad7a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2784,20 +2784,21 @@ def : GCNPat<
(S_MOV_B32 SReg_32:$src)
>;
-multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
+multiclass BFMPatterns <ValueType vt, PatFrag SHL, PatFrag ADD, InstSI BFM> {
def : GCNPat <
- (vt (shl (vt (add (vt (shl 1, vt:$a)), -1)), vt:$b)),
+ (vt (SHL (vt (add (vt (shl 1, vt:$a)), -1)), vt:$b)),
(BFM $a, $b)
>;
def : GCNPat <
- (vt (add (vt (shl 1, vt:$a)), -1)),
- (BFM $a, (MOV (i32 0)))
+ (vt (ADD (vt (shl 1, vt:$a)), -1)),
+ (BFM $a, (i32 0))
>;
}
-defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
-// FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>;
+defm : BFMPatterns <i32, UniformBinFrag<shl>, UniformBinFrag<add>, S_BFM_B32>;
+// FIXME: defm : BFMPatterns <i64, UniformBinFrag<shl>, UniformBinFrag<add>, S_BFM_B64>;
+defm : BFMPatterns <i32, DivergentBinFrag<shl>, DivergentBinFrag<add>, V_BFM_B32_e64>;
// Bitfield extract patterns
diff --git a/llvm/test/CodeGen/AMDGPU/bfm.ll b/llvm/test/CodeGen/AMDGPU/bfm.ll
index 06e03c0a934a..33dd90ef2d12 100644
--- a/llvm/test/CodeGen/AMDGPU/bfm.ll
+++ b/llvm/test/CodeGen/AMDGPU/bfm.ll
@@ -1,10 +1,31 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefixes=SI,FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=SI,FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI %s
-; FUNC-LABEL: {{^}}bfm_pattern:
-; SI: s_bfm_b32 {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-define amdgpu_kernel void @bfm_pattern(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
+define amdgpu_kernel void @s_bfm_pattern(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
+; SI-LABEL: s_bfm_pattern:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_bfm_b32 s4, s4, s5
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, s4
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: s_bfm_pattern:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_bfm_b32 s2, s2, s3
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: v_mov_b32_e32 v1, s1
+; VI-NEXT: v_mov_b32_e32 v2, s2
+; VI-NEXT: flat_store_dword v[0:1], v2
+; VI-NEXT: s_endpgm
%a = shl i32 1, %x
%b = sub i32 %a, 1
%c = shl i32 %b, %y
@@ -12,9 +33,83 @@ define amdgpu_kernel void @bfm_pattern(i32 addrspace(1)* %out, i32 %x, i32 %y) #
ret void
}
-; FUNC-LABEL: {{^}}bfm_pattern_simple:
-; SI: s_bfm_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0
-define amdgpu_kernel void @bfm_pattern_simple(i32 addrspace(1)* %out, i32 %x) #0 {
+define amdgpu_kernel void @s_bfm_pattern_simple(i32 addrspace(1)* %out, i32 %x) #0 {
+; SI-LABEL: s_bfm_pattern_simple:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dword s2, s[0:1], 0xb
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_bfm_b32 s4, s2, 0
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: v_mov_b32_e32 v0, s4
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: s_bfm_pattern_simple:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
+; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_bfm_b32 s2, s2, 0
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: v_mov_b32_e32 v1, s1
+; VI-NEXT: v_mov_b32_e32 v2, s2
+; VI-NEXT: flat_store_dword v[0:1], v2
+; VI-NEXT: s_endpgm
+ %a = shl i32 1, %x
+ %b = sub i32 %a, 1
+ store i32 %b, i32 addrspace(1)* %out
+ ret void
+}
+
+define void @v_bfm_pattern(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
+; SI-LABEL: v_bfm_pattern:
+; SI: ; %bb.0:
+; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, 0
+; SI-NEXT: v_bfm_b32_e32 v2, v2, v3
+; SI-NEXT: s_mov_b32 s4, s6
+; SI-NEXT: s_mov_b32 s5, s6
+; SI-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
+; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; SI-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: v_bfm_pattern:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_bfm_b32 v2, v2, v3
+; VI-NEXT: flat_store_dword v[0:1], v2
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: s_setpc_b64 s[30:31]
+ %a = shl i32 1, %x
+ %b = sub i32 %a, 1
+ %c = shl i32 %b, %y
+ store i32 %c, i32 addrspace(1)* %out
+ ret void
+}
+
+define void @v_bfm_pattern_simple(i32 addrspace(1)* %out, i32 %x) #0 {
+; SI-LABEL: v_bfm_pattern_simple:
+; SI: ; %bb.0:
+; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, 0
+; SI-NEXT: v_bfm_b32_e64 v2, v2, 0
+; SI-NEXT: s_mov_b32 s4, s6
+; SI-NEXT: s_mov_b32 s5, s6
+; SI-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
+; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; SI-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: v_bfm_pattern_simple:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_bfm_b32 v2, v2, 0
+; VI-NEXT: flat_store_dword v[0:1], v2
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: s_setpc_b64 s[30:31]
%a = shl i32 1, %x
%b = sub i32 %a, 1
store i32 %b, i32 addrspace(1)* %out
More information about the llvm-commits mailing list