[PATCH] D82969: AMDGPU: Support commuting register and global operand
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 1 08:06:22 PDT 2020
arsenm created this revision.
arsenm added reviewers: rampitec, kerbowa, scott.linder.
Herald added subscribers: hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, jvesely, kzhuravl.
Herald added a project: LLVM.
https://reviews.llvm.org/D82969
Files:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/test/CodeGen/AMDGPU/lds-relocs.ll
llvm/test/CodeGen/AMDGPU/machine-cse-commute-target-flags.mir
Index: llvm/test/CodeGen/AMDGPU/machine-cse-commute-target-flags.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/machine-cse-commute-target-flags.mir
+++ llvm/test/CodeGen/AMDGPU/machine-cse-commute-target-flags.mir
@@ -5,7 +5,11 @@
define void @commute_instruction_subreg_target_flag() { ret void }
define void @commute_target_flag_frame_index() { ret void }
define void @commute_target_flag_global() { ret void }
+ define void @commute_target_flag_global_offset() { ret void }
+ define void @commute_target_flag_global_offset_mismatch() { ret void }
+
declare void @func()
+ @gv = external addrspace(1) global i32
...
@@ -64,11 +68,49 @@
; CHECK: liveins: $sgpr0_sgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, target-flags(amdgpu-rel32-lo) @func, implicit-def dead $scc
- ; CHECK: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @func, [[COPY]].sub0, implicit-def dead $scc
- ; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADD_U32_1]]
+ ; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADD_U32_]]
%0:sreg_64 = COPY $sgpr0_sgpr1
%1:sreg_32 = S_ADD_U32 %0.sub0, target-flags(amdgpu-rel32-lo) @func, implicit-def dead $scc
%2:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @func, %0.sub0, implicit-def dead $scc
S_ENDPGM 0, implicit %1, implicit %2
...
+
+---
+name: commute_target_flag_global_offset
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1
+
+ ; CHECK-LABEL: name: commute_target_flag_global_offset
+ ; CHECK: liveins: $sgpr0_sgpr1
+ ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, target-flags(amdgpu-rel32-lo) @gv + 4, implicit-def dead $scc
+ ; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADD_U32_]]
+ %0:sreg_64 = COPY $sgpr0_sgpr1
+ %1:sreg_32 = S_ADD_U32 %0.sub0, target-flags(amdgpu-rel32-lo) @gv + 4, implicit-def dead $scc
+ %2:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @gv + 4, %0.sub0, implicit-def dead $scc
+ S_ENDPGM 0, implicit %1, implicit %2
+
+...
+
+---
+name: commute_target_flag_global_offset_mismatch
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1
+
+ ; CHECK-LABEL: name: commute_target_flag_global_offset_mismatch
+ ; CHECK: liveins: $sgpr0_sgpr1
+ ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, target-flags(amdgpu-rel32-lo) @gv + 4, implicit-def dead $scc
+ ; CHECK: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @gv + 8, [[COPY]].sub0, implicit-def dead $scc
+ ; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADD_U32_1]]
+ %0:sreg_64 = COPY $sgpr0_sgpr1
+ %1:sreg_32 = S_ADD_U32 %0.sub0, target-flags(amdgpu-rel32-lo) @gv + 4, implicit-def dead $scc
+ %2:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @gv + 8, %0.sub0, implicit-def dead $scc
+ S_ENDPGM 0, implicit %1, implicit %2
+
+...
Index: llvm/test/CodeGen/AMDGPU/lds-relocs.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/lds-relocs.ll
+++ llvm/test/CodeGen/AMDGPU/lds-relocs.ll
@@ -35,7 +35,7 @@
; GCN: v_mov_b32_e32 v1, lds.external@abs32@lo ; encoding: [0xff,0x02,0x02,0x7e,A,A,A,A]
; GCN-NEXT: ; fixup A - offset: 4, value: lds.external@abs32@lo, kind: FK_Data_4{{$}}
;
-; GCN: s_add_i32 s0, lds.defined@abs32@lo, s0 ; encoding: [0xff,0x00,0x00,0x81,A,A,A,A]
+; GCN: s_add_i32 s0, s0, lds.defined@abs32@lo ; encoding: [0x00,0xff,0x00,0x81,A,A,A,A]
; GCN-NEXT: ; fixup A - offset: 4, value: lds.defined@abs32@lo, kind: FK_Data_4{{$}}
;
; GCN: .globl lds.external
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1857,7 +1857,10 @@
RegOp.ChangeToImmediate(NonRegOp.getImm());
else if (NonRegOp.isFI())
RegOp.ChangeToFrameIndex(NonRegOp.getIndex());
- else
+ else if (NonRegOp.isGlobal()) {
+ RegOp.ChangeToGA(NonRegOp.getGlobal(), NonRegOp.getOffset(),
+ NonRegOp.getTargetFlags());
+ } else
return nullptr;
// Make sure we don't reinterpret a subreg index in the target flags.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D82969.274816.patch
Type: text/x-patch
Size: 4548 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200701/9a32be66/attachment.bin>
More information about the llvm-commits mailing list