[llvm] 14fe460 - AMDGPU: Support commuting register and global operand

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 1 10:59:24 PDT 2020


Author: Matt Arsenault
Date: 2020-07-01T13:59:13-04:00
New Revision: 14fe4607f15ecfc3e9423ddb35d025d1f1b67be5

URL: https://github.com/llvm/llvm-project/commit/14fe4607f15ecfc3e9423ddb35d025d1f1b67be5
DIFF: https://github.com/llvm/llvm-project/commit/14fe4607f15ecfc3e9423ddb35d025d1f1b67be5.diff

LOG: AMDGPU: Support commuting register and global operand

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/test/CodeGen/AMDGPU/lds-relocs.ll
    llvm/test/CodeGen/AMDGPU/machine-cse-commute-target-flags.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index efdfca35da29..d7cd624484fc 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1857,7 +1857,10 @@ static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
     RegOp.ChangeToImmediate(NonRegOp.getImm());
   else if (NonRegOp.isFI())
     RegOp.ChangeToFrameIndex(NonRegOp.getIndex());
-  else
+  else if (NonRegOp.isGlobal()) {
+    RegOp.ChangeToGA(NonRegOp.getGlobal(), NonRegOp.getOffset(),
+                     NonRegOp.getTargetFlags());
+  } else
     return nullptr;
 
   // Make sure we don't reinterpret a subreg index in the target flags.

diff  --git a/llvm/test/CodeGen/AMDGPU/lds-relocs.ll b/llvm/test/CodeGen/AMDGPU/lds-relocs.ll
index dd6bb1f0db2a..cf88c235e037 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-relocs.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-relocs.ll
@@ -35,7 +35,7 @@
 ; GCN: v_mov_b32_e32 v1, lds.external@abs32@lo ; encoding: [0xff,0x02,0x02,0x7e,A,A,A,A]
 ; GCN-NEXT:              ; fixup A - offset: 4, value: lds.external@abs32@lo, kind: FK_Data_4{{$}}
 ;
-; GCN: s_add_i32 s0, lds.defined@abs32@lo, s0 ; encoding: [0xff,0x00,0x00,0x81,A,A,A,A]
+; GCN: s_add_i32 s0, s0, lds.defined@abs32@lo ; encoding: [0x00,0xff,0x00,0x81,A,A,A,A]
 ; GCN-NEXT:          ; fixup A - offset: 4, value: lds.defined@abs32@lo, kind: FK_Data_4{{$}}
 ;
 ; GCN: .globl lds.external

diff  --git a/llvm/test/CodeGen/AMDGPU/machine-cse-commute-target-flags.mir b/llvm/test/CodeGen/AMDGPU/machine-cse-commute-target-flags.mir
index 22ba87b9572d..a6e6eeb3fc31 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-cse-commute-target-flags.mir
+++ b/llvm/test/CodeGen/AMDGPU/machine-cse-commute-target-flags.mir
@@ -5,7 +5,11 @@
   define void @commute_instruction_subreg_target_flag() { ret void }
   define void @commute_target_flag_frame_index() { ret void }
   define void @commute_target_flag_global() { ret void }
+  define void @commute_target_flag_global_offset() { ret void }
+  define void @commute_target_flag_global_offset_mismatch() { ret void }
+
   declare void @func()
+  @gv = external addrspace(1) global i32
 
 ...
 
@@ -64,11 +68,49 @@ body:             |
     ; CHECK: liveins: $sgpr0_sgpr1
     ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
     ; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, target-flags(amdgpu-rel32-lo) @func, implicit-def dead $scc
-    ; CHECK: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @func, [[COPY]].sub0, implicit-def dead $scc
-    ; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADD_U32_1]]
+    ; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADD_U32_]]
     %0:sreg_64 = COPY $sgpr0_sgpr1
     %1:sreg_32 = S_ADD_U32 %0.sub0, target-flags(amdgpu-rel32-lo) @func, implicit-def dead $scc
     %2:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @func, %0.sub0, implicit-def dead $scc
     S_ENDPGM 0, implicit %1, implicit %2
 
 ...
+
+---
+name: commute_target_flag_global_offset
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+
+    ; CHECK-LABEL: name: commute_target_flag_global_offset
+    ; CHECK: liveins: $sgpr0_sgpr1
+    ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, target-flags(amdgpu-rel32-lo) @gv + 4, implicit-def dead $scc
+    ; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADD_U32_]]
+    %0:sreg_64 = COPY $sgpr0_sgpr1
+    %1:sreg_32 = S_ADD_U32 %0.sub0, target-flags(amdgpu-rel32-lo) @gv + 4, implicit-def dead $scc
+    %2:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @gv + 4, %0.sub0, implicit-def dead $scc
+    S_ENDPGM 0, implicit %1, implicit %2
+
+...
+
+---
+name: commute_target_flag_global_offset_mismatch
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+
+    ; CHECK-LABEL: name: commute_target_flag_global_offset_mismatch
+    ; CHECK: liveins: $sgpr0_sgpr1
+    ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]].sub0, target-flags(amdgpu-rel32-lo) @gv + 4, implicit-def dead $scc
+    ; CHECK: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @gv + 8, [[COPY]].sub0, implicit-def dead $scc
+    ; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[S_ADD_U32_1]]
+    %0:sreg_64 = COPY $sgpr0_sgpr1
+    %1:sreg_32 = S_ADD_U32 %0.sub0, target-flags(amdgpu-rel32-lo) @gv + 4, implicit-def dead $scc
+    %2:sreg_32 = S_ADD_U32 target-flags(amdgpu-rel32-lo) @gv + 8, %0.sub0, implicit-def dead $scc
+    S_ENDPGM 0, implicit %1, implicit %2
+
+...


        


More information about the llvm-commits mailing list