[llvm] 1711020 - AMDGPU: Use isLiteralConstantLike to check whether the operand could ever be literal

Thu Mar 31 08:07:23 PDT 2022

Author: Changpeng Fang
Date: 2022-03-31T08:06:31-07:00
New Revision: 1711020c3769d38e146c2c116376a2255630613a

URL: https://github.com/llvm/llvm-project/commit/1711020c3769d38e146c2c116376a2255630613a
DIFF: https://github.com/llvm/llvm-project/commit/1711020c3769d38e146c2c116376a2255630613a.diff

LOG: AMDGPU: Use isLiteralConstantLike to check whether the operand could ever be literal

Summary:
  To compute the size of a VALU/SALU instruction, we need to check whether an operand
could ever be a literal. Previously isLiteralConstant was used, which missed cases
such as global variables or external symbols. These misses led to underestimating
the instruction size and the branch offset, so the necessary branch relaxation was
incorrectly skipped when the branch offset was actually greater than what the branch bits can hold.
This change uses isLiteralConstantLike to check the operands instead. It may be conservative,
but it is safe.

Reviewers: arsenm

Differential Revision: https://reviews.llvm.org/D122778

Added: 
    llvm/test/CodeGen/AMDGPU/literal-constant-like-operand-instruction-size.ll

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/lib/Target/AMDGPU/SIInstructions.td

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 6f0a61a43070d..0c40735b7542d 100644

--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -7461,7 +7461,9 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
       return DescSize;
     bool HasLiteral = false;
     for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
-      if (isLiteralConstant(MI, I)) {
+      const MachineOperand &Op = MI.getOperand(I);
+      const MCOperandInfo &OpInfo = Desc.OpInfo[I];
+      if (isLiteralConstantLike(Op, OpInfo)) {
         HasLiteral = true;
         break;
       }

diff  --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 18e18e76442bb..5ea006d964d09 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -520,6 +520,7 @@ def : GCNPat<
 def SI_CALL : SPseudoInstSI <
   (outs SReg_64:$dst), (ins SSrc_b64:$src0, unknown:$callee)> {
   let Size = 4;
+  let FixedSize = 1;
   let isCall = 1;
   let UseNamedOperandTable = 1;
   let SchedRW = [WriteBranch];
@@ -532,6 +533,7 @@ def SI_TCRETURN : SPseudoInstSI <(outs),
   (ins SReg_64:$src0, unknown:$callee, i32imm:$fpdiff),
   [(AMDGPUtc_return i64:$src0, tglobaladdr:$callee, i32:$fpdiff)]> {
   let Size = 4;
+  let FixedSize = 1;
   let isCall = 1;
   let isTerminator = 1;
   let isReturn = 1;

diff  --git a/llvm/test/CodeGen/AMDGPU/literal-constant-like-operand-instruction-size.ll b/llvm/test/CodeGen/AMDGPU/literal-constant-like-operand-instruction-size.ll
new file mode 100644
index 0000000000000..6e3641948ac39
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/literal-constant-like-operand-instruction-size.ll
@@ -0,0 +1,39 @@
+; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs -amdgpu-s-branch-bits=6 < %s | FileCheck -check-prefix=GCN %s
+
+
+; Restrict maximum branch to between +31 and -32 dwords
+declare void @llvm.amdgcn.s.sleep(i32) #0
+
+@name1 = external addrspace(1) global i32
+@name2 = external addrspace(1) global i32
+@name3 = external addrspace(1) global i32
+
+; GCN-LABEL: {{^}}branch_offset_test:
+; GCN: s_cmp_eq_u32 s{{[0-9]+}}, 0
+; GCN-NEXT: s_cbranch_scc0 [[BB2:.LBB[0-9]+_[0-9]+]]
+; GCN-NEXT: .LBB{{[0-9]+}}_{{[0-9]+}}: ; %bb
+; GCN-NEXT: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]]
+; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
+; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[BB3:.LBB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
+; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[BB3]]-[[POST_GETPC]])>>32
+; GCN-NEXT: s_setpc_b64 s[[[PC_LO]]:[[PC_HI]]]
+; GCN-NEXT: [[BB2]]: ; %bb2
+; GCN-NEXT: s_getpc_b64 s[[[PC_LO]]:[[PC_HI]]]
+
+; GCN: [[BB3]]: ; %bb3
+define amdgpu_kernel void @branch_offset_test(i32 addrspace(1)* %arg, i32 %cnd) #0 {
+bb:
+  %cmp = icmp eq i32 %cnd, 0
+  br i1 %cmp, label %bb3, label %bb2 ; +8 dword branch
+
+bb2:
+  store i32 1, i32 addrspace(1)* @name1
+  store i32 2, i32 addrspace(1)* @name2
+  store i32 3, i32 addrspace(1)* @name3
+  call void @llvm.amdgcn.s.sleep(i32 0)
+  br label %bb3
+
+bb3:
+  store volatile i32 %cnd, i32 addrspace(1)* %arg
+  ret void
+}