[llvm] b0402a3 - [amdgpu] Add 64-bit PC support when expanding unconditional branches.
Michael Liao via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 26 11:50:38 PDT 2021
Author: Michael Liao
Date: 2021-07-26T14:50:30-04:00
New Revision: b0402a35fc882ad582ddf128833e531cf2b7f657
URL: https://github.com/llvm/llvm-project/commit/b0402a35fc882ad582ddf128833e531cf2b7f657
DIFF: https://github.com/llvm/llvm-project/commit/b0402a35fc882ad582ddf128833e531cf2b7f657.diff
LOG: [amdgpu] Add 64-bit PC support when expanding unconditional branches.
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D106445
Added:
llvm/test/MC/AMDGPU/offset-expr.s
Modified:
llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.h
llvm/test/CodeGen/AMDGPU/branch-relax-bundle.ll
llvm/test/CodeGen/AMDGPU/branch-relaxation-debug-info.mir
llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx10-branch-offset-bug.ll
llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
llvm/test/MC/AMDGPU/expressions.s
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index b2bdd4d6d1696..3dd27f1996d66 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -41,9 +41,6 @@ class AMDGPUMCInstLower {
const TargetSubtargetInfo &ST;
const AsmPrinter ≈
- const MCExpr *getLongBranchBlockExpr(const MachineBasicBlock &SrcBB,
- const MachineOperand &MO) const;
-
public:
AMDGPUMCInstLower(MCContext &ctx, const TargetSubtargetInfo &ST,
const AsmPrinter &AP);
@@ -95,54 +92,21 @@ static MCSymbolRefExpr::VariantKind getVariantKind(unsigned MOFlags) {
}
}
-const MCExpr *AMDGPUMCInstLower::getLongBranchBlockExpr(
- const MachineBasicBlock &SrcBB,
- const MachineOperand &MO) const {
- const MCExpr *DestBBSym
- = MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx);
- const MCExpr *SrcBBSym = MCSymbolRefExpr::create(SrcBB.getSymbol(), Ctx);
-
- // FIXME: The first half of this assert should be removed. This should
- // probably be PC relative instead of using the source block symbol, and
- // therefore the indirect branch expansion should use a bundle.
- assert(
- skipDebugInstructionsForward(SrcBB.begin(), SrcBB.end())->getOpcode() ==
- AMDGPU::S_GETPC_B64 &&
- ST.getInstrInfo()->get(AMDGPU::S_GETPC_B64).Size == 4);
-
- // s_getpc_b64 returns the address of next instruction.
- const MCConstantExpr *One = MCConstantExpr::create(4, Ctx);
- SrcBBSym = MCBinaryExpr::createAdd(SrcBBSym, One, Ctx);
-
- if (MO.getTargetFlags() == SIInstrInfo::MO_LONG_BRANCH_FORWARD)
- return MCBinaryExpr::createSub(DestBBSym, SrcBBSym, Ctx);
-
- assert(MO.getTargetFlags() == SIInstrInfo::MO_LONG_BRANCH_BACKWARD);
- return MCBinaryExpr::createSub(SrcBBSym, DestBBSym, Ctx);
-}
-
bool AMDGPUMCInstLower::lowerOperand(const MachineOperand &MO,
MCOperand &MCOp) const {
switch (MO.getType()) {
default:
- llvm_unreachable("unknown operand type");
+ break;
case MachineOperand::MO_Immediate:
MCOp = MCOperand::createImm(MO.getImm());
return true;
case MachineOperand::MO_Register:
MCOp = MCOperand::createReg(AMDGPU::getMCReg(MO.getReg(), ST));
return true;
- case MachineOperand::MO_MachineBasicBlock: {
- if (MO.getTargetFlags() != 0) {
- MCOp = MCOperand::createExpr(
- getLongBranchBlockExpr(*MO.getParent()->getParent(), MO));
- } else {
- MCOp = MCOperand::createExpr(
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::createExpr(
MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx));
- }
-
return true;
- }
case MachineOperand::MO_GlobalAddress: {
const GlobalValue *GV = MO.getGlobal();
SmallString<128> SymbolName;
@@ -168,7 +132,15 @@ bool AMDGPUMCInstLower::lowerOperand(const MachineOperand &MO,
case MachineOperand::MO_RegisterMask:
// Regmasks are like implicit defs.
return false;
+ case MachineOperand::MO_MCSymbol:
+ if (MO.getTargetFlags() == SIInstrInfo::MO_FAR_BRANCH_OFFSET) {
+ MCSymbol *Sym = MO.getMCSymbol();
+ MCOp = MCOperand::createExpr(Sym->getVariableValue());
+ return true;
+ }
+ break;
}
+ llvm_unreachable("unknown operand type");
}
void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 03a147c53cfe2..7ab0f7a100c5e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
@@ -2230,32 +2231,36 @@ unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
// s_getpc_b64. Insert pc arithmetic code before last terminator.
MachineInstr *GetPC = BuildMI(MBB, I, DL, get(AMDGPU::S_GETPC_B64), PCReg);
- // TODO: Handle > 32-bit block address.
- if (BrOffset >= 0) {
- BuildMI(MBB, I, DL, get(AMDGPU::S_ADD_U32))
- .addReg(PCReg, RegState::Define, AMDGPU::sub0)
- .addReg(PCReg, 0, AMDGPU::sub0)
- .addMBB(&DestBB, MO_LONG_BRANCH_FORWARD);
- BuildMI(MBB, I, DL, get(AMDGPU::S_ADDC_U32))
- .addReg(PCReg, RegState::Define, AMDGPU::sub1)
- .addReg(PCReg, 0, AMDGPU::sub1)
- .addImm(0);
- } else {
- // Backwards branch.
- BuildMI(MBB, I, DL, get(AMDGPU::S_SUB_U32))
+ auto &MCCtx = MF->getContext();
+ MCSymbol *PostGetPCLabel =
+ MCCtx.createTempSymbol("post_getpc", /*AlwaysAddSuffix=*/true);
+ GetPC->setPostInstrSymbol(*MF, PostGetPCLabel);
+
+ MCSymbol *OffsetLo =
+ MCCtx.createTempSymbol("offset_lo", /*AlwaysAddSuffix=*/true);
+ MCSymbol *OffsetHi =
+ MCCtx.createTempSymbol("offset_hi", /*AlwaysAddSuffix=*/true);
+ BuildMI(MBB, I, DL, get(AMDGPU::S_ADD_U32))
.addReg(PCReg, RegState::Define, AMDGPU::sub0)
.addReg(PCReg, 0, AMDGPU::sub0)
- .addMBB(&DestBB, MO_LONG_BRANCH_BACKWARD);
- BuildMI(MBB, I, DL, get(AMDGPU::S_SUBB_U32))
+ .addSym(OffsetLo, MO_FAR_BRANCH_OFFSET);
+ BuildMI(MBB, I, DL, get(AMDGPU::S_ADDC_U32))
.addReg(PCReg, RegState::Define, AMDGPU::sub1)
.addReg(PCReg, 0, AMDGPU::sub1)
- .addImm(0);
- }
+ .addSym(OffsetHi, MO_FAR_BRANCH_OFFSET);
// Insert the indirect branch after the other terminator.
BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64))
.addReg(PCReg);
+ auto ComputeBlockSize = [](const TargetInstrInfo *TII,
+ const MachineBasicBlock &MBB) {
+ unsigned Size = 0;
+ for (const MachineInstr &MI : MBB)
+ Size += TII->getInstSizeInBytes(MI);
+ return Size;
+ };
+
// FIXME: If spilling is necessary, this will fail because this scavenger has
// no emergency stack slots. It is non-trivial to spill in this situation,
// because the restore code needs to be specially placed after the
@@ -2300,7 +2305,16 @@ unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
MRI.clearVirtRegs();
RS->setRegUsed(Scav);
- return 4 + 8 + 4 + 4;
+ // Now, the distance could be defined.
+ auto *Offset = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(DestBB.getSymbol(), MCCtx),
+ MCSymbolRefExpr::create(PostGetPCLabel, MCCtx), MCCtx);
+ // Add offset assignments.
+ auto *Mask = MCConstantExpr::create(0xFFFFFFFFULL, MCCtx);
+ OffsetLo->setVariableValue(MCBinaryExpr::createAnd(Offset, Mask, MCCtx));
+ auto *ShAmt = MCConstantExpr::create(32, MCCtx);
+ OffsetHi->setVariableValue(MCBinaryExpr::createAShr(Offset, ShAmt, MCCtx));
+ return ComputeBlockSize(this, MBB);
}
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index e55774b94b0b8..fc5e5be035418 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -161,8 +161,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
// MO_REL32_HI -> symbol at rel32@hi -> R_AMDGPU_REL32_HI.
MO_REL32_HI = 5,
- MO_LONG_BRANCH_FORWARD = 6,
- MO_LONG_BRANCH_BACKWARD = 7,
+ MO_FAR_BRANCH_OFFSET = 6,
MO_ABS32_LO = 8,
MO_ABS32_HI = 9,
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-bundle.ll b/llvm/test/CodeGen/AMDGPU/branch-relax-bundle.ll
index 9f0b8d3553ee0..4cc2e7f3325ed 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relax-bundle.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relax-bundle.ll
@@ -20,6 +20,7 @@ declare void @func() #0
; GCN-LABEL: {{^}}bundle_size:
; GCN: s_cbranch_scc0 [[BB_EXPANSION:BB[0-9]+_[0-9]+]]
; GCN: s_getpc_b64
+; GCN-NEXT: .Lpost_getpc{{[0-9]+}}:{{$}}
; GCN-NEXT: s_add_u32
; GCN-NEXT: s_addc_u32
; GCN-NEXT: s_setpc_b64
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation-debug-info.mir b/llvm/test/CodeGen/AMDGPU/branch-relaxation-debug-info.mir
index 1f9581a29d9d4..262ed70310e6e 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relaxation-debug-info.mir
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation-debug-info.mir
@@ -8,8 +8,9 @@
# GCN-NEXT: ;DEBUG_VALUE: test_debug_value:globalptr_arg <- [DW_OP_plus_uconst 12, DW_OP_stack_value]
# GCN-NEXT: .loc 1 0 42 is_stmt 0 ; /tmp/test_debug_value.cl:0:42
# GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
-# GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], BB0_4-(BB0_5+4)
-# GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], 0
+# GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
+# GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], (BB0_4-[[POST_GETPC]])&4294967295
+# GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], (BB0_4-[[POST_GETPC]])>>32
# GCN-NEXT: s_setpc_b64
--- |
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx10-branch-offset-bug.ll b/llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx10-branch-offset-bug.ll
index 6341e63b3962a..fbd4869f295e0 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx10-branch-offset-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx10-branch-offset-bug.ll
@@ -11,8 +11,9 @@
; GFX1010: s_cmp_lg_u32
; GFX1010-NEXT: s_cbranch_scc0 [[RELAX_BB:BB[0-9]+_[0-9]+]]
; GFX1010: s_getpc_b64
-; GFX1010-NEXT: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, [[ENDBB:BB[0-9]+_[0-9]+]]-(BB
-; GFX1010-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}
+; GFX1010-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
+; GFX1010-NEXT: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ([[ENDBB:BB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
+; GFX1010-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ([[ENDBB:BB[0-9]+_[0-9]+]]-[[POST_GETPC]])>>32
; GFX1010: [[RELAX_BB]]:
; GCN: v_nop
@@ -59,8 +60,9 @@ bb3:
; GFX1010-NEXT: s_cbranch_execnz [[RELAX_BB:BB[0-9]+_[0-9]+]]
; GCN: s_getpc_b64
-; GCN-NEXT: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, [[ENDBB:BB[0-9]+_[0-9]+]]-(BB
-; GCN-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}
+; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
+; GCN-NEXT: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, ([[ENDBB:BB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
+; GCN-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, ([[ENDBB:BB[0-9]+_[0-9]+]]-[[POST_GETPC]])>>32
; GCN: [[RELAX_BB]]:
; GCN: v_nop
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
index e742f558b148c..151a7e0439f41 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
@@ -60,10 +60,11 @@ bb3:
; GCN: s_cmp_eq_u32 [[CND]], 0
; GCN-NEXT: s_cbranch_scc0 [[LONGBB:BB[0-9]+_[0-9]+]]
-; GCN-NEXT: [[LONG_JUMP:BB[0-9]+_[0-9]+]]: ; %bb0
+; GCN-NEXT: {{BB[0-9]+_[0-9]+}}: ; %bb0
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
-; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], [[ENDBB:BB[0-9]+_[0-9]+]]-([[LONG_JUMP]]+4)
-; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], 0
+; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
+; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[ENDBB:BB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
+; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[ENDBB]]-[[POST_GETPC]])>>32
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: [[LONGBB]]:
@@ -104,10 +105,11 @@ bb3:
; GCN-DAG: s_and_b64 vcc, exec, [[UNMASKED]]
; GCN: s_cbranch_vccz [[LONGBB:BB[0-9]+_[0-9]+]]
-; GCN-NEXT: [[LONG_JUMP:BB[0-9]+_[0-9]+]]: ; %bb0
+; GCN-NEXT: {{BB[0-9]+_[0-9]+}}: ; %bb0
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
-; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], [[ENDBB:BB[0-9]+_[0-9]+]]-([[LONG_JUMP]]+4)
-; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], 0
+; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
+; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[ENDBB:BB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
+; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[ENDBB]]-[[POST_GETPC]])>>32
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: [[LONGBB]]:
@@ -190,12 +192,13 @@ bb3:
; GCN-NEXT: s_cbranch_scc0 [[ENDBB:BB[0-9]+_[0-9]+]]
-; GCN-NEXT: [[LONG_JUMP:BB[0-9]+_[0-9]+]]: ; %bb2
+; GCN-NEXT: {{BB[0-9]+_[0-9]+}}: ; %bb2
; GCN-NEXT: ; in Loop: Header=[[LOOPBB]] Depth=1
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
-; GCN-NEXT: s_sub_u32 s[[PC_LO]], s[[PC_LO]], ([[LONG_JUMP]]+4)-[[LOOPBB]]
-; GCN-NEXT: s_subb_u32 s[[PC_HI]], s[[PC_HI]], 0
+; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
+; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[LOOPBB]]-[[POST_GETPC]])&4294967295
+; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[LOOPBB]]-[[POST_GETPC]])>>32
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: [[ENDBB]]:
@@ -226,10 +229,11 @@ bb3:
; GCN: s_cmp_eq_u32
; GCN: s_cbranch_scc{{[0-1]}} [[BB2:BB[0-9]+_[0-9]+]]
-; GCN-NEXT: [[LONG_JUMP0:BB[0-9]+_[0-9]+]]: ; %bb0
+; GCN-NEXT: {{BB[0-9]+_[0-9]+}}: ; %bb0
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}}
-; GCN-NEXT: s_add_u32 s[[PC0_LO]], s[[PC0_LO]], [[BB3:BB[0-9]_[0-9]+]]-([[LONG_JUMP0]]+4)
-; GCN-NEXT: s_addc_u32 s[[PC0_HI]], s[[PC0_HI]], 0{{$}}
+; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
+; GCN-NEXT: s_add_u32 s[[PC0_LO]], s[[PC0_LO]], ([[BB3:BB[0-9]_[0-9]+]]-[[POST_GETPC]])&4294967295
+; GCN-NEXT: s_addc_u32 s[[PC0_HI]], s[[PC0_HI]], ([[BB3:BB[0-9]_[0-9]+]]-[[POST_GETPC]])>>32
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC0_LO]]:[[PC0_HI]]{{\]}}
; GCN: [[BB2]]: ; %bb3
@@ -282,12 +286,13 @@ bb4:
; GCN-NEXT: v_nop_e64
; GCN-NEXT: ;;#ASMEND
-; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %loop
+; GCN-NEXT: {{BB[0-9]+_[0-9]+}}: ; %loop
; GCN-NEXT: ; in Loop: Header=[[LOOP]] Depth=1
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
-; GCN-NEXT: s_sub_u32 s[[PC_LO]], s[[PC_LO]], ([[LONGBB]]+4)-[[LOOP]]
-; GCN-NEXT: s_subb_u32 s[[PC_HI]], s[[PC_HI]], 0{{$}}
+; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
+; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[LOOP]]-[[POST_GETPC]])&4294967295
+; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[LOOP]]-[[POST_GETPC]])>>32
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: .Lfunc_end{{[0-9]+}}:
define amdgpu_kernel void @uniform_unconditional_min_long_backward_branch(i32 addrspace(1)* %arg, i32 %arg1) {
@@ -318,10 +323,11 @@ loop:
; GCN-NEXT: v_cmp_{{eq|ne}}_u32_e64
; GCN: s_cbranch_vccz [[BB2:BB[0-9]_[0-9]+]]
-; GCN-NEXT: [[LONGBB1:BB[0-9]+_[0-9]+]]:
+; GCN-NEXT: {{BB[0-9]+_[0-9]+}}:
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]{{\]}}
-; GCN-NEXT: s_add_u32 s[[PC1_LO]], s[[PC1_LO]], [[BB3:BB[0-9]+_[0-9]+]]-([[LONGBB1]]+4)
-; GCN-NEXT: s_addc_u32 s[[PC1_HI]], s[[PC1_HI]], 0{{$}}
+; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
+; GCN-NEXT: s_add_u32 s[[PC1_LO]], s[[PC1_LO]], ([[BB3:BB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
+; GCN-NEXT: s_addc_u32 s[[PC1_HI]], s[[PC1_HI]], ([[BB3:BB[0-9]+_[0-9]+]]-[[POST_GETPC]])>>32
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC1_LO]]:[[PC1_HI]]{{\]}}
; GCN-NEXT: [[BB2]]: ; %bb2
@@ -376,10 +382,11 @@ bb3:
; GCN-NEXT: s_and_saveexec_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], vcc
; GCN-NEXT: s_cbranch_execnz [[IF:BB[0-9]+_[0-9]+]]
-; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %entry
+; GCN-NEXT: {{BB[0-9]+_[0-9]+}}: ; %entry
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
-; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], [[BB2:BB[0-9]_[0-9]+]]-([[LONGBB]]+4)
-; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], 0{{$}}
+; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
+; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[BB2:BB[0-9]_[0-9]+]]-[[POST_GETPC]])&4294967295
+; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[BB2:BB[0-9]_[0-9]+]]-[[POST_GETPC]])>>32
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: [[IF]]: ; %if
@@ -438,11 +445,12 @@ endif:
; GCN: ;;#ASMEND
; GCN: s_cbranch_{{vccz|vccnz}} [[RET:BB[0-9]+_[0-9]+]]
-; GCN-NEXT: [[LONGBB:BB[0-9]+_[0-9]+]]: ; %loop
+; GCN-NEXT: {{BB[0-9]+_[0-9]+}}: ; %loop
; GCN-NEXT: ; in Loop: Header=[[LOOP_BODY]] Depth=1
; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
-; GCN-NEXT: s_sub_u32 s[[PC_LO]], s[[PC_LO]], ([[LONGBB]]+4)-[[LOOP_BODY]]
-; GCN-NEXT: s_subb_u32 s[[PC_HI]], s[[PC_HI]], 0
+; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
+; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[LOOP_BODY]]-[[POST_GETPC]])&4294967295
+; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[LOOP_BODY]]-[[POST_GETPC]])>>32
; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: [[RET]]: ; %UnifiedReturnBlock
@@ -479,9 +487,11 @@ ret:
; GCN: s_cbranch_scc{{[0-1]}} [[LONG_BR_0:BB[0-9]+_[0-9]+]]
; GCN-NEXT: BB{{[0-9]+_[0-9]+}}:
-; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, [[LONG_BR_DEST0:BB[0-9]+_[0-9]+]]-(
-; GCN-NEXT: s_addc_u32
-; GCN-NEXT: s_setpc_b64
+; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
+; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], ([[LONG_BR_DEST0:BB[0-9]+_[0-9]+]]-[[POST_GETPC]])&4294967295
+; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], ([[LONG_BR_DEST0]]-[[POST_GETPC]])>>32
+; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: [[LONG_BR_0]]:
; GCN: [[LONG_BR_DEST0]]:
diff --git a/llvm/test/MC/AMDGPU/expressions.s b/llvm/test/MC/AMDGPU/expressions.s
index 0b7bdcdebb88f..cb2251d97609c 100644
--- a/llvm/test/MC/AMDGPU/expressions.s
+++ b/llvm/test/MC/AMDGPU/expressions.s
@@ -271,6 +271,12 @@ BB2:
s_sub_u32 vcc_lo, vcc_lo, (BB2+4)-BB1
// VI: s_sub_u32 vcc_lo, vcc_lo, (BB2+4)-BB1 ; encoding: [0x6a,0xff,0xea,0x80,A,A,A,A]
// VI-NEXT: ; fixup A - offset: 4, value: (BB2+4)-BB1, kind: FK_Data_4
+s_add_u32 vcc_lo, vcc_lo, (BB2-BB1)&4294967295
+// VI: s_add_u32 vcc_lo, vcc_lo, (BB2-BB1)&4294967295 ; encoding: [0x6a,0xff,0x6a,0x80,A,A,A,A]
+// VI-NEXT: ; fixup A - offset: 4, value: (BB2-BB1)&4294967295, kind: FK_Data_4
+s_addc_u32 vcc_hi, vcc_hi, (BB2-BB1)>>32
+// VI: s_addc_u32 vcc_hi, vcc_hi, (BB2-BB1)>>32 ; encoding: [0x6b,0xff,0x6b,0x82,A,A,A,A]
+// VI-NEXT: ; fixup A - offset: 4, value: (BB2-BB1)>>32, kind: FK_Data_4
t=1
s_sub_u32 s0, s0, -t
diff --git a/llvm/test/MC/AMDGPU/offset-expr.s b/llvm/test/MC/AMDGPU/offset-expr.s
new file mode 100644
index 0000000000000..7ec05c7531283
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/offset-expr.s
@@ -0,0 +1,18 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx906 -filetype=obj %s | llvm-objdump -d --mcpu=gfx906 - | FileCheck %s
+
+// Check that the offset is correctly calculated.
+
+BB0:
+v_nop_e64
+v_nop_e64
+BB1:
+v_nop_e64
+BB2:
+s_add_u32 vcc_lo, vcc_lo, (BB2-BB1)&4294967295
+// CHECK: s_add_u32 vcc_lo, vcc_lo, 8 // 000000000018: 806AFF6A 00000008
+s_addc_u32 vcc_hi, vcc_hi, (BB2-BB1)>>32
+// CHECK: s_addc_u32 vcc_hi, vcc_hi, 0 // 000000000020: 826BFF6B 00000000
+s_add_u32 vcc_lo, vcc_lo, (BB0-BB1)&4294967295
+// CHECK: s_add_u32 vcc_lo, vcc_lo, -16 // 000000000028: 806AFF6A FFFFFFF0
+s_addc_u32 vcc_hi, vcc_hi, (BB0-BB1)>>32
+// CHECK: s_addc_u32 vcc_hi, vcc_hi, -1 // 000000000030: 826BFF6B FFFFFFFF
More information about the llvm-commits
mailing list