[llvm] [AMDGPU] MCExpr printing helper with KnownBits support (PR #95951)
Janek van Oirschot via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 20 07:28:12 PDT 2024
https://github.com/JanekvO updated https://github.com/llvm/llvm-project/pull/95951
>From a368e031be2327bf83d0bda035f3173ec8ff8f55 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Tue, 18 Jun 2024 17:01:05 +0100
Subject: [PATCH 1/5] [AMDGPU] MCExpr printing helper with KnownBits support
---
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 7 +-
.../AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp | 140 ++++++++++++++++++
.../Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h | 9 ++
.../MCTargetDesc/AMDGPUTargetStreamer.cpp | 9 +-
llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s | 18 +--
llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s | 14 +-
llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s | 14 +-
llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s | 16 +-
llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s | 16 +-
llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s | 4 +-
10 files changed, 194 insertions(+), 53 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index d8e22f4b0d8fa..29bc5458ddb63 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -404,12 +404,7 @@ void AMDGPUAsmPrinter::emitCommonFunctionComments(
SmallString<128> AMDGPUAsmPrinter::getMCExprStr(const MCExpr *Value) {
SmallString<128> Str;
raw_svector_ostream OSS(Str);
- int64_t IVal;
- if (Value->evaluateAsAbsolute(IVal)) {
- OSS << static_cast<uint64_t>(IVal);
- } else {
- Value->print(OSS, MAI);
- }
+ AMDGPUMCExprPrint(Value, OSS, MAI);
return Str;
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
index 159664faf983f..b40a93af6bb36 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
@@ -15,6 +15,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>
@@ -314,3 +315,142 @@ AMDGPUVariadicMCExpr::createOccupancy(unsigned InitOcc, const MCExpr *NumSGPRs,
CreateExpr(InitOcc), NumSGPRs, NumVGPRs},
Ctx);
}
+
+static KnownBits AMDGPUMCExprKnownBits(const MCExpr *Expr, raw_ostream &OS,
+ const MCAsmInfo *MAI, unsigned depth) {
+
+ if (depth == 0)
+ return KnownBits(/*BitWidth=*/64);
+
+ depth--;
+
+ switch (Expr->getKind()) {
+ case MCExpr::ExprKind::Binary: {
+ const MCBinaryExpr *BExpr = cast<MCBinaryExpr>(Expr);
+ const MCExpr *LHS = BExpr->getLHS();
+ const MCExpr *RHS = BExpr->getRHS();
+
+ KnownBits LHSKnown = AMDGPUMCExprKnownBits(LHS, OS, MAI, depth);
+ KnownBits RHSKnown = AMDGPUMCExprKnownBits(RHS, OS, MAI, depth);
+
+ switch (BExpr->getOpcode()) {
+ default:
+ return KnownBits(/*BitWidth=*/64);
+ case MCBinaryExpr::Opcode::Add:
+ return KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/false,
+ /*NUW=*/false, LHSKnown, RHSKnown);
+ case MCBinaryExpr::Opcode::And:
+ return LHSKnown & RHSKnown;
+ case MCBinaryExpr::Opcode::Div:
+ return KnownBits::sdiv(LHSKnown, RHSKnown);
+ case MCBinaryExpr::Opcode::Mod:
+ return KnownBits::srem(LHSKnown, RHSKnown);
+ case MCBinaryExpr::Opcode::Mul:
+ return KnownBits::mul(LHSKnown, RHSKnown);
+ case MCBinaryExpr::Opcode::Or:
+ return LHSKnown | RHSKnown;
+ case MCBinaryExpr::Opcode::Shl:
+ return KnownBits::shl(LHSKnown, RHSKnown);
+ case MCBinaryExpr::Opcode::AShr:
+ return KnownBits::ashr(LHSKnown, RHSKnown);
+ case MCBinaryExpr::Opcode::LShr:
+ return KnownBits::lshr(LHSKnown, RHSKnown);
+ case MCBinaryExpr::Opcode::Sub:
+ return KnownBits::computeForAddSub(/*Add=*/false, /*NSW=*/false,
+ /*NUW=*/false, LHSKnown, RHSKnown);
+ case MCBinaryExpr::Opcode::Xor:
+ return LHSKnown ^ RHSKnown;
+ }
+ }
+ case MCExpr::ExprKind::Constant: {
+ const MCConstantExpr *CE = cast<MCConstantExpr>(Expr);
+ APInt APValue(/*BitWidth=*/64, CE->getValue(), /*isSigned=*/true);
+ return KnownBits::makeConstant(APValue);
+ }
+ case MCExpr::ExprKind::SymbolRef: {
+ const MCSymbolRefExpr *RExpr = cast<MCSymbolRefExpr>(Expr);
+ const MCSymbol &Sym = RExpr->getSymbol();
+ if (!Sym.isVariable())
+ return KnownBits(/*BitWidth=*/64);
+
+ // Variable value retrieval is not for actual use but only for knownbits
+ // analysis.
+ return AMDGPUMCExprKnownBits(Sym.getVariableValue(/*SetUsed=*/false), OS,
+ MAI, depth);
+ }
+ case MCExpr::ExprKind::Unary: {
+ const MCUnaryExpr *UExpr = cast<MCUnaryExpr>(Expr);
+ KnownBits KB = AMDGPUMCExprKnownBits(UExpr->getSubExpr(), OS, MAI, depth);
+
+ switch (UExpr->getOpcode()) {
+ default:
+ return KnownBits(/*BitWidth=*/64);
+ case MCUnaryExpr::Opcode::Minus: {
+ KB.makeNegative();
+ return KB;
+ }
+ case MCUnaryExpr::Opcode::Not: {
+ KnownBits AllOnes(/*BitWidth=*/64);
+ AllOnes.setAllOnes();
+ return KB ^ AllOnes;
+ }
+ case MCUnaryExpr::Opcode::Plus: {
+ KB.makeNonNegative();
+ return KB;
+ }
+ }
+ }
+ case MCExpr::ExprKind::Target: {
+ const AMDGPUVariadicMCExpr *AGVK = cast<AMDGPUVariadicMCExpr>(Expr);
+
+ switch (AGVK->getKind()) {
+ default:
+ return KnownBits(/*BitWidth=*/64);
+ case AMDGPUVariadicMCExpr::VariadicKind::AGVK_Or: {
+ KnownBits KB = AMDGPUMCExprKnownBits(AGVK->getSubExpr(0), OS, MAI, depth);
+ for (const MCExpr *Arg : AGVK->getArgs()) {
+ KB |= AMDGPUMCExprKnownBits(Arg, OS, MAI, depth);
+ }
+ return KB;
+ }
+ case AMDGPUVariadicMCExpr::VariadicKind::AGVK_Max: {
+ KnownBits KB = AMDGPUMCExprKnownBits(AGVK->getSubExpr(0), OS, MAI, depth);
+ for (const MCExpr *Arg : AGVK->getArgs()) {
+ KB = KnownBits::umax(KB, AMDGPUMCExprKnownBits(Arg, OS, MAI, depth));
+ }
+ return KB;
+ }
+ case AMDGPUVariadicMCExpr::VariadicKind::AGVK_ExtraSGPRs:
+ case AMDGPUVariadicMCExpr::VariadicKind::AGVK_TotalNumVGPRs:
+ case AMDGPUVariadicMCExpr::VariadicKind::AGVK_AlignTo:
+ case AMDGPUVariadicMCExpr::VariadicKind::AGVK_Occupancy: {
+ int64_t Val;
+ if (AGVK->evaluateAsAbsolute(Val)) {
+ APInt APValue(/*BitWidth=*/64, Val, /*isSigned=*/false);
+ return KnownBits::makeConstant(APValue);
+ } else {
+ return KnownBits(/*BitWidth=*/64);
+ }
+ }
+ }
+ }
+ }
+ return KnownBits(/*BitWidth=*/64);
+}
+
+void llvm::AMDGPUMCExprPrint(const MCExpr *Expr, raw_ostream &OS,
+ const MCAsmInfo *MAI) {
+ int64_t Val;
+ if (Expr->evaluateAsAbsolute(Val)) {
+ OS << Val;
+ return;
+ }
+
+ KnownBits KB = AMDGPUMCExprKnownBits(Expr, OS, MAI, /*depth=*/16);
+ if (KB.isConstant()) {
+ OS << KB.getConstant();
+ return;
+ }
+
+ Expr->print(OS, MAI);
+}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h
index f92350b592350..67015dcf32343 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h
@@ -92,6 +92,7 @@ class AMDGPUVariadicMCExpr : public MCTargetExpr {
const GCNSubtarget &STM,
MCContext &Ctx);
+ ArrayRef<const MCExpr *> getArgs() const { return Args; }
VariadicKind getKind() const { return Kind; }
const MCExpr *getSubExpr(size_t Index) const;
@@ -107,6 +108,14 @@ class AMDGPUVariadicMCExpr : public MCTargetExpr {
}
};
+// Tries to leverage KnownBits to shorten the printed form of composed
+// MCExprs. E.g., an expression such as ((unevaluatable_sym | 1) & 1) cannot
+// be evaluated because of unevaluatable_sym and would otherwise be printed
+// verbosely in full; however, KnownBits can deduce that its value is 1.
+// Particularly useful for AMDGPU metadata MCExprs.
+void AMDGPUMCExprPrint(const MCExpr *Expr, raw_ostream &OS,
+ const MCAsmInfo *MAI);
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCEXPR_H
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index e805e964ffe4e..25ca4e779fdaf 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUTargetStreamer.h"
+#include "AMDGPUMCExpr.h"
#include "AMDGPUMCKernelDescriptor.h"
#include "AMDGPUPTNote.h"
#include "Utils/AMDGPUBaseInfo.h"
@@ -328,14 +329,10 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
auto PrintField = [&](const MCExpr *Expr, uint32_t Shift, uint32_t Mask,
StringRef Directive) {
- int64_t IVal;
OS << "\t\t" << Directive << ' ';
- const MCExpr *pgm_rsrc1_bits =
+ const MCExpr *ShiftedAndMaskedExpr =
MCKernelDescriptor::bits_get(Expr, Shift, Mask, getContext());
- if (pgm_rsrc1_bits->evaluateAsAbsolute(IVal))
- OS << static_cast<uint64_t>(IVal);
- else
- pgm_rsrc1_bits->print(OS, MAI);
+ llvm::AMDGPUMCExprPrint(ShiftedAndMaskedExpr, OS, MAI);
OS << '\n';
};
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s
index 95af59c413ae6..ff2c81820c578 100644
--- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s
@@ -102,15 +102,15 @@ expr_defined:
// ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2
// ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3
// ASM-NEXT: .amdhsa_kernarg_size 0
-// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&62)>>1
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&1)>>0
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&2)>>1
-// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&4)>>2
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&8)>>3
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&16)>>4
-// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&32)>>5
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&64)>>6
-// ASM-NEXT: .amdhsa_wavefront_size32 (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&1024)>>10
+// ASM-NEXT: .amdhsa_user_sgpr_count 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+// ASM-NEXT: .amdhsa_wavefront_size32 1
// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1)>>0
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&128)>>7
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&256)>>8
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s
index e1107fb69ba41..fc902129964c3 100644
--- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s
@@ -104,13 +104,13 @@ expr_defined:
// ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2
// ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3
// ASM-NEXT: .amdhsa_kernarg_size 0
-// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&62)>>1
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&2)>>1
-// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&4)>>2
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&8)>>3
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&16)>>4
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&64)>>6
-// ASM-NEXT: .amdhsa_wavefront_size32 (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&1024)>>10
+// ASM-NEXT: .amdhsa_user_sgpr_count 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+// ASM-NEXT: .amdhsa_wavefront_size32 1
// ASM-NEXT: .amdhsa_enable_private_segment (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1)>>0
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&128)>>7
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&256)>>8
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s
index 449616d35186b..bad5fc41bc1b8 100644
--- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s
@@ -106,13 +106,13 @@ expr_defined:
// ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2
// ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3
// ASM-NEXT: .amdhsa_kernarg_size 0
-// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&62)>>1
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&2)>>1
-// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&4)>>2
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&8)>>3
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&16)>>4
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&64)>>6
-// ASM-NEXT: .amdhsa_wavefront_size32 (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&1024)>>10
+// ASM-NEXT: .amdhsa_user_sgpr_count 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+// ASM-NEXT: .amdhsa_wavefront_size32 1
// ASM-NEXT: .amdhsa_enable_private_segment (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1)>>0
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&128)>>7
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&256)>>8
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s
index c7e05441b45ff..22db612721117 100644
--- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s
@@ -94,14 +94,14 @@ expr_defined:
// ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2
// ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3
// ASM-NEXT: .amdhsa_kernarg_size 0
-// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&62)>>1
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer (((0&(~2048))|(defined_boolean<<11))&1)>>0
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((0&(~2048))|(defined_boolean<<11))&2)>>1
-// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((0&(~2048))|(defined_boolean<<11))&4)>>2
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((0&(~2048))|(defined_boolean<<11))&8)>>3
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((0&(~2048))|(defined_boolean<<11))&16)>>4
-// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init (((0&(~2048))|(defined_boolean<<11))&32)>>5
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((0&(~2048))|(defined_boolean<<11))&64)>>6
+// ASM-NEXT: .amdhsa_user_sgpr_count 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1)>>0
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&128)>>7
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&256)>>8
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s
index 49a5015987a65..b2ccbb7b5fd2f 100644
--- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s
@@ -95,14 +95,14 @@ expr_defined:
// ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2
// ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3
// ASM-NEXT: .amdhsa_kernarg_size 0
-// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&62)>>1
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer (((0&(~2048))|(defined_boolean<<11))&1)>>0
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((0&(~2048))|(defined_boolean<<11))&2)>>1
-// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((0&(~2048))|(defined_boolean<<11))&4)>>2
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((0&(~2048))|(defined_boolean<<11))&8)>>3
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((0&(~2048))|(defined_boolean<<11))&16)>>4
-// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init (((0&(~2048))|(defined_boolean<<11))&32)>>5
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((0&(~2048))|(defined_boolean<<11))&64)>>6
+// ASM-NEXT: .amdhsa_user_sgpr_count 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1)>>0
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&128)>>7
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&256)>>8
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s
index b7f89239160fc..cd795e9a8d475 100644
--- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s
@@ -66,7 +66,7 @@ expr_defined:
// ASM-NEXT: .amdhsa_group_segment_fixed_size 0
// ASM-NEXT: .amdhsa_private_segment_fixed_size 0
// ASM-NEXT: .amdhsa_kernarg_size 0
-// ASM-NEXT: .amdhsa_user_sgpr_count (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&62)>>1
+// ASM-NEXT: .amdhsa_user_sgpr_count 0
// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
@@ -77,7 +77,7 @@ expr_defined:
// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 0
// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1)>>0
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&128)>>7
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&256)>>8
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&512)>>9
// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1024)>>10
>From de95d0f5bbaaac6cb071f3eda3c5f081361f8afa Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Wed, 19 Jun 2024 20:05:14 +0100
Subject: [PATCH 2/5] Feedback, bitwidth const
---
.../AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp | 22 +++++++++----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
index b40a93af6bb36..79133ed1947cd 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
@@ -318,9 +318,10 @@ AMDGPUVariadicMCExpr::createOccupancy(unsigned InitOcc, const MCExpr *NumSGPRs,
static KnownBits AMDGPUMCExprKnownBits(const MCExpr *Expr, raw_ostream &OS,
const MCAsmInfo *MAI, unsigned depth) {
+ const unsigned BitWidth = 64;
if (depth == 0)
- return KnownBits(/*BitWidth=*/64);
+ return KnownBits(BitWidth);
depth--;
@@ -335,7 +336,7 @@ static KnownBits AMDGPUMCExprKnownBits(const MCExpr *Expr, raw_ostream &OS,
switch (BExpr->getOpcode()) {
default:
- return KnownBits(/*BitWidth=*/64);
+ return KnownBits(BitWidth);
case MCBinaryExpr::Opcode::Add:
return KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/false,
/*NUW=*/false, LHSKnown, RHSKnown);
@@ -364,14 +365,14 @@ static KnownBits AMDGPUMCExprKnownBits(const MCExpr *Expr, raw_ostream &OS,
}
case MCExpr::ExprKind::Constant: {
const MCConstantExpr *CE = cast<MCConstantExpr>(Expr);
- APInt APValue(/*BitWidth=*/64, CE->getValue(), /*isSigned=*/true);
+ APInt APValue(BitWidth, CE->getValue(), /*isSigned=*/true);
return KnownBits::makeConstant(APValue);
}
case MCExpr::ExprKind::SymbolRef: {
const MCSymbolRefExpr *RExpr = cast<MCSymbolRefExpr>(Expr);
const MCSymbol &Sym = RExpr->getSymbol();
if (!Sym.isVariable())
- return KnownBits(/*BitWidth=*/64);
+ return KnownBits(BitWidth);
// Variable value retrieval is not for actual use but only for knownbits
// analysis.
@@ -384,13 +385,13 @@ static KnownBits AMDGPUMCExprKnownBits(const MCExpr *Expr, raw_ostream &OS,
switch (UExpr->getOpcode()) {
default:
- return KnownBits(/*BitWidth=*/64);
+ return KnownBits(BitWidth);
case MCUnaryExpr::Opcode::Minus: {
KB.makeNegative();
return KB;
}
case MCUnaryExpr::Opcode::Not: {
- KnownBits AllOnes(/*BitWidth=*/64);
+ KnownBits AllOnes(BitWidth);
AllOnes.setAllOnes();
return KB ^ AllOnes;
}
@@ -405,7 +406,7 @@ static KnownBits AMDGPUMCExprKnownBits(const MCExpr *Expr, raw_ostream &OS,
switch (AGVK->getKind()) {
default:
- return KnownBits(/*BitWidth=*/64);
+ return KnownBits(BitWidth);
case AMDGPUVariadicMCExpr::VariadicKind::AGVK_Or: {
KnownBits KB = AMDGPUMCExprKnownBits(AGVK->getSubExpr(0), OS, MAI, depth);
for (const MCExpr *Arg : AGVK->getArgs()) {
@@ -426,16 +427,15 @@ static KnownBits AMDGPUMCExprKnownBits(const MCExpr *Expr, raw_ostream &OS,
case AMDGPUVariadicMCExpr::VariadicKind::AGVK_Occupancy: {
int64_t Val;
if (AGVK->evaluateAsAbsolute(Val)) {
- APInt APValue(/*BitWidth=*/64, Val, /*isSigned=*/false);
+ APInt APValue(BitWidth, Val, /*isSigned=*/false);
return KnownBits::makeConstant(APValue);
- } else {
- return KnownBits(/*BitWidth=*/64);
}
+ return KnownBits(BitWidth);
}
}
}
}
- return KnownBits(/*BitWidth=*/64);
+ return KnownBits(BitWidth);
}
void llvm::AMDGPUMCExprPrint(const MCExpr *Expr, raw_ostream &OS,
>From fb95433ef07bf04c94e838ebf21538097b460dd7 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Thu, 20 Jun 2024 12:54:10 +0100
Subject: [PATCH 3/5] Add print helper case for accum offset attribute
---
.../Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 25ca4e779fdaf..cf6501dbd3669 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -444,12 +444,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
accum_bits = MCBinaryExpr::createMul(
accum_bits, MCConstantExpr::create(4, getContext()), getContext());
OS << "\t\t.amdhsa_accum_offset ";
- int64_t IVal;
- if (accum_bits->evaluateAsAbsolute(IVal)) {
- OS << static_cast<uint64_t>(IVal);
- } else {
- accum_bits->print(OS, MAI);
- }
+ llvm::AMDGPUMCExprPrint(accum_bits, OS, MAI);
OS << '\n';
}
>From 301b1481ecc51591d0b9b94cfa49abc30f0b905f Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <Janek.vanOirschot at amd.com>
Date: Thu, 20 Jun 2024 05:24:29 -0700
Subject: [PATCH 4/5] Don't forget to add affected test
---
llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s
index cd795e9a8d475..351913840a8a2 100644
--- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s
@@ -84,7 +84,7 @@ expr_defined:
// ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&6144)>>11
// ASM-NEXT: .amdhsa_next_free_vgpr 0
// ASM-NEXT: .amdhsa_next_free_sgpr 0
-// ASM-NEXT: .amdhsa_accum_offset (((((((0&(~65536))|(defined_boolean<<16))&(~63))|(0<<0))&63)>>0)+1)*4
+// ASM-NEXT: .amdhsa_accum_offset 4
// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
// ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&12288)>>12
// ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&49152)>>14
>From 8f4ecd753f49e6cb540d3fccb7f60a0ebd630fb9 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <Janek.vanOirschot at amd.com>
Date: Thu, 20 Jun 2024 07:27:41 -0700
Subject: [PATCH 5/5] Pass print helper function as param for users of it in
Utils subdir
---
.../MCTargetDesc/AMDGPUTargetStreamer.cpp | 2 +-
.../AMDGPU/Utils/AMDKernelCodeTUtils.cpp | 46 ++++++++-----------
.../Target/AMDGPU/Utils/AMDKernelCodeTUtils.h | 5 +-
llvm/test/MC/AMDGPU/amd_kernel_code_t.s | 32 +++++--------
4 files changed, 37 insertions(+), 48 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index cf6501dbd3669..afd93f1b2178b 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -246,7 +246,7 @@ void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion(
void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
OS << "\t.amd_kernel_code_t\n";
- Header.EmitKernelCodeT(OS, getContext());
+ Header.EmitKernelCodeT(OS, getContext(), llvm::AMDGPUMCExprPrint);
OS << "\t.end_amd_kernel_code_t\n";
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
index 720d5a1853dbb..7b88ddb7b0e95 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
@@ -226,35 +226,35 @@ class PrintField {
template <typename T, T AMDGPUMCKernelCodeT::*ptr,
typename std::enable_if_t<!std::is_integral_v<T>, T> * = nullptr>
static void printField(StringRef Name, const AMDGPUMCKernelCodeT &C,
- raw_ostream &OS, MCContext &Ctx) {
+ raw_ostream &OS, MCContext &Ctx,
+ AMDGPUMCKernelCodeT::PrintHelper Helper) {
OS << Name << " = ";
const MCExpr *Value = C.*ptr;
- int64_t Val;
- if (Value->evaluateAsAbsolute(Val))
- OS << Val;
- else
- Value->print(OS, Ctx.getAsmInfo());
+ Helper(Value, OS, Ctx.getAsmInfo());
}
template <typename T, T AMDGPUMCKernelCodeT::*ptr,
typename std::enable_if_t<std::is_integral_v<T>, T> * = nullptr>
static void printField(StringRef Name, const AMDGPUMCKernelCodeT &C,
- raw_ostream &OS, MCContext &) {
+ raw_ostream &OS, MCContext &,
+ AMDGPUMCKernelCodeT::PrintHelper) {
OS << Name << " = " << (int)(C.*ptr);
}
};
template <typename T, T AMDGPUMCKernelCodeT::*ptr, int shift, int width = 1>
static void printBitField(StringRef Name, const AMDGPUMCKernelCodeT &C,
- raw_ostream &OS, MCContext &) {
+ raw_ostream &OS, MCContext &,
+ AMDGPUMCKernelCodeT::PrintHelper) {
const auto Mask = (static_cast<T>(1) << width) - 1;
OS << Name << " = " << (int)((C.*ptr >> shift) & Mask);
}
using PrintFx = void (*)(StringRef, const AMDGPUMCKernelCodeT &, raw_ostream &,
- MCContext &);
+ MCContext &, AMDGPUMCKernelCodeT::PrintHelper Helper);
-static ArrayRef<PrintFx> getPrinterTable() {
+static ArrayRef<PrintFx>
+getPrinterTable(AMDGPUMCKernelCodeT::PrintHelper Helper) {
static const PrintFx Table[] = {
#define COMPPGM1(name, aname, AccMacro) \
COMPPGM(name, aname, C_00B848_##AccMacro, S_00B848_##AccMacro, 0)
@@ -263,7 +263,7 @@ static ArrayRef<PrintFx> getPrinterTable() {
#define PRINTFIELD(sname, aname, name) PrintField::printField<FLD_T(name)>
#define PRINTCOMP(Complement, PGMType) \
[](StringRef Name, const AMDGPUMCKernelCodeT &C, raw_ostream &OS, \
- MCContext &Ctx) { \
+ MCContext &Ctx, AMDGPUMCKernelCodeT::PrintHelper Helper) { \
OS << Name << " = "; \
auto [Shift, Mask] = getShiftMask(Complement); \
const MCExpr *Value; \
@@ -274,11 +274,7 @@ static ArrayRef<PrintFx> getPrinterTable() {
Value = \
maskShiftGet(C.compute_pgm_resource2_registers, Mask, Shift, Ctx); \
} \
- int64_t Val; \
- if (Value->evaluateAsAbsolute(Val)) \
- OS << Val; \
- else \
- Value->print(OS, Ctx.getAsmInfo()); \
+ Helper(Value, OS, Ctx.getAsmInfo()); \
}
#define RECORD(name, altName, print, parse) print
#include "Utils/AMDKernelCodeTInfo.h"
@@ -379,10 +375,11 @@ static ArrayRef<ParseFx> getParserTable() {
}
static void printAmdKernelCodeField(const AMDGPUMCKernelCodeT &C, int FldIndex,
- raw_ostream &OS, MCContext &Ctx) {
- auto Printer = getPrinterTable()[FldIndex];
+ raw_ostream &OS, MCContext &Ctx,
+ AMDGPUMCKernelCodeT::PrintHelper Helper) {
+ auto Printer = getPrinterTable(Helper)[FldIndex];
if (Printer)
- Printer(get_amd_kernel_code_t_FldNames()[FldIndex + 1], C, OS, Ctx);
+ Printer(get_amd_kernel_code_t_FldNames()[FldIndex + 1], C, OS, Ctx, Helper);
}
void AMDGPUMCKernelCodeT::initDefault(const MCSubtargetInfo *STI,
@@ -459,20 +456,17 @@ bool AMDGPUMCKernelCodeT::ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser,
return Parser ? Parser(*this, MCParser, Err) : false;
}
-void AMDGPUMCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, MCContext &Ctx) {
+void AMDGPUMCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, MCContext &Ctx,
+ PrintHelper Helper) {
const int Size = hasMCExprVersionTable().size();
for (int i = 0; i < Size; ++i) {
OS << "\t\t";
if (hasMCExprVersionTable()[i]) {
OS << get_amd_kernel_code_t_FldNames()[i + 1] << " = ";
- int64_t Val;
const MCExpr *Value = getMCExprForIndex(i);
- if (Value->evaluateAsAbsolute(Val))
- OS << Val;
- else
- Value->print(OS, Ctx.getAsmInfo());
+ Helper(Value, OS, Ctx.getAsmInfo());
} else {
- printAmdKernelCodeField(*this, i, OS, Ctx);
+ printAmdKernelCodeField(*this, i, OS, Ctx, Helper);
}
OS << '\n';
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
index 6aeb98f1ce147..0f8d819d99b2a 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
@@ -27,6 +27,7 @@ class MCExpr;
class MCStreamer;
class MCSubtargetInfo;
class raw_ostream;
+class MCAsmInfo;
namespace AMDGPU {
struct AMDGPUMCKernelCodeT {
@@ -79,8 +80,10 @@ struct AMDGPUMCKernelCodeT {
const MCExpr *&getMCExprForIndex(int Index);
+ using PrintHelper = void (*)(const MCExpr *, raw_ostream &,
+ const MCAsmInfo *);
bool ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser, raw_ostream &Err);
- void EmitKernelCodeT(raw_ostream &OS, MCContext &Ctx);
+ void EmitKernelCodeT(raw_ostream &OS, MCContext &Ctx, PrintHelper Helper);
void EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx);
};
diff --git a/llvm/test/MC/AMDGPU/amd_kernel_code_t.s b/llvm/test/MC/AMDGPU/amd_kernel_code_t.s
index 052ec0bfabb84..3312b3be7f4cb 100644
--- a/llvm/test/MC/AMDGPU/amd_kernel_code_t.s
+++ b/llvm/test/MC/AMDGPU/amd_kernel_code_t.s
@@ -132,16 +132,16 @@ unknown_workitem_private_segment_byte_size:
; ASM-LABEL: unknown_granulated_workitem_vgpr_count:
; ASM: granulated_workitem_vgpr_count = ((0&4294967232)|(unknown&63))&63
-; ASM: granulated_wavefront_sgpr_count = (((0&4294967232)|(unknown&63))>>6)&15
-; ASM: priority = (((0&4294967232)|(unknown&63))>>10)&3
-; ASM: float_mode = (((0&4294967232)|(unknown&63))>>12)&255
-; ASM: priv = (((0&4294967232)|(unknown&63))>>20)&1
-; ASM: enable_dx10_clamp = (((0&4294967232)|(unknown&63))>>21)&1
-; ASM: debug_mode = (((0&4294967232)|(unknown&63))>>22)&1
-; ASM: enable_ieee_mode = (((0&4294967232)|(unknown&63))>>23)&1
-; ASM: enable_wgp_mode = (((0&4294967232)|(unknown&63))>>29)&1
-; ASM: enable_mem_ordered = (((0&4294967232)|(unknown&63))>>30)&1
-; ASM: enable_fwd_progress = (((0&4294967232)|(unknown&63))>>31)&1
+; ASM: granulated_wavefront_sgpr_count = 0
+; ASM: priority = 0
+; ASM: float_mode = 0
+; ASM: priv = 0
+; ASM: enable_dx10_clamp = 0
+; ASM: debug_mode = 0
+; ASM: enable_ieee_mode = 0
+; ASM: enable_wgp_mode = 0
+; ASM: enable_mem_ordered = 0
+; ASM: enable_fwd_progress = 0
.section .unknown_granulated_workitem_vgpr_count
unknown_granulated_workitem_vgpr_count:
.amd_kernel_code_t
@@ -150,17 +150,9 @@ unknown_granulated_workitem_vgpr_count:
s_endpgm
; ASM-LABEL: unknown_enable_sgpr_workgroup_id_x:
-; ASM: enable_sgpr_private_segment_wave_byte_offset = ((0&4294967167)|((unknown&1)<<7))&1
-; ASM: user_sgpr_count = (((0&4294967167)|((unknown&1)<<7))>>1)&31
-; ASM: enable_trap_handler = (((0&4294967167)|((unknown&1)<<7))>>6)&1
; ASM: enable_sgpr_workgroup_id_x = (((0&4294967167)|((unknown&1)<<7))>>7)&1
-; ASM: enable_sgpr_workgroup_id_y = (((0&4294967167)|((unknown&1)<<7))>>8)&1
-; ASM: enable_sgpr_workgroup_id_z = (((0&4294967167)|((unknown&1)<<7))>>9)&1
-; ASM: enable_sgpr_workgroup_info = (((0&4294967167)|((unknown&1)<<7))>>10)&1
-; ASM: enable_vgpr_workitem_id = (((0&4294967167)|((unknown&1)<<7))>>11)&3
-; ASM: enable_exception_msb = (((0&4294967167)|((unknown&1)<<7))>>13)&3
-; ASM: granulated_lds_size = (((0&4294967167)|((unknown&1)<<7))>>15)&511
-; ASM: enable_exception = (((0&4294967167)|((unknown&1)<<7))>>24)&127
+; ASM: enable_sgpr_workgroup_id_y = 0
+; ASM: enable_sgpr_workgroup_id_z = 0
.section .unknown_enable_sgpr_workgroup_id_x
unknown_enable_sgpr_workgroup_id_x:
.amd_kernel_code_t
More information about the llvm-commits mailing list