[llvm] [AMDGPU] MCExpr printing helper with KnownBits support (PR #95951)

Janek van Oirschot via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 20 07:28:12 PDT 2024


https://github.com/JanekvO updated https://github.com/llvm/llvm-project/pull/95951

>From a368e031be2327bf83d0bda035f3173ec8ff8f55 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Tue, 18 Jun 2024 17:01:05 +0100
Subject: [PATCH 1/5] [AMDGPU] MCExpr printing helper with KnownBits support

---
 llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp   |   7 +-
 .../AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp      | 140 ++++++++++++++++++
 .../Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h |   9 ++
 .../MCTargetDesc/AMDGPUTargetStreamer.cpp     |   9 +-
 llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s     |  18 +--
 llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s     |  14 +-
 llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s     |  14 +-
 llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s      |  16 +-
 llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s      |  16 +-
 llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s    |   4 +-
 10 files changed, 194 insertions(+), 53 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index d8e22f4b0d8fa..29bc5458ddb63 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -404,12 +404,7 @@ void AMDGPUAsmPrinter::emitCommonFunctionComments(
 SmallString<128> AMDGPUAsmPrinter::getMCExprStr(const MCExpr *Value) {
   SmallString<128> Str;
   raw_svector_ostream OSS(Str);
-  int64_t IVal;
-  if (Value->evaluateAsAbsolute(IVal)) {
-    OSS << static_cast<uint64_t>(IVal);
-  } else {
-    Value->print(OSS, MAI);
-  }
+  AMDGPUMCExprPrint(Value, OSS, MAI);
   return Str;
 }
 
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
index 159664faf983f..b40a93af6bb36 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
@@ -15,6 +15,7 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/Allocator.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Support/raw_ostream.h"
 #include <optional>
 
@@ -314,3 +315,142 @@ AMDGPUVariadicMCExpr::createOccupancy(unsigned InitOcc, const MCExpr *NumSGPRs,
                  CreateExpr(InitOcc), NumSGPRs, NumVGPRs},
                 Ctx);
 }
+
+static KnownBits AMDGPUMCExprKnownBits(const MCExpr *Expr, raw_ostream &OS,
+                                       const MCAsmInfo *MAI, unsigned depth) {
+
+  if (depth == 0)
+    return KnownBits(/*BitWidth=*/64);
+
+  depth--;
+
+  switch (Expr->getKind()) {
+  case MCExpr::ExprKind::Binary: {
+    const MCBinaryExpr *BExpr = cast<MCBinaryExpr>(Expr);
+    const MCExpr *LHS = BExpr->getLHS();
+    const MCExpr *RHS = BExpr->getRHS();
+
+    KnownBits LHSKnown = AMDGPUMCExprKnownBits(LHS, OS, MAI, depth);
+    KnownBits RHSKnown = AMDGPUMCExprKnownBits(RHS, OS, MAI, depth);
+
+    switch (BExpr->getOpcode()) {
+    default:
+      return KnownBits(/*BitWidth=*/64);
+    case MCBinaryExpr::Opcode::Add:
+      return KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/false,
+                                         /*NUW=*/false, LHSKnown, RHSKnown);
+    case MCBinaryExpr::Opcode::And:
+      return LHSKnown & RHSKnown;
+    case MCBinaryExpr::Opcode::Div:
+      return KnownBits::sdiv(LHSKnown, RHSKnown);
+    case MCBinaryExpr::Opcode::Mod:
+      return KnownBits::srem(LHSKnown, RHSKnown);
+    case MCBinaryExpr::Opcode::Mul:
+      return KnownBits::mul(LHSKnown, RHSKnown);
+    case MCBinaryExpr::Opcode::Or:
+      return LHSKnown | RHSKnown;
+    case MCBinaryExpr::Opcode::Shl:
+      return KnownBits::shl(LHSKnown, RHSKnown);
+    case MCBinaryExpr::Opcode::AShr:
+      return KnownBits::ashr(LHSKnown, RHSKnown);
+    case MCBinaryExpr::Opcode::LShr:
+      return KnownBits::lshr(LHSKnown, RHSKnown);
+    case MCBinaryExpr::Opcode::Sub:
+      return KnownBits::computeForAddSub(/*Add=*/false, /*NSW=*/false,
+                                         /*NUW=*/false, LHSKnown, RHSKnown);
+    case MCBinaryExpr::Opcode::Xor:
+      return LHSKnown ^ RHSKnown;
+    }
+  }
+  case MCExpr::ExprKind::Constant: {
+    const MCConstantExpr *CE = cast<MCConstantExpr>(Expr);
+    APInt APValue(/*BitWidth=*/64, CE->getValue(), /*isSigned=*/true);
+    return KnownBits::makeConstant(APValue);
+  }
+  case MCExpr::ExprKind::SymbolRef: {
+    const MCSymbolRefExpr *RExpr = cast<MCSymbolRefExpr>(Expr);
+    const MCSymbol &Sym = RExpr->getSymbol();
+    if (!Sym.isVariable())
+      return KnownBits(/*BitWidth=*/64);
+
+    // Variable value retrieval is not for actual use but only for knownbits
+    // analysis.
+    return AMDGPUMCExprKnownBits(Sym.getVariableValue(/*SetUsed=*/false), OS,
+                                 MAI, depth);
+  }
+  case MCExpr::ExprKind::Unary: {
+    const MCUnaryExpr *UExpr = cast<MCUnaryExpr>(Expr);
+    KnownBits KB = AMDGPUMCExprKnownBits(UExpr->getSubExpr(), OS, MAI, depth);
+
+    switch (UExpr->getOpcode()) {
+    default:
+      return KnownBits(/*BitWidth=*/64);
+    case MCUnaryExpr::Opcode::Minus: {
+      KB.makeNegative();
+      return KB;
+    }
+    case MCUnaryExpr::Opcode::Not: {
+      KnownBits AllOnes(/*BitWidth=*/64);
+      AllOnes.setAllOnes();
+      return KB ^ AllOnes;
+    }
+    case MCUnaryExpr::Opcode::Plus: {
+      KB.makeNonNegative();
+      return KB;
+    }
+    }
+  }
+  case MCExpr::ExprKind::Target: {
+    const AMDGPUVariadicMCExpr *AGVK = cast<AMDGPUVariadicMCExpr>(Expr);
+
+    switch (AGVK->getKind()) {
+    default:
+      return KnownBits(/*BitWidth=*/64);
+    case AMDGPUVariadicMCExpr::VariadicKind::AGVK_Or: {
+      KnownBits KB = AMDGPUMCExprKnownBits(AGVK->getSubExpr(0), OS, MAI, depth);
+      for (const MCExpr *Arg : AGVK->getArgs()) {
+        KB |= AMDGPUMCExprKnownBits(Arg, OS, MAI, depth);
+      }
+      return KB;
+    }
+    case AMDGPUVariadicMCExpr::VariadicKind::AGVK_Max: {
+      KnownBits KB = AMDGPUMCExprKnownBits(AGVK->getSubExpr(0), OS, MAI, depth);
+      for (const MCExpr *Arg : AGVK->getArgs()) {
+        KB = KnownBits::umax(KB, AMDGPUMCExprKnownBits(Arg, OS, MAI, depth));
+      }
+      return KB;
+    }
+    case AMDGPUVariadicMCExpr::VariadicKind::AGVK_ExtraSGPRs:
+    case AMDGPUVariadicMCExpr::VariadicKind::AGVK_TotalNumVGPRs:
+    case AMDGPUVariadicMCExpr::VariadicKind::AGVK_AlignTo:
+    case AMDGPUVariadicMCExpr::VariadicKind::AGVK_Occupancy: {
+      int64_t Val;
+      if (AGVK->evaluateAsAbsolute(Val)) {
+        APInt APValue(/*BitWidth=*/64, Val, /*isSigned=*/false);
+        return KnownBits::makeConstant(APValue);
+      } else {
+        return KnownBits(/*BitWidth=*/64);
+      }
+    }
+    }
+  }
+  }
+  return KnownBits(/*BitWidth=*/64);
+}
+
+void llvm::AMDGPUMCExprPrint(const MCExpr *Expr, raw_ostream &OS,
+                             const MCAsmInfo *MAI) {
+  int64_t Val;
+  if (Expr->evaluateAsAbsolute(Val)) {
+    OS << Val;
+    return;
+  }
+
+  KnownBits KB = AMDGPUMCExprKnownBits(Expr, OS, MAI, /*depth=*/16);
+  if (KB.isConstant()) {
+    OS << KB.getConstant();
+    return;
+  }
+
+  Expr->print(OS, MAI);
+}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h
index f92350b592350..67015dcf32343 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h
@@ -92,6 +92,7 @@ class AMDGPUVariadicMCExpr : public MCTargetExpr {
                                                      const GCNSubtarget &STM,
                                                      MCContext &Ctx);
 
+  ArrayRef<const MCExpr *> getArgs() const { return Args; }
   VariadicKind getKind() const { return Kind; }
   const MCExpr *getSubExpr(size_t Index) const;
 
@@ -107,6 +108,14 @@ class AMDGPUVariadicMCExpr : public MCTargetExpr {
   }
 };
 
+// Tries to leverage KnownBits for MCExprs to reduce and limit any composed
+// MCExprs printing. E.g., for an expression such as
+// ((unevaluatable_sym | 1) & 1) won't evaluate due to unevaluatable_sym and
+// would verbosely print the full expression; however, KnownBits should deduce
+// the value to be 1. Particularly useful for AMDGPU metadata MCExprs.
+void AMDGPUMCExprPrint(const MCExpr *Expr, raw_ostream &OS,
+                       const MCAsmInfo *MAI);
+
 } // end namespace llvm
 
 #endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCEXPR_H
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index e805e964ffe4e..25ca4e779fdaf 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPUTargetStreamer.h"
+#include "AMDGPUMCExpr.h"
 #include "AMDGPUMCKernelDescriptor.h"
 #include "AMDGPUPTNote.h"
 #include "Utils/AMDGPUBaseInfo.h"
@@ -328,14 +329,10 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
 
   auto PrintField = [&](const MCExpr *Expr, uint32_t Shift, uint32_t Mask,
                         StringRef Directive) {
-    int64_t IVal;
     OS << "\t\t" << Directive << ' ';
-    const MCExpr *pgm_rsrc1_bits =
+    const MCExpr *ShiftedAndMaskedExpr =
         MCKernelDescriptor::bits_get(Expr, Shift, Mask, getContext());
-    if (pgm_rsrc1_bits->evaluateAsAbsolute(IVal))
-      OS << static_cast<uint64_t>(IVal);
-    else
-      pgm_rsrc1_bits->print(OS, MAI);
+    llvm::AMDGPUMCExprPrint(ShiftedAndMaskedExpr, OS, MAI);
     OS << '\n';
   };
 
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s
index 95af59c413ae6..ff2c81820c578 100644
--- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx10.s
@@ -102,15 +102,15 @@ expr_defined:
 // ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2
 // ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3
 // ASM-NEXT: .amdhsa_kernarg_size 0
-// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&62)>>1
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&1)>>0
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&2)>>1
-// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&4)>>2
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&8)>>3
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&16)>>4
-// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&32)>>5
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&64)>>6
-// ASM-NEXT: .amdhsa_wavefront_size32 (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&1024)>>10
+// ASM-NEXT: .amdhsa_user_sgpr_count 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+// ASM-NEXT: .amdhsa_wavefront_size32 1
 // ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1)>>0
 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&128)>>7
 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&256)>>8
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s
index e1107fb69ba41..fc902129964c3 100644
--- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s
@@ -104,13 +104,13 @@ expr_defined:
 // ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2
 // ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3
 // ASM-NEXT: .amdhsa_kernarg_size 0
-// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&62)>>1
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&2)>>1
-// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&4)>>2
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&8)>>3
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&16)>>4
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&64)>>6
-// ASM-NEXT: .amdhsa_wavefront_size32 (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&1024)>>10
+// ASM-NEXT: .amdhsa_user_sgpr_count 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+// ASM-NEXT: .amdhsa_wavefront_size32 1
 // ASM-NEXT: .amdhsa_enable_private_segment (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1)>>0
 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&128)>>7
 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&256)>>8
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s
index 449616d35186b..bad5fc41bc1b8 100644
--- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s
@@ -106,13 +106,13 @@ expr_defined:
 // ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2
 // ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3
 // ASM-NEXT: .amdhsa_kernarg_size 0
-// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&62)>>1
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&2)>>1
-// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&4)>>2
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&8)>>3
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&16)>>4
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&64)>>6
-// ASM-NEXT: .amdhsa_wavefront_size32 (((((0&(~1024))|(1<<10))&(~2048))|(defined_boolean<<11))&1024)>>10
+// ASM-NEXT: .amdhsa_user_sgpr_count 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+// ASM-NEXT: .amdhsa_wavefront_size32 1
 // ASM-NEXT: .amdhsa_enable_private_segment (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1)>>0
 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&128)>>7
 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&256)>>8
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s
index c7e05441b45ff..22db612721117 100644
--- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx7.s
@@ -94,14 +94,14 @@ expr_defined:
 // ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2
 // ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3
 // ASM-NEXT: .amdhsa_kernarg_size 0
-// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&62)>>1
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer (((0&(~2048))|(defined_boolean<<11))&1)>>0
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((0&(~2048))|(defined_boolean<<11))&2)>>1
-// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((0&(~2048))|(defined_boolean<<11))&4)>>2
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((0&(~2048))|(defined_boolean<<11))&8)>>3
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((0&(~2048))|(defined_boolean<<11))&16)>>4
-// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init (((0&(~2048))|(defined_boolean<<11))&32)>>5
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((0&(~2048))|(defined_boolean<<11))&64)>>6
+// ASM-NEXT: .amdhsa_user_sgpr_count 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
 // ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1)>>0
 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&128)>>7
 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&256)>>8
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s
index 49a5015987a65..b2ccbb7b5fd2f 100644
--- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx8.s
@@ -95,14 +95,14 @@ expr_defined:
 // ASM-NEXT: .amdhsa_group_segment_fixed_size defined_value+2
 // ASM-NEXT: .amdhsa_private_segment_fixed_size defined_value+3
 // ASM-NEXT: .amdhsa_kernarg_size 0
-// ASM-NEXT: .amdhsa_user_sgpr_count (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&62)>>1
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer (((0&(~2048))|(defined_boolean<<11))&1)>>0
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr (((0&(~2048))|(defined_boolean<<11))&2)>>1
-// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr (((0&(~2048))|(defined_boolean<<11))&4)>>2
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr (((0&(~2048))|(defined_boolean<<11))&8)>>3
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id (((0&(~2048))|(defined_boolean<<11))&16)>>4
-// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init (((0&(~2048))|(defined_boolean<<11))&32)>>5
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size (((0&(~2048))|(defined_boolean<<11))&64)>>6
+// ASM-NEXT: .amdhsa_user_sgpr_count 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 0
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
 // ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&1)>>0
 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&128)>>7
 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((((((((((((((((((((((((0&(~128))|(1<<7))&(~6144))|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~62))|(0<<1))&256)>>8
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s
index b7f89239160fc..cd795e9a8d475 100644
--- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s
@@ -66,7 +66,7 @@ expr_defined:
 // ASM-NEXT: .amdhsa_group_segment_fixed_size 0
 // ASM-NEXT: .amdhsa_private_segment_fixed_size 0
 // ASM-NEXT: .amdhsa_kernarg_size 0
-// ASM-NEXT: .amdhsa_user_sgpr_count (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&62)>>1
+// ASM-NEXT: .amdhsa_user_sgpr_count 0
 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 0
 // ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
 // ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 0
@@ -77,7 +77,7 @@ expr_defined:
 // ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 0
 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 0
 // ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1)>>0
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&128)>>7
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&256)>>8
 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&512)>>9
 // ASM-NEXT: .amdhsa_system_sgpr_workgroup_info (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&1024)>>10

>From de95d0f5bbaaac6cb071f3eda3c5f081361f8afa Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Wed, 19 Jun 2024 20:05:14 +0100
Subject: [PATCH 2/5] Feedback, bitwidth const

---
 .../AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp      | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
index b40a93af6bb36..79133ed1947cd 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
@@ -318,9 +318,10 @@ AMDGPUVariadicMCExpr::createOccupancy(unsigned InitOcc, const MCExpr *NumSGPRs,
 
 static KnownBits AMDGPUMCExprKnownBits(const MCExpr *Expr, raw_ostream &OS,
                                        const MCAsmInfo *MAI, unsigned depth) {
+  const unsigned BitWidth = 64;
 
   if (depth == 0)
-    return KnownBits(/*BitWidth=*/64);
+    return KnownBits(BitWidth);
 
   depth--;
 
@@ -335,7 +336,7 @@ static KnownBits AMDGPUMCExprKnownBits(const MCExpr *Expr, raw_ostream &OS,
 
     switch (BExpr->getOpcode()) {
     default:
-      return KnownBits(/*BitWidth=*/64);
+      return KnownBits(BitWidth);
     case MCBinaryExpr::Opcode::Add:
       return KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/false,
                                          /*NUW=*/false, LHSKnown, RHSKnown);
@@ -364,14 +365,14 @@ static KnownBits AMDGPUMCExprKnownBits(const MCExpr *Expr, raw_ostream &OS,
   }
   case MCExpr::ExprKind::Constant: {
     const MCConstantExpr *CE = cast<MCConstantExpr>(Expr);
-    APInt APValue(/*BitWidth=*/64, CE->getValue(), /*isSigned=*/true);
+    APInt APValue(BitWidth, CE->getValue(), /*isSigned=*/true);
     return KnownBits::makeConstant(APValue);
   }
   case MCExpr::ExprKind::SymbolRef: {
     const MCSymbolRefExpr *RExpr = cast<MCSymbolRefExpr>(Expr);
     const MCSymbol &Sym = RExpr->getSymbol();
     if (!Sym.isVariable())
-      return KnownBits(/*BitWidth=*/64);
+      return KnownBits(BitWidth);
 
     // Variable value retrieval is not for actual use but only for knownbits
     // analysis.
@@ -384,13 +385,13 @@ static KnownBits AMDGPUMCExprKnownBits(const MCExpr *Expr, raw_ostream &OS,
 
     switch (UExpr->getOpcode()) {
     default:
-      return KnownBits(/*BitWidth=*/64);
+      return KnownBits(BitWidth);
     case MCUnaryExpr::Opcode::Minus: {
       KB.makeNegative();
       return KB;
     }
     case MCUnaryExpr::Opcode::Not: {
-      KnownBits AllOnes(/*BitWidth=*/64);
+      KnownBits AllOnes(BitWidth);
       AllOnes.setAllOnes();
       return KB ^ AllOnes;
     }
@@ -405,7 +406,7 @@ static KnownBits AMDGPUMCExprKnownBits(const MCExpr *Expr, raw_ostream &OS,
 
     switch (AGVK->getKind()) {
     default:
-      return KnownBits(/*BitWidth=*/64);
+      return KnownBits(BitWidth);
     case AMDGPUVariadicMCExpr::VariadicKind::AGVK_Or: {
       KnownBits KB = AMDGPUMCExprKnownBits(AGVK->getSubExpr(0), OS, MAI, depth);
       for (const MCExpr *Arg : AGVK->getArgs()) {
@@ -426,16 +427,15 @@ static KnownBits AMDGPUMCExprKnownBits(const MCExpr *Expr, raw_ostream &OS,
     case AMDGPUVariadicMCExpr::VariadicKind::AGVK_Occupancy: {
       int64_t Val;
       if (AGVK->evaluateAsAbsolute(Val)) {
-        APInt APValue(/*BitWidth=*/64, Val, /*isSigned=*/false);
+        APInt APValue(BitWidth, Val, /*isSigned=*/false);
         return KnownBits::makeConstant(APValue);
-      } else {
-        return KnownBits(/*BitWidth=*/64);
       }
+      return KnownBits(BitWidth);
     }
     }
   }
   }
-  return KnownBits(/*BitWidth=*/64);
+  return KnownBits(BitWidth);
 }
 
 void llvm::AMDGPUMCExprPrint(const MCExpr *Expr, raw_ostream &OS,

>From fb95433ef07bf04c94e838ebf21538097b460dd7 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Thu, 20 Jun 2024 12:54:10 +0100
Subject: [PATCH 3/5] Add print helper case for accum offset attribute

---
 .../Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp    | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 25ca4e779fdaf..cf6501dbd3669 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -444,12 +444,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
     accum_bits = MCBinaryExpr::createMul(
         accum_bits, MCConstantExpr::create(4, getContext()), getContext());
     OS << "\t\t.amdhsa_accum_offset ";
-    int64_t IVal;
-    if (accum_bits->evaluateAsAbsolute(IVal)) {
-      OS << static_cast<uint64_t>(IVal);
-    } else {
-      accum_bits->print(OS, MAI);
-    }
+    llvm::AMDGPUMCExprPrint(accum_bits, OS, MAI);
     OS << '\n';
   }
 

>From 301b1481ecc51591d0b9b94cfa49abc30f0b905f Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <Janek.vanOirschot at amd.com>
Date: Thu, 20 Jun 2024 05:24:29 -0700
Subject: [PATCH 4/5] Don't forget to add affected test

---
 llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s
index cd795e9a8d475..351913840a8a2 100644
--- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx90a.s
@@ -84,7 +84,7 @@ expr_defined:
 // ASM-NEXT: .amdhsa_system_vgpr_workitem_id (((((((0&(~128))|(1<<7))&(~1))|(defined_boolean<<0))&(~62))|(0<<1))&6144)>>11
 // ASM-NEXT: .amdhsa_next_free_vgpr 0
 // ASM-NEXT: .amdhsa_next_free_sgpr 0
-// ASM-NEXT: .amdhsa_accum_offset (((((((0&(~65536))|(defined_boolean<<16))&(~63))|(0<<0))&63)>>0)+1)*4
+// ASM-NEXT: .amdhsa_accum_offset 4
 // ASM-NEXT: .amdhsa_reserve_xnack_mask 1
 // ASM-NEXT: .amdhsa_float_round_mode_32 (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&12288)>>12
 // ASM-NEXT: .amdhsa_float_round_mode_16_64 (((((((((((((((((0&(~786432))|(3<<18))&(~2097152))|(1<<21))&(~8388608))|(1<<23))&(~2097152))|(defined_boolean<<21))&(~8388608))|(defined_boolean<<23))&(~67108864))|(defined_boolean<<26))&(~63))|(0<<0))&(~960))|(0<<6))&49152)>>14

>From 8f4ecd753f49e6cb540d3fccb7f60a0ebd630fb9 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <Janek.vanOirschot at amd.com>
Date: Thu, 20 Jun 2024 07:27:41 -0700
Subject: [PATCH 5/5] Pass print helper function as param for users of it in
 Utils subdir

---
 .../MCTargetDesc/AMDGPUTargetStreamer.cpp     |  2 +-
 .../AMDGPU/Utils/AMDKernelCodeTUtils.cpp      | 46 ++++++++-----------
 .../Target/AMDGPU/Utils/AMDKernelCodeTUtils.h |  5 +-
 llvm/test/MC/AMDGPU/amd_kernel_code_t.s       | 32 +++++--------
 4 files changed, 37 insertions(+), 48 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index cf6501dbd3669..afd93f1b2178b 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -246,7 +246,7 @@ void AMDGPUTargetAsmStreamer::EmitDirectiveAMDHSACodeObjectVersion(
 
 void AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(AMDGPUMCKernelCodeT &Header) {
   OS << "\t.amd_kernel_code_t\n";
-  Header.EmitKernelCodeT(OS, getContext());
+  Header.EmitKernelCodeT(OS, getContext(), llvm::AMDGPUMCExprPrint);
   OS << "\t.end_amd_kernel_code_t\n";
 }
 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
index 720d5a1853dbb..7b88ddb7b0e95 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
@@ -226,35 +226,35 @@ class PrintField {
   template <typename T, T AMDGPUMCKernelCodeT::*ptr,
             typename std::enable_if_t<!std::is_integral_v<T>, T> * = nullptr>
   static void printField(StringRef Name, const AMDGPUMCKernelCodeT &C,
-                         raw_ostream &OS, MCContext &Ctx) {
+                         raw_ostream &OS, MCContext &Ctx,
+                         AMDGPUMCKernelCodeT::PrintHelper Helper) {
     OS << Name << " = ";
     const MCExpr *Value = C.*ptr;
-    int64_t Val;
-    if (Value->evaluateAsAbsolute(Val))
-      OS << Val;
-    else
-      Value->print(OS, Ctx.getAsmInfo());
+    Helper(Value, OS, Ctx.getAsmInfo());
   }
 
   template <typename T, T AMDGPUMCKernelCodeT::*ptr,
             typename std::enable_if_t<std::is_integral_v<T>, T> * = nullptr>
   static void printField(StringRef Name, const AMDGPUMCKernelCodeT &C,
-                         raw_ostream &OS, MCContext &) {
+                         raw_ostream &OS, MCContext &,
+                         AMDGPUMCKernelCodeT::PrintHelper) {
     OS << Name << " = " << (int)(C.*ptr);
   }
 };
 
 template <typename T, T AMDGPUMCKernelCodeT::*ptr, int shift, int width = 1>
 static void printBitField(StringRef Name, const AMDGPUMCKernelCodeT &C,
-                          raw_ostream &OS, MCContext &) {
+                          raw_ostream &OS, MCContext &,
+                          AMDGPUMCKernelCodeT::PrintHelper) {
   const auto Mask = (static_cast<T>(1) << width) - 1;
   OS << Name << " = " << (int)((C.*ptr >> shift) & Mask);
 }
 
 using PrintFx = void (*)(StringRef, const AMDGPUMCKernelCodeT &, raw_ostream &,
-                         MCContext &);
+                         MCContext &, AMDGPUMCKernelCodeT::PrintHelper Helper);
 
-static ArrayRef<PrintFx> getPrinterTable() {
+static ArrayRef<PrintFx>
+getPrinterTable(AMDGPUMCKernelCodeT::PrintHelper Helper) {
   static const PrintFx Table[] = {
 #define COMPPGM1(name, aname, AccMacro)                                        \
   COMPPGM(name, aname, C_00B848_##AccMacro, S_00B848_##AccMacro, 0)
@@ -263,7 +263,7 @@ static ArrayRef<PrintFx> getPrinterTable() {
 #define PRINTFIELD(sname, aname, name) PrintField::printField<FLD_T(name)>
 #define PRINTCOMP(Complement, PGMType)                                         \
   [](StringRef Name, const AMDGPUMCKernelCodeT &C, raw_ostream &OS,            \
-     MCContext &Ctx) {                                                         \
+     MCContext &Ctx, AMDGPUMCKernelCodeT::PrintHelper Helper) {                \
     OS << Name << " = ";                                                       \
     auto [Shift, Mask] = getShiftMask(Complement);                             \
     const MCExpr *Value;                                                       \
@@ -274,11 +274,7 @@ static ArrayRef<PrintFx> getPrinterTable() {
       Value =                                                                  \
           maskShiftGet(C.compute_pgm_resource2_registers, Mask, Shift, Ctx);   \
     }                                                                          \
-    int64_t Val;                                                               \
-    if (Value->evaluateAsAbsolute(Val))                                        \
-      OS << Val;                                                               \
-    else                                                                       \
-      Value->print(OS, Ctx.getAsmInfo());                                      \
+    Helper(Value, OS, Ctx.getAsmInfo());                                       \
   }
 #define RECORD(name, altName, print, parse) print
 #include "Utils/AMDKernelCodeTInfo.h"
@@ -379,10 +375,11 @@ static ArrayRef<ParseFx> getParserTable() {
 }
 
 static void printAmdKernelCodeField(const AMDGPUMCKernelCodeT &C, int FldIndex,
-                                    raw_ostream &OS, MCContext &Ctx) {
-  auto Printer = getPrinterTable()[FldIndex];
+                                    raw_ostream &OS, MCContext &Ctx,
+                                    AMDGPUMCKernelCodeT::PrintHelper Helper) {
+  auto Printer = getPrinterTable(Helper)[FldIndex];
   if (Printer)
-    Printer(get_amd_kernel_code_t_FldNames()[FldIndex + 1], C, OS, Ctx);
+    Printer(get_amd_kernel_code_t_FldNames()[FldIndex + 1], C, OS, Ctx, Helper);
 }
 
 void AMDGPUMCKernelCodeT::initDefault(const MCSubtargetInfo *STI,
@@ -459,20 +456,17 @@ bool AMDGPUMCKernelCodeT::ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser,
   return Parser ? Parser(*this, MCParser, Err) : false;
 }
 
-void AMDGPUMCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, MCContext &Ctx) {
+void AMDGPUMCKernelCodeT::EmitKernelCodeT(raw_ostream &OS, MCContext &Ctx,
+                                          PrintHelper Helper) {
   const int Size = hasMCExprVersionTable().size();
   for (int i = 0; i < Size; ++i) {
     OS << "\t\t";
     if (hasMCExprVersionTable()[i]) {
       OS << get_amd_kernel_code_t_FldNames()[i + 1] << " = ";
-      int64_t Val;
       const MCExpr *Value = getMCExprForIndex(i);
-      if (Value->evaluateAsAbsolute(Val))
-        OS << Val;
-      else
-        Value->print(OS, Ctx.getAsmInfo());
+      Helper(Value, OS, Ctx.getAsmInfo());
     } else {
-      printAmdKernelCodeField(*this, i, OS, Ctx);
+      printAmdKernelCodeField(*this, i, OS, Ctx, Helper);
     }
     OS << '\n';
   }
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
index 6aeb98f1ce147..0f8d819d99b2a 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h
@@ -27,6 +27,7 @@ class MCExpr;
 class MCStreamer;
 class MCSubtargetInfo;
 class raw_ostream;
+class MCAsmInfo;
 namespace AMDGPU {
 
 struct AMDGPUMCKernelCodeT {
@@ -79,8 +80,10 @@ struct AMDGPUMCKernelCodeT {
 
   const MCExpr *&getMCExprForIndex(int Index);
 
+  using PrintHelper = void (*)(const MCExpr *, raw_ostream &,
+                               const MCAsmInfo *);
   bool ParseKernelCodeT(StringRef ID, MCAsmParser &MCParser, raw_ostream &Err);
-  void EmitKernelCodeT(raw_ostream &OS, MCContext &Ctx);
+  void EmitKernelCodeT(raw_ostream &OS, MCContext &Ctx, PrintHelper Helper);
   void EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx);
 };
 
diff --git a/llvm/test/MC/AMDGPU/amd_kernel_code_t.s b/llvm/test/MC/AMDGPU/amd_kernel_code_t.s
index 052ec0bfabb84..3312b3be7f4cb 100644
--- a/llvm/test/MC/AMDGPU/amd_kernel_code_t.s
+++ b/llvm/test/MC/AMDGPU/amd_kernel_code_t.s
@@ -132,16 +132,16 @@ unknown_workitem_private_segment_byte_size:
 
 ; ASM-LABEL: unknown_granulated_workitem_vgpr_count:
 ; ASM: granulated_workitem_vgpr_count = ((0&4294967232)|(unknown&63))&63
-; ASM: granulated_wavefront_sgpr_count = (((0&4294967232)|(unknown&63))>>6)&15
-; ASM: priority = (((0&4294967232)|(unknown&63))>>10)&3
-; ASM: float_mode = (((0&4294967232)|(unknown&63))>>12)&255
-; ASM: priv = (((0&4294967232)|(unknown&63))>>20)&1
-; ASM: enable_dx10_clamp = (((0&4294967232)|(unknown&63))>>21)&1
-; ASM: debug_mode = (((0&4294967232)|(unknown&63))>>22)&1
-; ASM: enable_ieee_mode = (((0&4294967232)|(unknown&63))>>23)&1
-; ASM: enable_wgp_mode = (((0&4294967232)|(unknown&63))>>29)&1
-; ASM: enable_mem_ordered = (((0&4294967232)|(unknown&63))>>30)&1
-; ASM: enable_fwd_progress = (((0&4294967232)|(unknown&63))>>31)&1
+; ASM: granulated_wavefront_sgpr_count = 0
+; ASM: priority = 0
+; ASM: float_mode = 0
+; ASM: priv = 0
+; ASM: enable_dx10_clamp = 0
+; ASM: debug_mode = 0
+; ASM: enable_ieee_mode = 0
+; ASM: enable_wgp_mode = 0
+; ASM: enable_mem_ordered = 0
+; ASM: enable_fwd_progress = 0
 .section .unknown_granulated_workitem_vgpr_count
 unknown_granulated_workitem_vgpr_count:
 	.amd_kernel_code_t
@@ -150,17 +150,9 @@ unknown_granulated_workitem_vgpr_count:
 	s_endpgm
 
 ; ASM-LABEL: unknown_enable_sgpr_workgroup_id_x:
-; ASM: enable_sgpr_private_segment_wave_byte_offset = ((0&4294967167)|((unknown&1)<<7))&1
-; ASM: user_sgpr_count = (((0&4294967167)|((unknown&1)<<7))>>1)&31
-; ASM: enable_trap_handler = (((0&4294967167)|((unknown&1)<<7))>>6)&1
 ; ASM: enable_sgpr_workgroup_id_x = (((0&4294967167)|((unknown&1)<<7))>>7)&1
-; ASM: enable_sgpr_workgroup_id_y = (((0&4294967167)|((unknown&1)<<7))>>8)&1
-; ASM: enable_sgpr_workgroup_id_z = (((0&4294967167)|((unknown&1)<<7))>>9)&1
-; ASM: enable_sgpr_workgroup_info = (((0&4294967167)|((unknown&1)<<7))>>10)&1
-; ASM: enable_vgpr_workitem_id = (((0&4294967167)|((unknown&1)<<7))>>11)&3
-; ASM: enable_exception_msb = (((0&4294967167)|((unknown&1)<<7))>>13)&3
-; ASM: granulated_lds_size = (((0&4294967167)|((unknown&1)<<7))>>15)&511
-; ASM: enable_exception = (((0&4294967167)|((unknown&1)<<7))>>24)&127
+; ASM: enable_sgpr_workgroup_id_y = 0
+; ASM: enable_sgpr_workgroup_id_z = 0
 .section .unknown_enable_sgpr_workgroup_id_x
 unknown_enable_sgpr_workgroup_id_x:
 	.amd_kernel_code_t



More information about the llvm-commits mailing list