[llvm] [AMDGPU][MC] Implement fft and rotate modes for ds_swizzle_b32 (PR #108064)

via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 10 11:06:27 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Jun Wang (jwanggit86)

<details>
<summary>Changes</summary>

In addition to the basic mode, the ds_swizzle_b32 instruction is supposed to support two specific modes: FFT and ROTATE. This patch implements those two modes in the assembler parser and the instruction printer.

---
Full diff: https://github.com/llvm/llvm-project/pull/108064.diff


5 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+56) 
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+7) 
- (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+24-2) 
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp (+3-6) 
- (added) llvm/test/MC/AMDGPU/ds_swizzle.s (+93) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 1a10206eea2374..b1516df77dce7d 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1843,6 +1843,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
   bool parseSwizzleBroadcast(int64_t &Imm);
   bool parseSwizzleSwap(int64_t &Imm);
   bool parseSwizzleReverse(int64_t &Imm);
+  bool parseSwizzleFFT(int64_t &Imm);
+  bool parseSwizzleRotate(int64_t &Imm);
 
   ParseStatus parseGPRIdxMode(OperandVector &Operands);
   int64_t parseGPRIdxMacro();
@@ -8141,6 +8143,56 @@ AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
   return true;
 }
 
+bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
+  using namespace llvm::AMDGPU::Swizzle;
+
+  if (!skipToken(AsmToken::Comma, "expected a comma"))
+    return false;
+
+  SMLoc Loc = getLoc();
+
+  int64_t Type;
+
+  if (!parseExpr(Type))
+    return false;
+
+  if (Type != FFT_NO_SWIZZLE && Type != FFT_SWIZZLE_00 &&
+      Type != FFT_SWIZZLE_10) {
+    const std::string ErrMsg = "invalid FFT swizzle type: must be " +
+                               std::to_string(FFT_SWIZZLE_00) + ", " +
+                               std::to_string(FFT_SWIZZLE_10) + ", or " +
+                               std::to_string(FFT_NO_SWIZZLE);
+    Error(Loc, ErrMsg);
+    return false;
+  }
+
+  Imm = FFT_MODE_ENC | Type;
+  return true;
+}
+
+bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
+  using namespace llvm::AMDGPU::Swizzle;
+
+  SMLoc Loc;
+  int64_t Direction;
+
+  if (!parseSwizzleOperand(Direction, 0, 1,
+                           "direction must be 0 (left) or 1 (right)", Loc))
+    return false;
+
+  int64_t RotateSize;
+  const std::string ErrorMsg =
+      "number of threads to rotate must be in the interval [0," +
+      std::to_string(ROTATE_MAX_SIZE) + "]";
+
+  if (!parseSwizzleOperand(RotateSize, 0, ROTATE_MAX_SIZE, ErrorMsg, Loc))
+    return false;
+
+  Imm = ROTATE_MODE_ENC | (Direction << ROTATE_DIR_SHIFT) |
+        (RotateSize << ROTATE_SIZE_SHIFT);
+  return true;
+}
+
 bool
 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
 
@@ -8175,6 +8227,10 @@ AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
       Ok = parseSwizzleSwap(Imm);
     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
       Ok = parseSwizzleReverse(Imm);
+    } else if (trySkipId(IdSymbolic[ID_FFT])) {
+      Ok = parseSwizzleFFT(Imm);
+    } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
+      Ok = parseSwizzleRotate(Imm);
     } else {
       Error(ModeLoc, "expected a swizzle mode");
     }
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 94bf5e4b95270b..a9f660b7e9b86f 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -1556,6 +1556,13 @@ void AMDGPUInstPrinter::printSwizzle(const MCInst *MI, unsigned OpNo,
         O << ")";
       }
     }
+  } else if ((Imm & FFT_ROTATE_MODE_MASK) == FFT_MODE_ENC) {
+    O << "swizzle(" << IdSymbolic[ID_FFT] << ","
+      << (Imm & FFT_SWIZZLE_TYPE_MASK) << ")";
+  } else if ((Imm & FFT_ROTATE_MODE_MASK) == ROTATE_MODE_ENC) {
+    O << "swizzle(" << IdSymbolic[ID_ROTATE] << ","
+      << ((Imm >> ROTATE_DIR_SHIFT) & ROTATE_DIR_MASK) << ","
+      << ((Imm >> ROTATE_SIZE_SHIFT) & ROTATE_SIZE_MASK) << ")";
   } else {
     printU16ImmDecOperand(MI, OpNo, O);
   }
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index fb3d83ca30d198..40cd022cc33aa6 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -841,7 +841,9 @@ enum Id : unsigned { // id of symbolic names
   ID_BITMASK_PERM,
   ID_SWAP,
   ID_REVERSE,
-  ID_BROADCAST
+  ID_BROADCAST,
+  ID_FFT,
+  ID_ROTATE
 };
 
 enum EncBits : unsigned {
@@ -854,6 +856,11 @@ enum EncBits : unsigned {
   BITMASK_PERM_ENC      = 0x0000,
   BITMASK_PERM_ENC_MASK = 0x8000,
 
+  FFT_MODE_ENC          = 0xE000,
+
+  ROTATE_MODE_ENC       = 0xC000,
+  FFT_ROTATE_MODE_MASK  = 0xF000,
+
   // QUAD_PERM encodings
 
   LANE_MASK             = 0x3,
@@ -869,7 +876,22 @@ enum EncBits : unsigned {
 
   BITMASK_AND_SHIFT     = 0,
   BITMASK_OR_SHIFT      = 5,
-  BITMASK_XOR_SHIFT     = 10
+  BITMASK_XOR_SHIFT     = 10,
+
+  // FFT encodings
+
+  FFT_NO_SWIZZLE        = 0x1F,
+  FFT_SWIZZLE_00        = 0x0,
+  FFT_SWIZZLE_10        = 0x10,
+
+  FFT_SWIZZLE_TYPE_MASK = 0x1F,
+
+  // ROTATE encodings
+  ROTATE_MAX_SIZE       = 0x1F,
+  ROTATE_DIR_SHIFT      = 10, // bit position of rotate direction
+  ROTATE_DIR_MASK       = 0x1,
+  ROTATE_SIZE_SHIFT     = 5, // bit position of rotate size
+  ROTATE_SIZE_MASK = ROTATE_MAX_SIZE,
 };
 
 } // namespace Swizzle
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
index 5f7549c2921eda..702a0d6118bbae 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
@@ -646,12 +646,9 @@ unsigned const DfmtNfmt2UFmtGFX11[] = {
 namespace Swizzle {
 
 // This must be in sync with llvm::AMDGPU::Swizzle::Id enum members, see SIDefines.h.
-const char* const IdSymbolic[] = {
-  "QUAD_PERM",
-  "BITMASK_PERM",
-  "SWAP",
-  "REVERSE",
-  "BROADCAST",
+const char *const IdSymbolic[] = {
+    "QUAD_PERM", "BITMASK_PERM", "SWAP",   "REVERSE",
+    "BROADCAST", "FFT",          "ROTATE",
 };
 
 } // namespace Swizzle
diff --git a/llvm/test/MC/AMDGPU/ds_swizzle.s b/llvm/test/MC/AMDGPU/ds_swizzle.s
new file mode 100644
index 00000000000000..7d9f5097edbd54
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/ds_swizzle.s
@@ -0,0 +1,93 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx908 -show-encoding %s | FileCheck %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 -show-encoding %s | FileCheck -check-prefix=GFX10PLUS %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck -check-prefix=GFX10PLUS %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s | FileCheck -check-prefix=GFX10PLUS %s
+
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx908 -show-encoding %s 2>&1 | FileCheck -check-prefix=ERROR %s --implicit-check-not=error:
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 -show-encoding %s 2>&1 | FileCheck -check-prefix=ERROR %s --implicit-check-not=error:
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -show-encoding %s 2>&1 | FileCheck -check-prefix=ERROR %s --implicit-check-not=error:
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck -check-prefix=ERROR %s --implicit-check-not=error:
+
+//==============================================================================
+// FFT mode
+
+ds_swizzle_b32 v5, v1 offset:swizzle(FFT,0)
+// CHECK:     [0x00,0xe0,0x7a,0xd8,0x01,0x00,0x00,0x05]
+// GFX10PLUS: [0x00,0xe0,0xd4,0xd8,0x01,0x00,0x00,0x05]
+
+ds_swizzle_b32 v5, v1 offset:swizzle(FFT,16)
+// CHECK:     [0x10,0xe0,0x7a,0xd8,0x01,0x00,0x00,0x05]
+// GFX10PLUS: [0x10,0xe0,0xd4,0xd8,0x01,0x00,0x00,0x05]
+
+ds_swizzle_b32 v5, v1 offset:swizzle(FFT,31)
+// CHECK:     [0x1f,0xe0,0x7a,0xd8,0x01,0x00,0x00,0x05]
+// GFX10PLUS: [0x1f,0xe0,0xd4,0xd8,0x01,0x00,0x00,0x05]
+
+ds_swizzle_b32 v5, v1 offset:swizzle(FFT,2)
+// ERROR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid FFT swizzle type: must be 0, 16, or 31
+
+ds_swizzle_b32 v5, v1 offset:swizzle(FFT,32)
+// ERROR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid FFT swizzle type: must be 0, 16, or 31
+
+ds_swizzle_b32 v5, v1 offset:swizzle(FFT)
+// ERROR: :[[@LINE-1]]:{{[0-9]+}}: error: expected a comma
+
+ds_swizzle_b32 v5, v1 offset:swizzle(FFT,16,31)
+// ERROR: :[[@LINE-1]]:{{[0-9]+}}: error: expected a closing parentheses
+
+
+//==============================================================================
+// ROTATE mode
+
+ds_swizzle_b32 v5, v1 offset:swizzle(ROTATE,0,0)
+// CHECK:     [0x00,0xc0,0x7a,0xd8,0x01,0x00,0x00,0x05]
+// GFX10PLUS: [0x00,0xc0,0xd4,0xd8,0x01,0x00,0x00,0x05]
+
+ds_swizzle_b32 v5, v1 offset:swizzle(ROTATE,1,0)
+// CHECK:     [0x00,0xc4,0x7a,0xd8,0x01,0x00,0x00,0x05]
+// GFX10PLUS: [0x00,0xc4,0xd4,0xd8,0x01,0x00,0x00,0x05]
+
+ds_swizzle_b32 v5, v1 offset:swizzle(ROTATE,0,1)
+// CHECK:     [0x20,0xc0,0x7a,0xd8,0x01,0x00,0x00,0x05]
+// GFX10PLUS: [0x20,0xc0,0xd4,0xd8,0x01,0x00,0x00,0x05]
+
+ds_swizzle_b32 v5, v1 offset:swizzle(ROTATE,1,1)
+// CHECK:     [0x20,0xc4,0x7a,0xd8,0x01,0x00,0x00,0x05]
+// GFX10PLUS: [0x20,0xc4,0xd4,0xd8,0x01,0x00,0x00,0x05]
+
+ds_swizzle_b32 v5, v1 offset:swizzle(ROTATE,0,31)
+// CHECK:     [0xe0,0xc3,0x7a,0xd8,0x01,0x00,0x00,0x05]
+// GFX10PLUS: [0xe0,0xc3,0xd4,0xd8,0x01,0x00,0x00,0x05]
+
+ds_swizzle_b32 v5, v1 offset:swizzle(ROTATE,1,31)
+// CHECK:     [0xe0,0xc7,0x7a,0xd8,0x01,0x00,0x00,0x05]
+// GFX10PLUS: [0xe0,0xc7,0xd4,0xd8,0x01,0x00,0x00,0x05]
+
+
+ds_swizzle_b32 v5, v1 offset:swizzle(ROTATE,2,31)
+// ERROR: :[[@LINE-1]]:{{[0-9]+}}: error: direction must be 0 (left) or 1 (right)
+
+ds_swizzle_b32 v5, v1 offset:swizzle(ROTATE,-1,31)
+// ERROR: :[[@LINE-1]]:{{[0-9]+}}: error: direction must be 0 (left) or 1 (right)
+
+ds_swizzle_b32 v5, v1 offset:swizzle(ROTATE,0,32)
+// ERROR: :[[@LINE-1]]:{{[0-9]+}}: error: number of threads to rotate must be in the interval [0,31]
+
+ds_swizzle_b32 v5, v1 offset:swizzle(ROTATE,0,-2)
+// ERROR: :[[@LINE-1]]:{{[0-9]+}}: error: number of threads to rotate must be in the interval [0,31]
+
+ds_swizzle_b32 v5, v1 offset:swizzle(ROTATE)
+// ERROR: :[[@LINE-1]]:{{[0-9]+}}: error: expected a comma
+
+ds_swizzle_b32 v5, v1 offset:swizzle(ROTATE,0)
+// ERROR: :[[@LINE-1]]:{{[0-9]+}}: error: expected a comma
+
+ds_swizzle_b32 v5, v1 offset:swizzle(ROTATE,1)
+// ERROR: :[[@LINE-1]]:{{[0-9]+}}: error: expected a comma
+
+ds_swizzle_b32 v5, v1 offset:swizzle(ROTATE,0,1,2)
+// ERROR: :[[@LINE-1]]:{{[0-9]+}}: error: expected a closing parentheses
+
+
+
+

``````````

</details>


https://github.com/llvm/llvm-project/pull/108064


More information about the llvm-commits mailing list