[libc-commits] [libc] [libc] Implement basic 'fenv.h' utilities on the AMD GPU (PR #83500)
Joseph Huber via libc-commits
libc-commits at lists.llvm.org
Thu Feb 29 19:22:34 PST 2024
================
@@ -0,0 +1,302 @@
+//===-- amdgpu floating point env manipulation functions --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_AMDGPU_FENVIMPL_H
+#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_AMDGPU_FENVIMPL_H
+
+#include "src/__support/GPU/utils.h"
+#include "src/__support/macros/attributes.h"
+#include "src/__support/macros/properties/architectures.h"
+
+#if !defined(LIBC_TARGET_ARCH_IS_AMDGPU)
+#error "Invalid include"
+#endif
+
+#include <fenv.h>
+#include <stdint.h>
+
+namespace LIBC_NAMESPACE {
+namespace fputil {
+
+namespace internal {
+
+// Gets the immediate argument to access the AMDGPU hardware register. The
+// register access is encoded in a 16-bit immediate value according to the
+// following layout.
+//
+// ┌──────────────┬──────────────┬───────────────┐
+// │ SIZE[15:11] │ OFFSET[10:6] │ ID[5:0] │
+// └──────────────┴──────────────┴───────────────┘
+//
+// This will read the size number of bits starting at the offset bit from the
+// corresponding hardware register ID.
+//
+// The arguments are 32-bit, so the shifts are evaluated at full width and only
+// the combined result is narrowed to 16 bits. No range checking is performed;
+// bits of a field that exceed its slot overlap the neighbouring field or are
+// discarded by the narrowing cast.
+//
+// NOTE(review): the AMDGPU 'hwreg' immediate is usually described as storing
+// the field width minus one in SIZE. Confirm that passing the raw width here
+// selects the intended number of bits.
+LIBC_INLINE constexpr uint16_t get_register(uint32_t id, uint32_t offset,
+ uint32_t size) {
+ return static_cast<uint16_t>(size << 11 | offset << 6 | id);
+}
+
+// Integral identifiers for the relevant hardware registers.
+enum Register : uint16_t {
+ // The mode register controls the floating point behaviour of the device. It
+ // can be read or written to by the kernel during runtime. It is laid out as
+ // a bit field with the following offsets and sizes listed for the relevant
+ // entries.
+ //
+ // ┌─────┬─────────────┬─────┬─────────┬──────────┬─────────────┬────────────┐
+ // │ ... │ EXCP[20:12] │ ... │ IEEE[9] │ CLAMP[8] │ DENORM[7:4] │ ROUND[3:0] │
+ // └─────┴─────────────┴─────┴─────────┴──────────┴─────────────┴────────────┘
+ //
+ // The rounding mode and denormal modes both control f64/f16 and f32 precision
+ // operations separately with two bits. The accepted values for the rounding
+ // mode are nearest, upward, downward, and toward zero given 0, 1, 2, and 3
+ // respectively.
+ //
+ // The CLAMP bit indicates that DirectX 10 handling of NaNs is enabled in the
+ // vector ALU. When set this will clamp NaN values to zero and pass them
+ // otherwise. A hardware bug causes this bit to prevent floating exceptions
+ // from being recorded if this bit is set on all generations before GFX12.
+ //
+ // The IEEE bit controls whether or not floating point operations supporting
+ // exception gathering are IEEE 754-2008 compliant.
+ //
+ // The EXCP field indicates which exceptions will cause the instruction to
+ // take a trap if traps are enabled, see the status register. The bit layout
+ // is identical to that in the trap status register. We are only concerned
+ // with the first six bits and ignore the other three.
+ HW_REG_MODE = 1,
+ HW_REG_MODE_ROUND = get_register(HW_REG_MODE, 0, 4),
+ HW_REG_MODE_CLAMP = get_register(HW_REG_MODE, 8, 1),
+ HW_REG_MODE_EXCP = get_register(HW_REG_MODE, 12, 6),
+
+ // The status register is a read-only register that contains information about
+ // how the kernel was launched. The sixth bit TRAP_EN[6] indicates whether or
+ // not traps are enabled for this kernel. If this bit is set along with the
+ // corresponding bit in the mode register then a trap will be taken.
+ HW_REG_STATUS = 2,
+ HW_REG_STATUS_TRAP_EN = get_register(HW_REG_STATUS, 6, 1),
+
+ // The trap status register contains information about the status of the
+ // exceptions. These bits are accumulated regardless of trap handling status
+ // and are sticky until cleared.
+ //
+ // 5 4 3 2 1 0
+ // ┌─────────┬───────────┬──────────┬────────────────┬──────────┬─────────┐
+ // │ Inexact │ Underflow │ Overflow │ Divide by zero │ Denormal │ Invalid │
+ // └─────────┴───────────┴──────────┴────────────────┴──────────┴─────────┘
+ //
+ // These exceptions indicate that at least one lane in the current wavefront
+ // signalled a floating point exception. There is no way to increase the
+ // granularity.
+ HW_REG_TRAPSTS = 3,
+ HW_REG_TRAPSTS_EXCP = get_register(HW_REG_TRAPSTS, 0, 6),
+};
+
+// The six bits used to encode the standard floating point exceptions in the
+// trap status register.
+enum ExceptionFlags : uint32_t {
----------------
jhuber6 wrote:
> b/c technically uint8_t(x) << 11 will always give you 0, right?
I think integer promotion makes it take the int32_t argument from the shift.
> Do you want to add a link to the documentation of the encodings in the comments?
I can, if needed, but I pretty much copied everything relevant here so I figured it was redundant.
https://github.com/llvm/llvm-project/pull/83500
More information about the libc-commits
mailing list