[llvm] bea56b0 - [AMDGPU] Have a subtarget feature to control use of real True16 instructions.

Fri Sep 22 02:47:19 PDT 2023

Author: Ivan Kosarev
Date: 2023-09-22T10:47:13+01:00
New Revision: bea56b0bc0682da6465acd6063026513123075fd

URL: https://github.com/llvm/llvm-project/commit/bea56b0bc0682da6465acd6063026513123075fd
DIFF: https://github.com/llvm/llvm-project/commit/bea56b0bc0682da6465acd6063026513123075fd.diff

LOG: [AMDGPU] Have a subtarget feature to control use of real True16 instructions.

Real True16 instructions are as they are defined in the ISA. Fake True16
instructions are identical to real ones except that they take 32-bit
registers as operands and always use their low halves.

Reviewed By: Joe_Nash

Differential Revision: https://reviews.llvm.org/D156100

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPU.td
    llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
    llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 037a8aa104f0b32..e0b40199b774585 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -375,6 +375,12 @@ def FeatureTrue16BitInsts : SubtargetFeature<"true16",
   "True 16-bit operand instructions"
 >;
 
+def FeatureRealTrue16Insts : SubtargetFeature<"real-true16",
+  "EnableRealTrue16Insts",
+  "true",
+  "Use true 16-bit registers"
+>;
+
 def FeatureVOP3P : SubtargetFeature<"vop3p",
   "HasVOP3PInsts",
   "true",

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index cc179c85c818426..cffc77ba5d18c15 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -167,6 +167,10 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
 
 AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) : TargetTriple(TT) {}
 
+bool AMDGPUSubtarget::useRealTrue16Insts() const {
+  return hasTrue16BitInsts() && EnableRealTrue16Insts;
+}
+
 GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
                            const GCNTargetMachine &TM)
     : // clang-format off

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 10ce00fe68ca3e5..71acce809a1494f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -49,6 +49,7 @@ class AMDGPUSubtarget {
   bool GCN3Encoding = false;
   bool Has16BitInsts = false;
   bool HasTrue16BitInsts = false;
+  bool EnableRealTrue16Insts = false;
   bool HasMadMixInsts = false;
   bool HasMadMacF32Insts = false;
   bool HasDsSrc2Insts = false;
@@ -153,8 +154,17 @@ class AMDGPUSubtarget {
     return Has16BitInsts;
   }
 
+  /// Return true if the subtarget supports True16 instructions.
   bool hasTrue16BitInsts() const { return HasTrue16BitInsts; }
 
+  /// Return true if real (non-fake) variants of True16 instructions using
+  /// 16-bit registers should be code-generated. Fake True16 instructions are
+  /// identical to non-fake ones except that they take 32-bit registers as
+  /// operands and always use their low halves.
+  // TODO: Remove and use hasTrue16BitInsts() instead once True16 is fully
+  // supported and the support for fake True16 instructions is removed.
+  bool useRealTrue16Insts() const;
+
   bool hasMadMixInsts() const {
     return HasMadMixInsts;
   }

diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 1c85ec3f9f5212f..b88e062dc16f787 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -148,8 +148,13 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   addRegisterClass(MVT::v16f64, TRI->getVGPRClassForBitWidth(1024));
 
   if (Subtarget->has16BitInsts()) {
-    addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass);
-    addRegisterClass(MVT::f16, &AMDGPU::SReg_32RegClass);
+    if (Subtarget->useRealTrue16Insts()) {
+      addRegisterClass(MVT::i16, &AMDGPU::VGPR_16RegClass);
+      addRegisterClass(MVT::f16, &AMDGPU::VGPR_16RegClass);
+    } else {
+      addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass);
+      addRegisterClass(MVT::f16, &AMDGPU::SReg_32RegClass);
+    }
 
     // Unless there are also VOP3P operations, not operations are really legal.
     addRegisterClass(MVT::v2i16, &AMDGPU::SReg_32RegClass);