[llvm] r258789 - AMDGPU: Tidy minor td file issues
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 25 20:49:22 PST 2016
Author: arsenm
Date: Mon Jan 25 22:49:22 2016
New Revision: 258789
URL: http://llvm.org/viewvc/llvm-project?rev=258789&view=rev
Log:
AMDGPU: Tidy minor td file issues
Make comments and indentation more consistent.
Rearrange a few things into a more consistent order, such as
separating subtarget features that describe an actual device
property from those used as options.
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
llvm/trunk/lib/Target/AMDGPU/CIInstructions.td
llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.td?rev=258789&r1=258788&r2=258789&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.td Mon Jan 25 22:49:22 2016
@@ -1,183 +1,115 @@
-//===-- AMDGPU.td - AMDGPU Tablegen files ------------------*- tablegen -*-===//
+//===-- AMDGPU.td - AMDGPU Tablegen files --------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
-//===----------------------------------------------------------------------===//
+//===------------------------------------------------------------===//
include "llvm/Target/Target.td"
-//===----------------------------------------------------------------------===//
-// Subtarget Features
-//===----------------------------------------------------------------------===//
-
-// Debugging Features
-
-def FeatureDumpCode : SubtargetFeature <"DumpCode",
- "DumpCode",
- "true",
- "Dump MachineInstrs in the CodeEmitter">;
-
-def FeatureDumpCodeLower : SubtargetFeature <"dumpcode",
- "DumpCode",
- "true",
- "Dump MachineInstrs in the CodeEmitter">;
-
-def FeatureIRStructurizer : SubtargetFeature <"disable-irstructurizer",
- "EnableIRStructurizer",
- "false",
- "Disable IR Structurizer">;
-
-def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
- "EnablePromoteAlloca",
- "true",
- "Enable promote alloca pass">;
-
-// Target features
-
-def FeatureIfCvt : SubtargetFeature <"disable-ifcvt",
- "EnableIfCvt",
- "false",
- "Disable the if conversion pass">;
+//===------------------------------------------------------------===//
+// Subtarget Features (device properties)
+//===------------------------------------------------------------===//
def FeatureFP64 : SubtargetFeature<"fp64",
- "FP64",
- "true",
- "Enable double precision operations">;
-
-def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
- "FP64Denormals",
- "true",
- "Enable double precision denormal handling",
- [FeatureFP64]>;
+ "FP64",
+ "true",
+ "Enable double precision operations"
+>;
def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
- "FastFMAF32",
- "true",
- "Assuming f32 fma is at least as fast as mul + add",
- []>;
+ "FastFMAF32",
+ "true",
+ "Assuming f32 fma is at least as fast as mul + add"
+>;
def HalfRate64Ops : SubtargetFeature<"half-rate-64-ops",
- "HalfRate64Ops",
- "true",
- "Most fp64 instructions are half rate instead of quarter",
- []>;
-
-// Some instructions do not support denormals despite this flag. Using
-// fp32 denormals also causes instructions to run at the double
-// precision rate for the device.
-def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
- "FP32Denormals",
- "true",
- "Enable single precision denormal handling">;
+ "HalfRate64Ops",
+ "true",
+ "Most fp64 instructions are half rate instead of quarter"
+>;
def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
- "R600ALUInst",
- "false",
- "Older version of ALU instructions encoding">;
+ "R600ALUInst",
+ "false",
+ "Older version of ALU instructions encoding"
+>;
def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
- "HasVertexCache",
- "true",
- "Specify use of dedicated vertex cache">;
+ "HasVertexCache",
+ "true",
+ "Specify use of dedicated vertex cache"
+>;
def FeatureCaymanISA : SubtargetFeature<"caymanISA",
- "CaymanISA",
- "true",
- "Use Cayman ISA">;
+ "CaymanISA",
+ "true",
+ "Use Cayman ISA"
+>;
def FeatureCFALUBug : SubtargetFeature<"cfalubug",
- "CFALUBug",
- "true",
- "GPU has CF_ALU bug">;
-
-// XXX - This should probably be removed once enabled by default
-def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
- "EnableLoadStoreOpt",
- "true",
- "Enable SI load/store optimizer pass">;
-
-// Performance debugging feature. Allow using DS instruction immediate
-// offsets even if the base pointer can't be proven to be base. On SI,
-// base pointer values that won't give the same result as a 16-bit add
-// are not safe to fold, but this will override the conservative test
-// for the base pointer.
-def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <"unsafe-ds-offset-folding",
- "EnableUnsafeDSOffsetFolding",
- "true",
- "Force using DS instruction immediate offsets on SI">;
-
-def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
- "FlatForGlobal",
- "true",
- "Force to generate flat instruction for global">;
+ "CFALUBug",
+ "true",
+ "GPU has CF_ALU bug"
+>;
def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
- "FlatAddressSpace",
- "true",
- "Support flat address space">;
+ "FlatAddressSpace",
+ "true",
+ "Support flat address space"
+>;
def FeatureXNACK : SubtargetFeature<"xnack",
- "EnableXNACK",
- "true",
- "Enable XNACK support">;
-
-def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
- "EnableVGPRSpilling",
- "true",
- "Enable spilling of VGPRs to scratch memory">;
+ "EnableXNACK",
+ "true",
+ "Enable XNACK support"
+>;
def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
- "SGPRInitBug",
- "true",
- "VI SGPR initilization bug requiring a fixed SGPR allocation size">;
-
-def FeatureEnableHugeScratchBuffer : SubtargetFeature<"huge-scratch-buffer",
- "EnableHugeScratchBuffer",
- "true",
- "Enable scratch buffer sizes greater than 128 GB">;
-
-def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
- "EnableSIScheduler",
- "true",
- "Enable SI Machine Scheduler">;
+ "SGPRInitBug",
+ "true",
+ "VI SGPR initilization bug requiring a fixed SGPR allocation size"
+>;
class SubtargetFeatureFetchLimit <string Value> :
SubtargetFeature <"fetch"#Value,
- "TexVTXClauseSize",
- Value,
- "Limit the maximum number of fetches in a clause to "#Value>;
+ "TexVTXClauseSize",
+ Value,
+ "Limit the maximum number of fetches in a clause to "#Value
+>;
def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">;
def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">;
class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature<
- "wavefrontsize"#Value,
- "WavefrontSize",
- !cast<string>(Value),
- "The number of threads per wavefront">;
+ "wavefrontsize"#Value,
+ "WavefrontSize",
+ !cast<string>(Value),
+ "The number of threads per wavefront"
+>;
def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
- "ldsbankcount"#Value,
- "LDSBankCount",
- !cast<string>(Value),
- "The number of LDS banks per compute unit.">;
+ "ldsbankcount"#Value,
+ "LDSBankCount",
+ !cast<string>(Value),
+ "The number of LDS banks per compute unit."
+>;
def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;
class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping>
: SubtargetFeature <
- "isaver"#Major#"."#Minor#"."#Stepping,
- "IsaVersion",
- "ISAVersion"#Major#"_"#Minor#"_"#Stepping,
- "Instruction set version number"
+ "isaver"#Major#"."#Minor#"."#Stepping,
+ "IsaVersion",
+ "ISAVersion"#Major#"_"#Minor#"_"#Stepping,
+ "Instruction set version number"
>;
def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0>;
@@ -187,36 +119,135 @@ def FeatureISAVersion8_0_1 : SubtargetFe
def FeatureISAVersion8_0_3 : SubtargetFeatureISAVersion <8,0,3>;
class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
- "localmemorysize"#Value,
- "LocalMemorySize",
- !cast<string>(Value),
- "The size of local memory in bytes">;
+ "localmemorysize"#Value,
+ "LocalMemorySize",
+ !cast<string>(Value),
+ "The size of local memory in bytes"
+>;
def FeatureGCN : SubtargetFeature<"gcn",
- "IsGCN",
- "true",
- "GCN or newer GPU">;
+ "IsGCN",
+ "true",
+ "GCN or newer GPU"
+>;
def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding",
- "GCN1Encoding",
- "true",
- "Encoding format for SI and CI">;
+ "GCN1Encoding",
+ "true",
+ "Encoding format for SI and CI"
+>;
def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
- "GCN3Encoding",
- "true",
- "Encoding format for VI">;
+ "GCN3Encoding",
+ "true",
+ "Encoding format for VI"
+>;
def FeatureCIInsts : SubtargetFeature<"ci-insts",
- "CIInsts",
- "true",
- "Additional intstructions for CI+">;
+ "CIInsts",
+ "true",
+ "Additional intstructions for CI+"
+>;
+
+//===------------------------------------------------------------===//
+// Subtarget Features (options and debugging)
+//===------------------------------------------------------------===//
+
+// Some instructions do not support denormals despite this flag. Using
+// fp32 denormals also causes instructions to run at the double
+// precision rate for the device.
+def FeatureFP32Denormals : SubtargetFeature<"fp32-denormals",
+ "FP32Denormals",
+ "true",
+ "Enable single precision denormal handling"
+>;
+
+def FeatureFP64Denormals : SubtargetFeature<"fp64-denormals",
+ "FP64Denormals",
+ "true",
+ "Enable double precision denormal handling",
+ [FeatureFP64]
+>;
+
+def FeatureEnableHugeScratchBuffer : SubtargetFeature<
+ "huge-scratch-buffer",
+ "EnableHugeScratchBuffer",
+ "true",
+ "Enable scratch buffer sizes greater than 128 GB"
+>;
+
+def FeatureVGPRSpilling : SubtargetFeature<"vgpr-spilling",
+ "EnableVGPRSpilling",
+ "true",
+ "Enable spilling of VGPRs to scratch memory"
+>;
+
+def FeatureDumpCode : SubtargetFeature <"DumpCode",
+ "DumpCode",
+ "true",
+ "Dump MachineInstrs in the CodeEmitter"
+>;
+
+def FeatureDumpCodeLower : SubtargetFeature <"dumpcode",
+ "DumpCode",
+ "true",
+ "Dump MachineInstrs in the CodeEmitter"
+>;
+
+def FeatureIRStructurizer : SubtargetFeature <"disable-irstructurizer",
+ "EnableIRStructurizer",
+ "false",
+ "Disable IR Structurizer"
+>;
+
+def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
+ "EnablePromoteAlloca",
+ "true",
+ "Enable promote alloca pass"
+>;
+
+// XXX - This should probably be removed once enabled by default
+def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
+ "EnableLoadStoreOpt",
+ "true",
+ "Enable SI load/store optimizer pass"
+>;
+
+// Performance debugging feature. Allow using DS instruction immediate
+// offsets even if the base pointer can't be proven to be base. On SI,
+// base pointer values that won't give the same result as a 16-bit add
+// are not safe to fold, but this will override the conservative test
+// for the base pointer.
+def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <
+ "unsafe-ds-offset-folding",
+ "EnableUnsafeDSOffsetFolding",
+ "true",
+ "Force using DS instruction immediate offsets on SI"
+>;
+
+def FeatureIfCvt : SubtargetFeature <"disable-ifcvt",
+ "EnableIfCvt",
+ "false",
+ "Disable the if conversion pass"
+>;
+
+def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler",
+ "EnableSIScheduler",
+ "true",
+ "Enable SI Machine Scheduler"
+>;
+
+def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
+ "FlatForGlobal",
+ "true",
+ "Force to generate flat instruction for global"
+>;
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
- "FeatureDisable","true",
- "Dummy feature to disable assembler"
- " instructions">;
+ "FeatureDisable","true",
+ "Dummy feature to disable assembler instructions"
+>;
class SubtargetFeatureGeneration <string Value,
list<SubtargetFeature> Implies> :
@@ -228,33 +259,39 @@ def FeatureLocalMemorySize32768 : Subtar
def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;
def FeatureR600 : SubtargetFeatureGeneration<"R600",
- [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]>;
+ [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]
+>;
def FeatureR700 : SubtargetFeatureGeneration<"R700",
- [FeatureFetchLimit16, FeatureLocalMemorySize0]>;
+ [FeatureFetchLimit16, FeatureLocalMemorySize0]
+>;
def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN",
- [FeatureFetchLimit16, FeatureLocalMemorySize32768]>;
+ [FeatureFetchLimit16, FeatureLocalMemorySize32768]
+>;
def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
- [FeatureFetchLimit16, FeatureWavefrontSize64,
- FeatureLocalMemorySize32768]
+ [FeatureFetchLimit16, FeatureWavefrontSize64,
+ FeatureLocalMemorySize32768]
>;
def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
- [FeatureFP64, FeatureLocalMemorySize32768,
- FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding,
- FeatureLDSBankCount32]>;
+ [FeatureFP64, FeatureLocalMemorySize32768,
+ FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding,
+ FeatureLDSBankCount32]
+>;
def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
- [FeatureFP64, FeatureLocalMemorySize65536,
- FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
- FeatureGCN1Encoding, FeatureCIInsts]>;
+ [FeatureFP64, FeatureLocalMemorySize65536,
+ FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
+ FeatureGCN1Encoding, FeatureCIInsts]
+>;
def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
- [FeatureFP64, FeatureLocalMemorySize65536,
- FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
- FeatureGCN3Encoding, FeatureCIInsts, FeatureLDSBankCount32]>;
+ [FeatureFP64, FeatureLocalMemorySize65536,
+ FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
+ FeatureGCN3Encoding, FeatureCIInsts, FeatureLDSBankCount32]
+>;
//===----------------------------------------------------------------------===//
@@ -284,6 +321,7 @@ def NullALU : InstrItinClass;
//===----------------------------------------------------------------------===//
def TruePredicate : Predicate<"true">;
+
def isSICI : Predicate<
"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
@@ -293,6 +331,13 @@ def isVI : Predicate <
"Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
AssemblerPredicate<"FeatureGCN3Encoding">;
+def isCIVI : Predicate <
+ "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || "
+ "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS"
+>, AssemblerPredicate<"FeatureCIInsts">;
+
+def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;
+
class PredicateControl {
Predicate SubtargetPredicate;
Predicate SIAssemblerPredicate = isSICI;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td?rev=258789&r1=258788&r2=258789&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td Mon Jan 25 22:49:22 2016
@@ -587,13 +587,6 @@ def cvt_flr_i32_f32 : PatFrag <
[{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;
-/*
-class UMUL24Pattern <Instruction UMUL24> : Pat <
- (mul U24:$x, U24:$y),
- (UMUL24 $x, $y)
->;
-*/
-
class IMad24Pat<Instruction Inst> : Pat <
(add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
(Inst $src0, $src1, $src2)
Modified: llvm/trunk/lib/Target/AMDGPU/CIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/CIInstructions.td?rev=258789&r1=258788&r2=258789&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/CIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/CIInstructions.td Mon Jan 25 22:49:22 2016
@@ -25,14 +25,6 @@
// BUFFER_LOAD_DWORDX3
// BUFFER_STORE_DWORDX3
-
-def isCIVI : Predicate <
- "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || "
- "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS"
->, AssemblerPredicate<"FeatureCIInsts">;
-
-def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;
-
//===----------------------------------------------------------------------===//
// VOP1 Instructions
//===----------------------------------------------------------------------===//
@@ -262,7 +254,7 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC <
flat<0x60>, "flat_atomic_fmax_x2", VReg_64
>;
-} // End let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst
+} // End SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst
let Predicates = [isCI] in {
@@ -289,7 +281,7 @@ def : Pat <
let Predicates = [isCIVI] in {
-// Patterns for global loads with no offset
+// Patterns for global loads with no offset.
class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
(vt (node i64:$addr)),
(inst $addr, 0, 0, 0)
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=258789&r1=258788&r2=258789&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Mon Jan 25 22:49:22 2016
@@ -101,7 +101,7 @@ let isMoveImm = 1 in {
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm S_MOV_B32 : SOP1_32 <sop1<0x03, 0x00>, "s_mov_b32", []>;
defm S_MOV_B64 : SOP1_64 <sop1<0x04, 0x01>, "s_mov_b64", []>;
- } // let isRematerializeable = 1
+ } // End isRematerializeable = 1
let Uses = [SCC] in {
defm S_CMOV_B32 : SOP1_32 <sop1<0x05, 0x02>, "s_cmov_b32", []>;
@@ -1234,7 +1234,7 @@ defm V_CVT_F64_U32 : VOP1Inst <vop1<0x16
VOP_F64_I32, uint_to_fp
>;
-} // let SchedRW = [WriteQuarterRate32]
+} // End SchedRW = [WriteQuarterRate32]
defm V_FRACT_F32 : VOP1Inst <vop1<0x20, 0x1b>, "v_fract_f32",
VOP_F32_F32, AMDGPUfract
@@ -1270,7 +1270,7 @@ defm V_RSQ_F32 : VOP1Inst <vop1<0x2e, 0x
VOP_F32_F32, AMDGPUrsq
>;
-} //let SchedRW = [WriteQuarterRate32]
+} // End SchedRW = [WriteQuarterRate32]
let SchedRW = [WriteDouble] in {
@@ -1281,7 +1281,7 @@ defm V_RSQ_F64 : VOP1Inst <vop1<0x31, 0x
VOP_F64_F64, AMDGPUrsq
>;
-} // let SchedRW = [WriteDouble];
+} // End SchedRW = [WriteDouble];
defm V_SQRT_F32 : VOP1Inst <vop1<0x33, 0x27>, "v_sqrt_f32",
VOP_F32_F32, fsqrt
@@ -1710,7 +1710,7 @@ defm V_MED3_U32 : VOP3Inst <vop3<0x159,
defm V_SAD_U32 : VOP3Inst <vop3<0x15d, 0x1dc>, "v_sad_u32",
VOP_I32_I32_I32_I32
>;
-////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "v_cvt_pk_u8_f32", []>;
+//def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "v_cvt_pk_u8_f32", []>;
defm V_DIV_FIXUP_F32 : VOP3Inst <
vop3<0x15f, 0x1de>, "v_div_fixup_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fixup
>;
@@ -1740,13 +1740,13 @@ defm V_MAX_F64 : VOP3Inst <vop3<0x167, 0
VOP_F64_F64_F64, fmaxnum
>;
-} // isCommutable = 1
+} // End isCommutable = 1
defm V_LDEXP_F64 : VOP3Inst <vop3<0x168, 0x284>, "v_ldexp_f64",
VOP_F64_F64_I32, AMDGPUldexp
>;
-} // let SchedRW = [WriteDoubleAdd]
+} // End let SchedRW = [WriteDoubleAdd]
let isCommutable = 1, SchedRW = [WriteQuarterRate32] in {
@@ -1764,7 +1764,7 @@ defm V_MUL_HI_I32 : VOP3Inst <vop3<0x16c
VOP_I32_I32_I32, mulhs
>;
-} // isCommutable = 1, SchedRW = [WriteQuarterRate32]
+} // End isCommutable = 1, SchedRW = [WriteQuarterRate32]
let SchedRW = [WriteFloatFMA, WriteSALU] in {
defm V_DIV_SCALE_F32 : VOP3bInst <vop3<0x16d, 0x1e0>, "v_div_scale_f32",
@@ -1777,7 +1777,7 @@ let SchedRW = [WriteDouble, WriteSALU] i
defm V_DIV_SCALE_F64 : VOP3bInst <vop3<0x16e, 0x1e1>, "v_div_scale_f64",
VOP3b_F64_I1_F64_F64_F64
>;
-} // let SchedRW = [WriteDouble]
+} // End SchedRW = [WriteDouble]
let isCommutable = 1, Uses = [VCC, EXEC] in {
@@ -1814,7 +1814,7 @@ defm V_TRIG_PREOP_F64 : VOP3Inst <
vop3<0x174, 0x292>, "v_trig_preop_f64", VOP_F64_F64_I32, AMDGPUtrig_preop
>;
-} // let SchedRW = [WriteDouble]
+} // End SchedRW = [WriteDouble]
// These instructions only exist on SI and CI
let SubtargetPredicate = isSICI in {
@@ -1856,7 +1856,7 @@ let hasSideEffects = 0, mayLoad = 0, may
// 64-bit vector move instruction. This is mainly used by the SIFoldOperands
// pass to enable folding of inline immediates.
def V_MOV_B64_PSEUDO : InstSI <(outs VReg_64:$dst), (ins VSrc_64:$src0), "", []>;
-} // end let hasSideEffects = 0, mayLoad = 0, mayStore = 0
+} // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0
let hasSideEffects = 1, SALU = 1 in {
def SGPR_USE : InstSI <(outs),(ins), "", []>;
@@ -1893,7 +1893,7 @@ def SI_LOOP : InstSI <
[(int_amdgcn_loop i64:$saved, bb:$target)]
>;
-} // end isBranch = 1, isTerminator = 1
+} // End isBranch = 1, isTerminator = 1
def SI_BREAK : InstSI <
(outs SReg_64:$dst),
@@ -1934,7 +1934,7 @@ def SI_KILL : InstSI <
>;
} // End Uses = [EXEC], Defs = [EXEC,VCC]
-} // end mayLoad = 1, mayStore = 1, hasSideEffects = 1
+} // End mayLoad = 1, mayStore = 1, hasSideEffects = 1
let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
@@ -1967,7 +1967,7 @@ def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST
def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
-} // Uses = [EXEC,VCC,M0], Defs = [EXEC,VCC,M0]
+} // End Uses = [EXEC], Defs = [EXEC,VCC,M0]
multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
@@ -1975,8 +1975,7 @@ multiclass SI_SPILL_SGPR <RegisterClass
def _SAVE : InstSI <
(outs),
(ins sgpr_class:$src, i32imm:$frame_idx),
- "", []
- > {
+ "", []> {
let mayStore = 1;
let mayLoad = 0;
}
@@ -1984,8 +1983,7 @@ multiclass SI_SPILL_SGPR <RegisterClass
def _RESTORE : InstSI <
(outs sgpr_class:$dst),
(ins i32imm:$frame_idx),
- "", []
- > {
+ "", []> {
let mayStore = 0;
let mayLoad = 1;
}
@@ -2007,8 +2005,7 @@ multiclass SI_SPILL_VGPR <RegisterClass
(outs),
(ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
SReg_32:$scratch_offset),
- "", []
- > {
+ "", []> {
let mayStore = 1;
let mayLoad = 0;
}
@@ -2016,8 +2013,7 @@ multiclass SI_SPILL_VGPR <RegisterClass
def _RESTORE : InstSI <
(outs vgpr_class:$dst),
(ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
- "", []
- > {
+ "", []> {
let mayStore = 0;
let mayLoad = 1;
}
@@ -2043,9 +2039,9 @@ def SI_CONSTDATA_PTR : InstSI <
} // End Defs = [SCC]
-} // end IsCodeGenOnly, isPseudo
+} // End isCodeGenOnly, isPseudo
-} // end SubtargetPredicate = isGCN
+} // End SubtargetPredicate = isGCN
let Predicates = [isGCN] in {
@@ -2060,7 +2056,6 @@ def : Pat<
(BUFFER_LOAD_FORMAT_XYZW_IDXEN $buf_idx_vgpr, $tlst, 0, imm:$attr_offset, 0, 0, 0)
>;
-/* int_SI_export */
def : Pat <
(int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
f32:$src0, f32:$src1, f32:$src2, f32:$src3),
@@ -2445,11 +2440,6 @@ class ImageLoadArrayMSAAPattern<Intrinsi
/********** Extraction, Insertion, Building and Casting **********/
/********** ============================================ **********/
-//def : Extract_Element<i64, v2i64, 0, sub0_sub1>;
-//def : Extract_Element<i64, v2i64, 1, sub2_sub3>;
-//def : Extract_Element<f64, v2f64, 0, sub0_sub1>;
-//def : Extract_Element<f64, v2f64, 1, sub2_sub3>;
-
foreach Index = 0-2 in {
def Extract_Element_v2i32_#Index : Extract_Element <
i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
@@ -2514,46 +2504,45 @@ foreach Index = 0-15 in {
>;
}
-def : BitConvert <i32, f32, SReg_32>;
+// FIXME: Why do only some of these type combinations for SReg and
+// VReg?
+// 32-bit bitcast
def : BitConvert <i32, f32, VGPR_32>;
-
-def : BitConvert <f32, i32, SReg_32>;
def : BitConvert <f32, i32, VGPR_32>;
+def : BitConvert <i32, f32, SReg_32>;
+def : BitConvert <f32, i32, SReg_32>;
+// 64-bit bitcast
def : BitConvert <i64, f64, VReg_64>;
-
def : BitConvert <f64, i64, VReg_64>;
-
-def : BitConvert <v2f32, v2i32, VReg_64>;
def : BitConvert <v2i32, v2f32, VReg_64>;
-def : BitConvert <v2i32, i64, VReg_64>;
+def : BitConvert <v2f32, v2i32, VReg_64>;
def : BitConvert <i64, v2i32, VReg_64>;
-def : BitConvert <v2f32, i64, VReg_64>;
+def : BitConvert <v2i32, i64, VReg_64>;
def : BitConvert <i64, v2f32, VReg_64>;
-def : BitConvert <v2f32, f64, VReg_64>;
-def : BitConvert <v2i32, f64, VReg_64>;
+def : BitConvert <v2f32, i64, VReg_64>;
def : BitConvert <f64, v2f32, VReg_64>;
+def : BitConvert <v2f32, f64, VReg_64>;
def : BitConvert <f64, v2i32, VReg_64>;
-def : BitConvert <v4f32, v4i32, VReg_128>;
+def : BitConvert <v2i32, f64, VReg_64>;
def : BitConvert <v4i32, v4f32, VReg_128>;
+def : BitConvert <v4f32, v4i32, VReg_128>;
-
+// 128-bit bitcast
def : BitConvert <v2i64, v4i32, SReg_128>;
def : BitConvert <v4i32, v2i64, SReg_128>;
-
def : BitConvert <v2f64, v4f32, VReg_128>;
def : BitConvert <v2f64, v4i32, VReg_128>;
def : BitConvert <v4f32, v2f64, VReg_128>;
def : BitConvert <v4i32, v2f64, VReg_128>;
-
-
-
-def : BitConvert <v8f32, v8i32, SReg_256>;
+// 256-bit bitcast
def : BitConvert <v8i32, v8f32, SReg_256>;
+def : BitConvert <v8f32, v8i32, SReg_256>;
def : BitConvert <v8i32, v8f32, VReg_256>;
def : BitConvert <v8f32, v8i32, VReg_256>;
+// 512-bit bitcast
def : BitConvert <v16i32, v16f32, VReg_512>;
def : BitConvert <v16f32, v16i32, VReg_512>;
@@ -2575,7 +2564,7 @@ def : Pat <
def : Pat <
(fneg (fabs f32:$src)),
- (S_OR_B32 $src, 0x80000000) /* Set sign bit */
+ (S_OR_B32 $src, 0x80000000) // Set sign bit
>;
// FIXME: Should use S_OR_B32
@@ -2665,7 +2654,6 @@ def : Pat <
/********** Intrinsic Patterns **********/
/********** ================== **********/
-/* llvm.AMDGPU.pow */
def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;
def : Pat <
@@ -2702,7 +2690,7 @@ class Ext32Pat <SDNode ext> : Pat <
def : Ext32Pat <zext>;
def : Ext32Pat <anyext>;
-// Offset in an 32Bit VGPR
+// Offset in an 32-bit VGPR
def : Pat <
(SIload_constant v4i32:$sbase, i32:$voff),
(BUFFER_LOAD_DWORD_OFFEN $voff, $sbase, 0, 0, 0, 0, 0)
@@ -2934,22 +2922,6 @@ def : MUBUFScratchStorePat <BUFFER_STORE
def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, v2i32, store_private>;
def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, v4i32, store_private>;
-/*
-class MUBUFStore_Pattern <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
- (st vt:$value, (MUBUFScratch v4i32:$srsrc, i64:$vaddr, u16imm:$offset)),
- (Instr $value, $srsrc, $vaddr, $offset)
->;
-
-let Predicates = [isSICI] in {
-def : MUBUFStore_Pattern <BUFFER_STORE_BYTE_ADDR64, i32, truncstorei8_private>;
-def : MUBUFStore_Pattern <BUFFER_STORE_SHORT_ADDR64, i32, truncstorei16_private>;
-def : MUBUFStore_Pattern <BUFFER_STORE_DWORD_ADDR64, i32, store_private>;
-def : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2_ADDR64, v2i32, store_private>;
-def : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4_ADDR64, v4i32, store_private>;
-} // End Predicates = [isSICI]
-
-*/
-
//===----------------------------------------------------------------------===//
// MTBUF Patterns
//===----------------------------------------------------------------------===//
More information about the llvm-commits
mailing list