[llvm] 7f3280e - AMDGPU/GlobalISel: Select permlane16/permlanex16
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 29 14:55:38 PST 2020
Author: Matt Arsenault
Date: 2020-01-29T17:55:31-05:00
New Revision: 7f3280ecddbe574d8693b45fa0851e828db22636
URL: https://github.com/llvm/llvm-project/commit/7f3280ecddbe574d8693b45fa0851e828db22636
DIFF: https://github.com/llvm/llvm-project/commit/7f3280ecddbe574d8693b45fa0851e828db22636.diff
LOG: AMDGPU/GlobalISel: Select permlane16/permlanex16
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.permlane.ll
Modified:
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/lib/Target/AMDGPU/SMInstructions.td
llvm/lib/Target/AMDGPU/VOP3Instructions.td
llvm/lib/Target/AMDGPU/VOP3PInstructions.td
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 476ab798d004..86495acd5142 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -2324,6 +2324,15 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
constrainOpWithReadfirstlane(MI, MRI, MI.getNumOperands() - 1); // Index
return;
}
+ case Intrinsic::amdgcn_permlane16:
+ case Intrinsic::amdgcn_permlanex16: {
+ // Doing a waterfall loop over these wouldn't make any sense.
+ substituteSimpleCopyRegs(OpdMapper, 2);
+ substituteSimpleCopyRegs(OpdMapper, 3);
+ constrainOpWithReadfirstlane(MI, MRI, 4);
+ constrainOpWithReadfirstlane(MI, MRI, 5);
+ return;
+ }
default:
break;
}
@@ -3334,6 +3343,16 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;
}
+ case Intrinsic::amdgcn_permlane16:
+ case Intrinsic::amdgcn_permlanex16: {
+ unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ OpdsMapping[4] = getSGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
+ OpdsMapping[5] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
+ break;
+ }
case Intrinsic::amdgcn_mfma_f32_4x4x1f32:
case Intrinsic::amdgcn_mfma_f32_4x4x4f16:
case Intrinsic::amdgcn_mfma_i32_4x4x4i8:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 9ee5aad07f5c..f2226cdfba49 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -665,10 +665,6 @@ defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32, 0>;
defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32, 0>;
defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32, 0>;
-def as_i1imm : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1);
-}]>;
-
def as_i1timm : SDNodeXForm<timm, [{
return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1);
}]>;
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index 79982d96c2c8..e054434ba0e1 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -769,22 +769,22 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
// 1. Offset as an immediate
def : GCNPat <
(SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), i1:$glc, i1:$dlc),
- (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (as_i1imm $glc),
- (as_i1imm $dlc)))
+ (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (as_i1timm $glc),
+ (as_i1timm $dlc)))
>;
// 2. 32-bit IMM offset on CI
def : GCNPat <
(vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), i1:$glc, i1:$dlc)),
- (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (as_i1imm $glc), (as_i1imm $dlc))> {
+ (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (as_i1timm $glc), (as_i1timm $dlc))> {
let OtherPredicates = [isGFX7Only];
}
// 3. Offset loaded in an 32bit SGPR
def : GCNPat <
(SIsbuffer_load v4i32:$sbase, i32:$offset, i1:$glc, i1:$dlc),
- (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (as_i1imm $glc),
- (as_i1imm $dlc)))
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (as_i1timm $glc),
+ (as_i1timm $dlc)))
>;
}
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 176d0e06438c..0b12a9b5e98c 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -646,8 +646,8 @@ class PermlanePat<SDPatternOperator permlane,
Instruction inst> : GCNPat<
(permlane i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2,
timm:$fi, timm:$bc),
- (inst (as_i1imm $fi), $src0, (as_i1imm $bc),
- $src1, 0, $src2, $vdst_in)
+ (inst (as_i1timm $fi), VGPR_32:$src0, (as_i1timm $bc),
+ SCSrc_b32:$src1, 0, SCSrc_b32:$src2, VGPR_32:$vdst_in)
>;
// Permlane intrinsic that has either fetch invalid or bound control
@@ -659,13 +659,19 @@ class BoundControlOrFetchInvalidPermlane<SDPatternOperator permlane> :
$src1, node:$src2, node:$fi, node:$bc)> {
let PredicateCode = [{ return N->getConstantOperandVal(5) != 0 ||
N->getConstantOperandVal(6) != 0; }];
+ let GISelPredicateCode = [{
+ return MI.getOperand(6).getImm() != 0 ||
+ MI.getOperand(7).getImm() != 0;
+ }];
}
// Drop the input value if it won't be read.
class PermlaneDiscardVDstIn<SDPatternOperator permlane,
Instruction inst> : GCNPat<
- (permlane srcvalue, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc),
- (inst (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2,
+ (permlane srcvalue, i32:$src0, i32:$src1, i32:$src2,
+ timm:$fi, timm:$bc),
+ (inst (as_i1timm $fi), VGPR_32:$src0, (as_i1timm $bc),
+ SCSrc_b32:$src1, 0, SCSrc_b32:$src2,
(IMPLICIT_DEF))
>;
@@ -688,7 +694,6 @@ let SubtargetPredicate = isGFX10Plus in {
def : PermlaneDiscardVDstIn<
BoundControlOrFetchInvalidPermlane<int_amdgcn_permlanex16>,
V_PERMLANEX16_B32>;
-
} // End SubtargetPredicate = isGFX10Plus
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 933acc2278fd..9f949115c4f3 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -287,7 +287,7 @@ multiclass DotPats<SDPatternOperator dot_op,
(dot_op (dot_inst.Pfl.Src0VT (VOP3PMods0 dot_inst.Pfl.Src0VT:$src0, i32:$src0_modifiers)),
(dot_inst.Pfl.Src1VT (VOP3PMods dot_inst.Pfl.Src1VT:$src1, i32:$src1_modifiers)),
(dot_inst.Pfl.Src2VT (VOP3PMods dot_inst.Pfl.Src2VT:$src2, i32:$src2_modifiers)), i1:$clamp),
- (dot_inst $src0_modifiers, $src0, $src1_modifiers, $src1, $src2_modifiers, $src2, (as_i1imm $clamp))>;
+ (dot_inst $src0_modifiers, $src0, $src1_modifiers, $src1, $src2_modifiers, $src2, (as_i1timm $clamp))>;
}
defm : DotPats<AMDGPUfdot2, V_DOT2_F32_F16>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.permlane.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.permlane.ll
new file mode 100644
index 000000000000..07be54d35779
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.permlane.ll
@@ -0,0 +1 @@
+; RUN: llc -global-isel -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %S/../llvm.amdgcn.permlane.ll | FileCheck -check-prefixes=GCN,GFX10 %S/../llvm.amdgcn.permlane.ll
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
index 636734e2dad5..e6b51496b10d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
+; RUN: llc -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
declare i32 @llvm.amdgcn.permlane16(i32, i32, i32, i32, i1, i1) #1
declare i32 @llvm.amdgcn.permlanex16(i32, i32, i32, i32, i1, i1) #1
More information about the llvm-commits
mailing list