[clang] 97f3f0b - AMDGPU: Add intrinsic for s_setreg
Matt Arsenault via cfe-commits
cfe-commits at lists.llvm.org
Thu May 28 11:26:45 PDT 2020
Author: Matt Arsenault
Date: 2020-05-28T14:26:38-04:00
New Revision: 97f3f0bab0982f84745c7ac5ce8fb6b0918ff718
URL: https://github.com/llvm/llvm-project/commit/97f3f0bab0982f84745c7ac5ce8fb6b0918ff718
DIFF: https://github.com/llvm/llvm-project/commit/97f3f0bab0982f84745c7ac5ce8fb6b0918ff718.diff
LOG: AMDGPU: Add intrinsic for s_setreg
This will be more useful with fenv access implemented.
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
Modified:
clang/include/clang/Basic/BuiltinsAMDGPU.def
clang/test/CodeGenOpenCL/builtins-amdgcn.cl
clang/test/SemaOpenCL/builtins-amdgcn-error.cl
llvm/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/lib/Target/AMDGPU/SOPInstructions.td
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 5633ccd5d744..28379142b05a 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -44,6 +44,7 @@ BUILTIN(__builtin_amdgcn_mbcnt_lo, "UiUiUi", "nc")
// Instruction builtins.
//===----------------------------------------------------------------------===//
BUILTIN(__builtin_amdgcn_s_getreg, "UiIi", "n")
+BUILTIN(__builtin_amdgcn_s_setreg, "vIiUi", "n")
BUILTIN(__builtin_amdgcn_s_getpc, "LUi", "n")
BUILTIN(__builtin_amdgcn_s_waitcnt, "vIi", "n")
BUILTIN(__builtin_amdgcn_s_sendmsg, "vIiUi", "n")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index 8f2f149103b3..3563ad464c66 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -715,6 +715,12 @@ kernel void test_mqsad_u32_u8(global uint4* out, ulong src0, uint src1, uint4 sr
*out = __builtin_amdgcn_mqsad_u32_u8(src0, src1, src2);
}
+// CHECK-LABEL: test_s_setreg(
+// CHECK: call void @llvm.amdgcn.s.setreg(i32 8193, i32 %val)
+kernel void test_s_setreg(uint val) {
+ __builtin_amdgcn_s_setreg(8193, val);
+}
+
// CHECK-DAG: [[$WI_RANGE]] = !{i32 0, i32 1024}
// CHECK-DAG: [[$WS_RANGE]] = !{i16 1, i16 1025}
// CHECK-DAG: attributes #[[$NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly }
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error.cl
index ad5e8776b2e8..dbe2900b600b 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error.cl
@@ -139,3 +139,8 @@ void test_fence() {
const char ptr[] = "workgroup";
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, ptr); // expected-error {{expression is not a string literal}}
}
+
+void test_s_setreg(int x, int y) {
+ __builtin_amdgcn_s_setreg(x, 0); // expected-error {{argument to '__builtin_amdgcn_s_setreg' must be a constant integer}}
+ __builtin_amdgcn_s_setreg(x, y); // expected-error {{argument to '__builtin_amdgcn_s_setreg' must be a constant integer}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index e2d8f3cb1bd6..40449304ed04 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1207,6 +1207,16 @@ def int_amdgcn_s_getreg :
[IntrInaccessibleMemOnly, IntrReadMem, IntrSpeculatable, ImmArg<ArgIndex<0>>]
>;
+// Note this can be used to set FP environment properties that are
+// unsafe to change in non-strictfp functions. The register properties
+// available (and value required to access them) may differ per
+// subtarget. llvm.amdgcn.s.setreg(hwmode, value)
+def int_amdgcn_s_setreg :
+ GCCBuiltin<"__builtin_amdgcn_s_setreg">,
+ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]
+>;
+
// int_amdgcn_s_getpc is provided to allow a specific style of position
// independent code to determine the high part of its address when it is
// known (through convention) that the code and any data of interest does
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 3b8f88271458..59f9866b93b6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -202,13 +202,6 @@ def AMDGPUSetCCOp : SDTypeProfile<1, 3, [ // setcc
def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>;
-def AMDGPUSetRegOp : SDTypeProfile<0, 2, [
- SDTCisInt<0>, SDTCisInt<1>
-]>;
-
-def AMDGPUsetreg : SDNode<"AMDGPUISD::SETREG", AMDGPUSetRegOp, [
- SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]>;
-
def AMDGPUfma : SDNode<"AMDGPUISD::FMA_W_CHAIN", SDTFPTernaryOp, [
SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index f11563a66d41..c6e0cb2b9cfa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -2783,6 +2783,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
constrainOpWithReadfirstlane(MI, MRI, 2); // M0
return;
}
+ case Intrinsic::amdgcn_s_setreg: {
+ constrainOpWithReadfirstlane(MI, MRI, 2);
+ return;
+ }
default: {
if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
AMDGPU::lookupRsrcIntrinsic(IntrID)) {
@@ -3924,6 +3928,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
break;
}
+ case Intrinsic::amdgcn_s_setreg: {
+ // This must be an SGPR, but accept a VGPR.
+ unsigned Bank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI,
+ AMDGPU::SGPRRegBankID);
+ OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
+ break;
+ }
case Intrinsic::amdgcn_end_cf:
case Intrinsic::amdgcn_init_exec: {
unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 042087ec5a4d..3b8930c433a3 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7977,32 +7977,32 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
const unsigned Denorm32Reg = AMDGPU::Hwreg::ID_MODE |
(4 << AMDGPU::Hwreg::OFFSET_SHIFT_) |
(1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
- const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i16);
+ const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i32);
const bool HasFP32Denormals = hasFP32Denormals(DAG.getMachineFunction());
if (!HasFP32Denormals) {
SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue EnableDenorm;
+ SDNode *EnableDenorm;
if (Subtarget->hasDenormModeInst()) {
const SDValue EnableDenormValue =
getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, SL, Subtarget);
EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs,
- DAG.getEntryNode(), EnableDenormValue);
+ DAG.getEntryNode(), EnableDenormValue).getNode();
} else {
const SDValue EnableDenormValue = DAG.getConstant(FP_DENORM_FLUSH_NONE,
SL, MVT::i32);
- EnableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, BindParamVTs,
- DAG.getEntryNode(), EnableDenormValue,
- BitField);
+ EnableDenorm =
+ DAG.getMachineNode(AMDGPU::S_SETREG_B32, SL, BindParamVTs,
+ {EnableDenormValue, BitField, DAG.getEntryNode()});
}
SDValue Ops[3] = {
NegDivScale0,
- EnableDenorm.getValue(0),
- EnableDenorm.getValue(1)
+ SDValue(EnableDenorm, 0),
+ SDValue(EnableDenorm, 1)
};
NegDivScale0 = DAG.getMergeValues(Ops, SL);
@@ -8026,25 +8026,25 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
NumeratorScaled, Fma3);
if (!HasFP32Denormals) {
- SDValue DisableDenorm;
+ SDNode *DisableDenorm;
if (Subtarget->hasDenormModeInst()) {
const SDValue DisableDenormValue =
getSPDenormModeValue(FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, SL, Subtarget);
DisableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, MVT::Other,
Fma4.getValue(1), DisableDenormValue,
- Fma4.getValue(2));
+ Fma4.getValue(2)).getNode();
} else {
const SDValue DisableDenormValue =
DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32);
- DisableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, MVT::Other,
- Fma4.getValue(1), DisableDenormValue,
- BitField, Fma4.getValue(2));
+ DisableDenorm = DAG.getMachineNode(
+ AMDGPU::S_SETREG_B32, SL, MVT::Other,
+ {DisableDenormValue, BitField, Fma4.getValue(1), Fma4.getValue(2)});
}
SDValue OutputChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
- DisableDenorm, DAG.getRoot());
+ SDValue(DisableDenorm, 0), DAG.getRoot());
DAG.setRoot(OutputChain);
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 62b7f8318fd0..529e80e67968 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1131,7 +1131,7 @@ def blgp : NamedOperandU32<"BLGP", NamedMatchClass<"BLGP">>;
def cbsz : NamedOperandU32<"CBSZ", NamedMatchClass<"CBSZ">>;
def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>;
-def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
+def hwreg : NamedOperandU32<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
def exp_tgt : NamedOperandU32<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 7b8c2c27b806..dbafea5a1347 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -801,13 +801,13 @@ def S_GETREG_B32 : SOPK_Pseudo <
>;
}
-let hasSideEffects = 1 in {
+let hasSideEffects = 1, mayLoad = 0, mayStore =0 in {
def S_SETREG_B32 : SOPK_Pseudo <
"s_setreg_b32",
(outs), (ins SReg_32:$sdst, hwreg:$simm16),
"$simm16, $sdst",
- [(AMDGPUsetreg i32:$sdst, (i16 timm:$simm16))]> {
+ [(int_amdgcn_s_setreg (i32 timm:$simm16), i32:$sdst)]> {
let Defs = [MODE];
let Uses = [MODE];
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
new file mode 100644
index 000000000000..85ed95eec0ae
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+
+; Set FP32 fp_round to round to zero
+define amdgpu_kernel void @test_setreg_f32_round_mode_rtz() {
+; GCN-LABEL: test_setreg_f32_round_mode_rtz:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 3), 3
+; GCN-NEXT: s_endpgm
+ call void @llvm.amdgcn.s.setreg(i32 4097, i32 3)
+ ret void
+}
+
+; Set FP64/FP16 fp_round to round to zero
+define amdgpu_kernel void @test_setreg_f64_round_mode_rtz() {
+; GCN-LABEL: test_setreg_f64_round_mode_rtz:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 3), 3
+; GCN-NEXT: s_endpgm
+ call void @llvm.amdgcn.s.setreg(i32 4225, i32 3)
+ ret void
+}
+
+; Set all fp_round to round to zero
+define amdgpu_kernel void @test_setreg_all_round_mode_rtz() {
+; GCN-LABEL: test_setreg_all_round_mode_rtz:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 5), 7
+; GCN-NEXT: s_endpgm
+ call void @llvm.amdgcn.s.setreg(i32 8193, i32 7)
+ ret void
+}
+
+; Set FP32 fp_round to dynamic mode
+define amdgpu_cs void @test_setreg_roundingmode_var(i32 inreg %var.mode) {
+; GCN-LABEL: test_setreg_roundingmode_var:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 3), s0
+; GCN-NEXT: s_endpgm
+ call void @llvm.amdgcn.s.setreg(i32 4097, i32 %var.mode)
+ ret void
+}
+
+define void @test_setreg_roundingmode_var_vgpr(i32 %var.mode) {
+; GCN-LABEL: test_setreg_roundingmode_var_vgpr:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_readfirstlane_b32 s4, v0
+; GCN-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 3), s4
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ call void @llvm.amdgcn.s.setreg(i32 4097, i32 %var.mode)
+ ret void
+}
+
+declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #0
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
new file mode 100644
index 000000000000..88e6bd4adb7b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; FIXME: This copy of the test is a subset of the -global-isel version, since the VGPR case doesn't work.
+
+; Set FP32 fp_round to round to zero
+define amdgpu_kernel void @test_setreg_f32_round_mode_rtz() {
+; GCN-LABEL: test_setreg_f32_round_mode_rtz:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 3), 3
+; GCN-NEXT: s_endpgm
+ call void @llvm.amdgcn.s.setreg(i32 4097, i32 3)
+ ret void
+}
+
+; Set FP64/FP16 fp_round to round to zero
+define amdgpu_kernel void @test_setreg_f64_round_mode_rtz() {
+; GCN-LABEL: test_setreg_f64_round_mode_rtz:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 3), 3
+; GCN-NEXT: s_endpgm
+ call void @llvm.amdgcn.s.setreg(i32 4225, i32 3)
+ ret void
+}
+
+; Set all fp_round to round to zero
+define amdgpu_kernel void @test_setreg_all_round_mode_rtz() {
+; GCN-LABEL: test_setreg_all_round_mode_rtz:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 5), 7
+; GCN-NEXT: s_endpgm
+ call void @llvm.amdgcn.s.setreg(i32 8193, i32 7)
+ ret void
+}
+
+; Set FP32 fp_round to dynamic mode
+define amdgpu_cs void @test_setreg_roundingmode_var(i32 inreg %var.mode) {
+; GCN-LABEL: test_setreg_roundingmode_var:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 3), s0
+; GCN-NEXT: s_endpgm
+ call void @llvm.amdgcn.s.setreg(i32 4097, i32 %var.mode)
+ ret void
+}
+
+; FIXME: Broken for DAG
+; define void @test_setreg_roundingmode_var_vgpr(i32 %var.mode) {
+; call void @llvm.amdgcn.s.setreg(i32 4097, i32 %var.mode)
+; ret void
+; }
+
+declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #0
+
+attributes #0 = { nounwind }
More information about the cfe-commits
mailing list