[llvm] 0dc4c36 - AMDGPU/GlobalISel: Manually select llvm.amdgcn.writelane
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 11 08:56:24 PDT 2020
Author: Matt Arsenault
Date: 2020-08-11T11:56:16-04:00
New Revision: 0dc4c36d3aa1c1bcae4aa00e7808722ebfd22f6d
URL: https://github.com/llvm/llvm-project/commit/0dc4c36d3aa1c1bcae4aa00e7808722ebfd22f6d
DIFF: https://github.com/llvm/llvm-project/commit/0dc4c36d3aa1c1bcae4aa00e7808722ebfd22f6d.diff
LOG: AMDGPU/GlobalISel: Manually select llvm.amdgcn.writelane
Fixup the special case constant bus handling pre-gfx10.
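For reference, the constrained case is when both the written value and the lane select are uniform (SGPR) operands: pre-gfx10, v_writelane_b32 gets only one constant bus slot, so the lane select is routed through m0 unless one of the operands folds to an immediate. A minimal sketch of such a call follows; the function name is hypothetical, but the intrinsic signature and calling convention match the test added below.

; Minimal sketch (hypothetical function name). Both %data and %lane are
; uniform, so on targets with a constant bus limit of 1 the selector must
; copy the lane select to m0.
declare i32 @llvm.amdgcn.writelane(i32, i32, i32)

define amdgpu_ps float @writelane_both_sgpr(i32 inreg %data, i32 inreg %lane, i32 %vdst.in) {
  %r = call i32 @llvm.amdgcn.writelane(i32 %data, i32 %lane, i32 %vdst.in)
  %f = bitcast i32 %r to float
  ret float %f
}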
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.writelane.ll
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index ffc7e1a4a8bd..90a78bb58ff0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -797,6 +797,63 @@ bool AMDGPUInstructionSelector::selectInterpP1F16(MachineInstr &MI) const {
return true;
}
+// Writelane is special in that it can use SGPR and M0 (which would normally
+// count as using the constant bus twice - but in this case it is allowed since
+// the lane selector doesn't count as a use of the constant bus). However, it is
+// still required to abide by the 1 SGPR rule. Fix this up if we might have
+// multiple SGPRs.
+bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
+ // With a constant bus limit of at least 2, there's no issue.
+ if (STI.getConstantBusLimit(AMDGPU::V_WRITELANE_B32) > 1)
+ return selectImpl(MI, *CoverageInfo);
+
+ MachineBasicBlock *MBB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+ Register VDst = MI.getOperand(0).getReg();
+ Register Val = MI.getOperand(2).getReg();
+ Register LaneSelect = MI.getOperand(3).getReg();
+ Register VDstIn = MI.getOperand(4).getReg();
+
+ auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);
+
+ Optional<ValueAndVReg> ConstSelect =
+ getConstantVRegValWithLookThrough(LaneSelect, *MRI, true, true);
+ if (ConstSelect) {
+ // The selector has to be an inline immediate, so we can use whatever for
+ // the other operands.
+ MIB.addReg(Val);
+ MIB.addImm(ConstSelect->Value &
+ maskTrailingOnes<uint64_t>(STI.getWavefrontSizeLog2()));
+ } else {
+ Optional<ValueAndVReg> ConstVal =
+ getConstantVRegValWithLookThrough(Val, *MRI, true, true);
+
+ // If the value written is an inline immediate, we can get away without a
+ // copy to m0.
+ if (ConstVal && AMDGPU::isInlinableLiteral32(ConstVal->Value,
+ STI.hasInv2PiInlineImm())) {
+ MIB.addImm(ConstVal->Value);
+ MIB.addReg(LaneSelect);
+ } else {
+ MIB.addReg(Val);
+
+ // If the lane selector was originally in a VGPR and copied with
+ // readfirstlane, there's a hazard to read the same SGPR from the
+ // VALU. Constrain to a different SGPR to help avoid needing a nop later.
+ RBI.constrainGenericRegister(LaneSelect, AMDGPU::SReg_32_XM0RegClass, *MRI);
+
+ BuildMI(*MBB, *MIB, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
+ .addReg(LaneSelect);
+ MIB.addReg(AMDGPU::M0);
+ }
+ }
+
+ MIB.addReg(VDstIn);
+
+ MI.eraseFromParent();
+ return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+}
+
// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
bool AMDGPUInstructionSelector::selectDivScale(MachineInstr &MI) const {
@@ -863,6 +920,8 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
return constrainCopyLikeIntrin(I, AMDGPU::SOFT_WQM);
case Intrinsic::amdgcn_wwm:
return constrainCopyLikeIntrin(I, AMDGPU::WWM);
+ case Intrinsic::amdgcn_writelane:
+ return selectWritelane(I);
case Intrinsic::amdgcn_div_scale:
return selectDivScale(I);
case Intrinsic::amdgcn_icmp:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index b7223d3b07d7..c9129bf1105b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -105,6 +105,7 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
bool selectG_INSERT(MachineInstr &I) const;
bool selectInterpP1F16(MachineInstr &MI) const;
+ bool selectWritelane(MachineInstr &MI) const;
bool selectDivScale(MachineInstr &MI) const;
bool selectIntrinsicIcmp(MachineInstr &MI) const;
bool selectBallot(MachineInstr &I) const;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.writelane.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.writelane.ll
new file mode 100644
index 000000000000..d40a0348cfc0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.writelane.ll
@@ -0,0 +1,333 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX7 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
+
+define amdgpu_ps float @test_writelane_s_s_s(i32 inreg %data, i32 inreg %lane, i32 inreg %vdst.in) #0 {
+; GFX7-LABEL: test_writelane_s_s_s:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: s_mov_b32 m0, s3
+; GFX7-NEXT: v_writelane_b32 v0, s2, m0
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: test_writelane_s_s_s:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: v_mov_b32_e32 v0, s4
+; GFX8-NEXT: s_mov_b32 m0, s3
+; GFX8-NEXT: v_writelane_b32 v0, s2, m0
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: test_writelane_s_s_s:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_writelane_b32 v0, s2, s3
+; GFX10-NEXT: ; return to shader part epilog
+ %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 %lane, i32 %vdst.in)
+ %writelane.cast = bitcast i32 %writelane to float
+ ret float %writelane.cast
+}
+
+define amdgpu_ps float @test_writelane_s_s_imm(i32 inreg %data, i32 inreg %lane) #0 {
+; GFX7-LABEL: test_writelane_s_s_imm:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: v_mov_b32_e32 v0, 42
+; GFX7-NEXT: s_mov_b32 m0, s3
+; GFX7-NEXT: v_writelane_b32 v0, s2, m0
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: test_writelane_s_s_imm:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: v_mov_b32_e32 v0, 42
+; GFX8-NEXT: s_mov_b32 m0, s3
+; GFX8-NEXT: v_writelane_b32 v0, s2, m0
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: test_writelane_s_s_imm:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_mov_b32_e32 v0, 42
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_writelane_b32 v0, s2, s3
+; GFX10-NEXT: ; return to shader part epilog
+ %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 %lane, i32 42)
+ %writelane.cast = bitcast i32 %writelane to float
+ ret float %writelane.cast
+}
+
+; data is not inline imm
+define amdgpu_ps float @test_writelane_k_s_v(i32 inreg %lane, i32 %vdst.in) #0 {
+; GFX7-LABEL: test_writelane_k_s_v:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_movk_i32 s0, 0x3e7
+; GFX7-NEXT: s_mov_b32 m0, s2
+; GFX7-NEXT: v_writelane_b32 v0, s0, m0
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: test_writelane_k_s_v:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_movk_i32 s0, 0x3e7
+; GFX8-NEXT: s_mov_b32 m0, s2
+; GFX8-NEXT: v_writelane_b32 v0, s0, m0
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: test_writelane_k_s_v:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_movk_i32 s0, 0x3e7
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_writelane_b32 v0, s0, s2
+; GFX10-NEXT: ; return to shader part epilog
+ %writelane = call i32 @llvm.amdgcn.writelane(i32 999, i32 %lane, i32 %vdst.in)
+ %writelane.cast = bitcast i32 %writelane to float
+ ret float %writelane.cast
+}
+
+; Data is inline imm
+define amdgpu_ps float @test_writelane_imm_s_v(i32 inreg %lane, i32 %vdst.in) #0 {
+; GFX7-LABEL: test_writelane_imm_s_v:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: v_writelane_b32 v0, 42, s2
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: test_writelane_imm_s_v:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: v_writelane_b32 v0, 42, s2
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: test_writelane_imm_s_v:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_writelane_b32 v0, 42, s2
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: ; return to shader part epilog
+ %writelane = call i32 @llvm.amdgcn.writelane(i32 42, i32 %lane, i32 %vdst.in)
+ %writelane.cast = bitcast i32 %writelane to float
+ ret float %writelane.cast
+}
+
+; Data is subtarget dependent inline imm
+define amdgpu_ps float @test_writelane_imminv2pi_s_v(i32 inreg %lane, i32 %vdst.in) #0 {
+; GFX7-LABEL: test_writelane_imminv2pi_s_v:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_mov_b32 s0, 0x3e22f983
+; GFX7-NEXT: s_mov_b32 m0, s2
+; GFX7-NEXT: v_writelane_b32 v0, s0, m0
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: test_writelane_imminv2pi_s_v:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: v_writelane_b32 v0, 0.15915494, s2
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: test_writelane_imminv2pi_s_v:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_writelane_b32 v0, 0.15915494, s2
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: ; return to shader part epilog
+ %writelane = call i32 @llvm.amdgcn.writelane(i32 bitcast (float 0x3FC45F3060000000 to i32), i32 %lane, i32 %vdst.in)
+ %writelane.cast = bitcast i32 %writelane to float
+ ret float %writelane.cast
+}
+
+
+; Lane is inline imm
+define amdgpu_ps float @test_writelane_s_imm_v(i32 inreg %data, i32 %vdst.in) #0 {
+; GFX7-LABEL: test_writelane_s_imm_v:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: v_writelane_b32 v0, s2, 23
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: test_writelane_s_imm_v:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: v_writelane_b32 v0, s2, 23
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: test_writelane_s_imm_v:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_writelane_b32 v0, s2, 23
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: ; return to shader part epilog
+ %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 23, i32 %vdst.in)
+ %writelane.cast = bitcast i32 %writelane to float
+ ret float %writelane.cast
+}
+
+; Lane index is larger than the wavesize
+define amdgpu_ps float @test_writelane_s_k0_v(i32 inreg %data, i32 %vdst.in) #0 {
+; GFX7-LABEL: test_writelane_s_k0_v:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: v_writelane_b32 v0, s2, 3
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: test_writelane_s_k0_v:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: v_writelane_b32 v0, s2, 3
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: test_writelane_s_k0_v:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_movk_i32 s0, 0x43
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_writelane_b32 v0, s2, s0
+; GFX10-NEXT: ; return to shader part epilog
+ %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 67, i32 %vdst.in)
+ %writelane.cast = bitcast i32 %writelane to float
+ ret float %writelane.cast
+}
+
+; Lane index is larger than the wavesize for wave32
+define amdgpu_ps float @test_writelane_s_k1_v(i32 inreg %data, i32 %vdst.in) #0 {
+; GFX7-LABEL: test_writelane_s_k1_v:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: v_writelane_b32 v0, s2, 32
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: test_writelane_s_k1_v:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: v_writelane_b32 v0, s2, 32
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: test_writelane_s_k1_v:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_writelane_b32 v0, s2, 32
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: ; return to shader part epilog
+ %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 32, i32 %vdst.in)
+ %writelane.cast = bitcast i32 %writelane to float
+ ret float %writelane.cast
+}
+
+define amdgpu_ps float @test_writelane_v_v_v(i32 %data, i32 %lane, i32 %vdst.in) #0 {
+; GFX7-LABEL: test_writelane_v_v_v:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: v_readfirstlane_b32 s1, v1
+; GFX7-NEXT: v_readfirstlane_b32 s0, v0
+; GFX7-NEXT: s_mov_b32 m0, s1
+; GFX7-NEXT: v_writelane_b32 v2, s0, m0
+; GFX7-NEXT: v_mov_b32_e32 v0, v2
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: test_writelane_v_v_v:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: v_readfirstlane_b32 s1, v1
+; GFX8-NEXT: v_readfirstlane_b32 s0, v0
+; GFX8-NEXT: s_mov_b32 m0, s1
+; GFX8-NEXT: v_writelane_b32 v2, s0, m0
+; GFX8-NEXT: v_mov_b32_e32 v0, v2
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: test_writelane_v_v_v:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_readfirstlane_b32 s0, v0
+; GFX10-NEXT: v_readfirstlane_b32 s1, v1
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_writelane_b32 v2, s0, s1
+; GFX10-NEXT: v_mov_b32_e32 v0, v2
+; GFX10-NEXT: ; return to shader part epilog
+ %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 %lane, i32 %vdst.in)
+ %writelane.cast = bitcast i32 %writelane to float
+ ret float %writelane.cast
+}
+
+define amdgpu_ps float @test_writelane_v_s_v(i32 %data, i32 inreg %lane, i32 %vdst.in) #0 {
+; GFX7-LABEL: test_writelane_v_s_v:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: v_readfirstlane_b32 s0, v0
+; GFX7-NEXT: s_mov_b32 m0, s2
+; GFX7-NEXT: v_writelane_b32 v1, s0, m0
+; GFX7-NEXT: v_mov_b32_e32 v0, v1
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: test_writelane_v_s_v:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: v_readfirstlane_b32 s0, v0
+; GFX8-NEXT: s_mov_b32 m0, s2
+; GFX8-NEXT: v_writelane_b32 v1, s0, m0
+; GFX8-NEXT: v_mov_b32_e32 v0, v1
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: test_writelane_v_s_v:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_readfirstlane_b32 s0, v0
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: v_writelane_b32 v1, s0, s2
+; GFX10-NEXT: v_mov_b32_e32 v0, v1
+; GFX10-NEXT: ; return to shader part epilog
+ %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 %lane, i32 %vdst.in)
+ %writelane.cast = bitcast i32 %writelane to float
+ ret float %writelane.cast
+}
+
+; FIXME: This could theoretically use m0 directly as the data source,
+; and another SGPR as the lane selector and avoid register swap.
+define amdgpu_ps float @test_writelane_m0_s_v(i32 inreg %lane, i32 %vdst.in) #0 {
+; GFX7-LABEL: test_writelane_m0_s_v:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: ;;#ASMSTART
+; GFX7-NEXT: s_mov_b32 m0, -1
+; GFX7-NEXT: ;;#ASMEND
+; GFX7-NEXT: s_mov_b32 s0, m0
+; GFX7-NEXT: s_mov_b32 m0, s2
+; GFX7-NEXT: v_writelane_b32 v0, s0, m0
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: test_writelane_m0_s_v:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: ;;#ASMSTART
+; GFX8-NEXT: s_mov_b32 m0, -1
+; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: s_mov_b32 s0, m0
+; GFX8-NEXT: s_mov_b32 m0, s2
+; GFX8-NEXT: v_writelane_b32 v0, s0, m0
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: test_writelane_m0_s_v:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: ;;#ASMSTART
+; GFX10-NEXT: s_mov_b32 m0, -1
+; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: v_writelane_b32 v0, m0, s2
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: ; return to shader part epilog
+ %m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"()
+ %writelane = call i32 @llvm.amdgcn.writelane(i32 %m0, i32 %lane, i32 %vdst.in)
+ %writelane.cast = bitcast i32 %writelane to float
+ ret float %writelane.cast
+}
+
+define amdgpu_ps float @test_writelane_s_m0_v(i32 inreg %data, i32 %vdst.in) #0 {
+; GFX7-LABEL: test_writelane_s_m0_v:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: ;;#ASMSTART
+; GFX7-NEXT: s_mov_b32 m0, -1
+; GFX7-NEXT: ;;#ASMEND
+; GFX7-NEXT: v_writelane_b32 v0, s2, m0
+; GFX7-NEXT: ; return to shader part epilog
+;
+; GFX8-LABEL: test_writelane_s_m0_v:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: ;;#ASMSTART
+; GFX8-NEXT: s_mov_b32 m0, -1
+; GFX8-NEXT: ;;#ASMEND
+; GFX8-NEXT: v_writelane_b32 v0, s2, m0
+; GFX8-NEXT: ; return to shader part epilog
+;
+; GFX10-LABEL: test_writelane_s_m0_v:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: ;;#ASMSTART
+; GFX10-NEXT: s_mov_b32 m0, -1
+; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: v_writelane_b32 v0, s2, m0
+; GFX10-NEXT: ; implicit-def: $vcc_hi
+; GFX10-NEXT: ; return to shader part epilog
+ %m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"()
+ %writelane = call i32 @llvm.amdgcn.writelane(i32 %data, i32 %m0, i32 %vdst.in)
+ %writelane.cast = bitcast i32 %writelane to float
+ ret float %writelane.cast
+}
+
+declare i32 @llvm.amdgcn.writelane(i32, i32, i32) #1
+declare i32 @llvm.amdgcn.workitem.id.x() #2
+
+attributes #0 = { nounwind }
+attributes #1 = { convergent nounwind readnone willreturn }
+attributes #2 = { nounwind readnone speculatable willreturn }