[llvm] ec96283 - AMDGPU/GlobalISel: Select DS append/consume
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 17 17:10:00 PST 2020
Author: Matt Arsenault
Date: 2020-01-17T20:09:53-05:00
New Revision: ec9628318d797bfe036aca314d58665dd93b364f
URL: https://github.com/llvm/llvm-project/commit/ec9628318d797bfe036aca314d58665dd93b364f
DIFF: https://github.com/llvm/llvm-project/commit/ec9628318d797bfe036aca314d58665dd93b364f.diff
LOG: AMDGPU/GlobalISel: Select DS append/consume
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.append.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.consume.ll
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.append.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.consume.mir
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 5ab8d2d84792..a10c1ce20037 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1197,6 +1197,36 @@ bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
return true;
}
+bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI,
+ bool IsAppend) const {
+ Register PtrBase = MI.getOperand(2).getReg();
+ LLT PtrTy = MRI->getType(PtrBase);
+ bool IsGDS = PtrTy.getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
+
+ unsigned Offset;
+ std::tie(PtrBase, Offset) = selectDS1Addr1OffsetImpl(MI.getOperand(2));
+
+ // TODO: Should this try to look through readfirstlane like GWS?
+ if (!isDSOffsetLegal(PtrBase, Offset, 16)) {
+ PtrBase = MI.getOperand(2).getReg();
+ Offset = 0;
+ }
+
+ MachineBasicBlock *MBB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+ const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
+
+ BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
+ .addReg(PtrBase);
+ BuildMI(*MBB, &MI, DL, TII.get(Opc), MI.getOperand(0).getReg())
+ .addImm(Offset)
+ .addImm(IsGDS ? -1 : 0)
+ .cloneMemRefs(MI);
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
@@ -1230,6 +1260,10 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
case Intrinsic::amdgcn_ds_gws_sema_p:
case Intrinsic::amdgcn_ds_gws_sema_release_all:
return selectDSGWSIntrinsic(I, IntrinsicID);
+ case Intrinsic::amdgcn_ds_append:
+ return selectDSAppendConsume(I, true);
+ case Intrinsic::amdgcn_ds_consume:
+ return selectDSAppendConsume(I, false);
default:
return selectImpl(I, *CoverageInfo);
}
@@ -2248,8 +2282,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
}}};
}
-bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI,
- const MachineOperand &Base,
+bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base,
int64_t Offset,
unsigned OffsetBits) const {
if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
@@ -2261,7 +2294,7 @@ bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI,
// On Southern Islands instruction with a negative base value and an offset
// don't seem to work.
- return KnownBits->signBitIsZero(Base.getReg());
+ return KnownBits->signBitIsZero(Base);
}
InstructionSelector::ComplexRendererFns
@@ -2292,15 +2325,11 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffset(
}};
}
-InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
+std::pair<Register, unsigned>
+AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(MachineOperand &Root) const {
const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg());
- if (!RootDef) {
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
- }};
- }
+ if (!RootDef)
+ return std::make_pair(Root.getReg(), 0);
int64_t ConstAddr = 0;
if (isBaseWithConstantOffset(Root, *MRI)) {
@@ -2311,26 +2340,32 @@ AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
if (LHSDef && RHSDef) {
int64_t PossibleOffset =
RHSDef->getOperand(1).getCImm()->getSExtValue();
- if (isDSOffsetLegal(*MRI, LHS, PossibleOffset, 16)) {
+ if (isDSOffsetLegal(LHS.getReg(), PossibleOffset, 16)) {
// (add n0, c0)
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(PossibleOffset); }
- }};
+ return std::make_pair(LHS.getReg(), PossibleOffset);
}
}
} else if (RootDef->getOpcode() == AMDGPU::G_SUB) {
-
+ // TODO
} else if (mi_match(Root.getReg(), *MRI, m_ICst(ConstAddr))) {
-
+ // TODO
}
+ return std::make_pair(Root.getReg(), 0);
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const {
+
+ Register Reg;
+ unsigned Offset;
+ std::tie(Reg, Offset) = selectDS1Addr1OffsetImpl(Root);
return {{
- [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Reg); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }
}};
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 6f5268629bee..89d5595016d0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -102,6 +102,7 @@ class AMDGPUInstructionSelector : public InstructionSelector {
bool selectStoreIntrinsic(MachineInstr &MI, bool IsFormat) const;
bool selectDSOrderedIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const;
bool selectDSGWSIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const;
+ bool selectDSAppendConsume(MachineInstr &MI, bool IsAppend) const;
bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const;
int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const;
@@ -164,9 +165,11 @@ class AMDGPUInstructionSelector : public InstructionSelector {
InstructionSelector::ComplexRendererFns
selectMUBUFScratchOffset(MachineOperand &Root) const;
- bool isDSOffsetLegal(const MachineRegisterInfo &MRI,
- const MachineOperand &Base,
- int64_t Offset, unsigned OffsetBits) const;
+ bool isDSOffsetLegal(Register Base, int64_t Offset,
+ unsigned OffsetBits) const;
+
+ std::pair<Register, unsigned>
+ selectDS1Addr1OffsetImpl(MachineOperand &Src) const;
InstructionSelector::ComplexRendererFns
selectDS1Addr1Offset(MachineOperand &Root) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index ebe15c9f4334..48f49c7987ef 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -2147,6 +2147,11 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
constrainOpWithReadfirstlane(MI, MRI, 1); // M0
return;
}
+ case Intrinsic::amdgcn_ds_append:
+ case Intrinsic::amdgcn_ds_consume: {
+ constrainOpWithReadfirstlane(MI, MRI, 2); // M0
+ return;
+ }
case Intrinsic::amdgcn_s_sendmsg:
case Intrinsic::amdgcn_s_sendmsghalt: {
// FIXME: Should this use a waterfall loop?
@@ -3080,8 +3085,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;
}
- case Intrinsic::amdgcn_ds_append:
- case Intrinsic::amdgcn_ds_consume:
case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax:
@@ -3098,6 +3101,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
break;
}
+ case Intrinsic::amdgcn_ds_append:
+ case Intrinsic::amdgcn_ds_consume: {
+ unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, DstSize);
+ OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
+ break;
+ }
case Intrinsic::amdgcn_exp_compr:
OpdsMapping[0] = nullptr; // IntrinsicID
// FIXME: These are immediate values which can't be read from registers.
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.append.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.append.ll
new file mode 100644
index 000000000000..8287a60a069f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.append.ll
@@ -0,0 +1,4 @@
+; XUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,SI,NOTGFX9,CIPLUS-GISEL,GCN-GISEL %S/../llvm.amdgcn.ds.append.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-GISEL,GCN-GISEL %S/../llvm.amdgcn.ds.append.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-GISEL,GCN-GISEL %S/../llvm.amdgcn.ds.append.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,GFX9,CIPLUS-GISEL,GCN-GISEL %S/../llvm.amdgcn.ds.append.ll
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.consume.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.consume.ll
new file mode 100644
index 000000000000..c755c37cad46
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.consume.ll
@@ -0,0 +1,4 @@
+; XUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,SI,NOTGFX9 %S/../llvm.amdgcn.ds.append.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9 %S/../llvm.amdgcn.ds.append.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9 %S/../llvm.amdgcn.ds.append.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.ds.append.ll | FileCheck -check-prefixes=GCN,CIPLUS,GFX9 %S/../llvm.amdgcn.ds.append.ll
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.append.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.append.mir
index 0267f7612de1..51215fb931fc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.append.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.append.mir
@@ -12,8 +12,7 @@ body: |
; CHECK-LABEL: name: ds_append_s
; CHECK: liveins: $sgpr0
; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
- ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[COPY1]](p3), 0
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[COPY]](p3), 0
%0:_(p3) = COPY $sgpr0
%1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), %0, 0
@@ -28,8 +27,9 @@ body: |
liveins: $vgpr0
; CHECK-LABEL: name: ds_append_v
; CHECK: liveins: $vgpr0
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
- ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[COPY]](p3), 0
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(p3) = COPY $vgpr0
+ ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(p3) = V_READFIRSTLANE_B32 [[COPY]](p3), implicit $exec
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[V_READFIRSTLANE_B32_]](p3), 0
%0:_(p3) = COPY $vgpr0
%1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), %0, 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.consume.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.consume.mir
index 50dd920210b7..c3cc88e3a32b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.consume.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.consume.mir
@@ -12,8 +12,7 @@ body: |
; CHECK-LABEL: name: ds_consume_s
; CHECK: liveins: $sgpr0
; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
- ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3)
- ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[COPY1]](p3), 0
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[COPY]](p3), 0
%0:_(p3) = COPY $sgpr0
%1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), %0, 0
@@ -28,8 +27,9 @@ body: |
liveins: $vgpr0
; CHECK-LABEL: name: ds_consume_v
; CHECK: liveins: $vgpr0
- ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
- ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[COPY]](p3), 0
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr_32(p3) = COPY $vgpr0
+ ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32(p3) = V_READFIRSTLANE_B32 [[COPY]](p3), implicit $exec
+ ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[V_READFIRSTLANE_B32_]](p3), 0
%0:_(p3) = COPY $vgpr0
%1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), %0, 0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll
index 9f7aa5ea3a1c..ce1551e44e51 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll
@@ -1,7 +1,7 @@
-; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,NOTGFX9 %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9 %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9 %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI,NOTGFX9,GCN-SDAG %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-SDAG,GCN-SDAG %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,NOTGFX9,CIPLUS-SDAG,GCN-SDAG %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CIPLUS,GFX9,CIPLUS-SDAG,GCN-SDAG %s
; GCN-LABEL: {{^}}ds_append_lds:
; GCN: s_load_dword [[PTR:s[0-9]+]]
@@ -51,10 +51,13 @@ define amdgpu_kernel void @ds_append_no_fold_offset_si(i32 addrspace(3)* addrspa
; GCN-LABEL: {{^}}ds_append_lds_over_max_offset:
; GCN: s_load_dword [[PTR:s[0-9]+]]
-; SI: s_bitset1_b32 [[PTR]], 16
-; CIPLUS: s_add_i32 [[PTR]], [[PTR]], 0x10000
+; SI-SDAG: s_bitset1_b32 [[PTR]], 16
+; CIPLUS-SDAG: s_add_i32 [[PTR]], [[PTR]], 0x10000
+; GCN-SDAG: s_mov_b32 m0, [[PTR]]
+
+; SI-GISEL: s_bitset1_b32 m0, 16
+; CIPLUS-GISEL: s_add_u32 m0, [[PTR]], 0x10000
-; GCN: s_mov_b32 m0, [[PTR]]
; GCN: ds_append [[RESULT:v[0-9]+]]{{$}}
; GCN-NOT: buffer_wbinvl1
; GCN: {{.*}}store{{.*}} [[RESULT]]
@@ -66,8 +69,11 @@ define amdgpu_kernel void @ds_append_lds_over_max_offset(i32 addrspace(3)* %lds,
}
; GCN-LABEL: {{^}}ds_append_lds_vgpr_addr:
-; GCN: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
-; GCN: s_mov_b32 m0, [[READLANE]]
+; GCN-SDAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
+; GCN-SDAG: s_mov_b32 m0, [[READLANE]]
+
+; GCN-GISEL: v_readfirstlane_b32 m0, v0
+
; GCN: ds_append [[RESULT:v[0-9]+]]{{$}}
; GCN-NOT: buffer_wbinvl1
; GCN: {{.*}}store{{.*}} [[RESULT]]
@@ -127,8 +133,8 @@ define amdgpu_kernel void @ds_append_lds_m0_restore(i32 addrspace(3)* %lds, i32
ret void
}
-declare i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* nocapture, i1) #1
-declare i32 @llvm.amdgcn.ds.append.p2i32(i32 addrspace(2)* nocapture, i1) #1
+declare i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* nocapture, i1 immarg) #1
+declare i32 @llvm.amdgcn.ds.append.p2i32(i32 addrspace(2)* nocapture, i1 immarg) #1
attributes #0 = { nounwind }
attributes #1 = { argmemonly convergent nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll
index 415b80a4b111..175c0cf7760a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll
@@ -127,8 +127,8 @@ define amdgpu_kernel void @ds_consume_lds_m0_restore(i32 addrspace(3)* %lds, i32
ret void
}
-declare i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* nocapture, i1) #1
-declare i32 @llvm.amdgcn.ds.consume.p2i32(i32 addrspace(2)* nocapture, i1) #1
+declare i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* nocapture, i1 immarg) #1
+declare i32 @llvm.amdgcn.ds.consume.p2i32(i32 addrspace(2)* nocapture, i1 immarg) #1
attributes #0 = { nounwind }
attributes #1 = { argmemonly convergent nounwind }
More information about the llvm-commits mailing list