[llvm] edd6da1 - [AMDGPU] Remove cpol, tfe, and swz from MUBUF patterns
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 18 14:36:14 PDT 2021
Author: Stanislav Mekhanoshin
Date: 2021-03-18T14:36:04-07:00
New Revision: edd6da10d20f8fc025af2131f127c53401def04e
URL: https://github.com/llvm/llvm-project/commit/edd6da10d20f8fc025af2131f127c53401def04e
DIFF: https://github.com/llvm/llvm-project/commit/edd6da10d20f8fc025af2131f127c53401def04e.diff
LOG: [AMDGPU] Remove cpol, tfe, and swz from MUBUF patterns
These are always selected as 0 anyway.
Differential Revision: https://reviews.llvm.org/D98663
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUGISel.td
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/lib/Target/AMDGPU/BUFInstructions.td
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 1a5f7aafbb43..29f9c20dc8fd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -113,14 +113,6 @@ def gi_mubuf_offset :
GIComplexOperandMatcher<s64, "selectMUBUFOffset">,
GIComplexPatternEquiv<MUBUFOffset>;
-def gi_mubuf_addr64_atomic :
- GIComplexOperandMatcher<s64, "selectMUBUFAddr64Atomic">,
- GIComplexPatternEquiv<MUBUFAddr64Atomic>;
-
-def gi_mubuf_offset_atomic :
- GIComplexOperandMatcher<s64, "selectMUBUFOffsetAtomic">,
- GIComplexPatternEquiv<MUBUFOffsetAtomic>;
-
def gi_smrd_buffer_imm :
GIComplexOperandMatcher<s64, "selectSMRDBufferImm">,
GIComplexPatternEquiv<SMRDBufferImm>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 28a21a1270ff..fdb1cf898be3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -188,11 +188,7 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
SDValue &Offset1, unsigned Size) const;
bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset, SDValue &Offen,
- SDValue &Idxen, SDValue &Addr64, SDValue &CPol, SDValue &TFE,
- SDValue &SWZ) const;
- bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
- SDValue &SOffset, SDValue &Offset, SDValue &CPol,
- SDValue &TFE, SDValue &SWZ) const;
+ SDValue &Idxen, SDValue &Addr64) const;
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset) const;
bool SelectMUBUFScratchOffen(SDNode *Parent,
@@ -202,9 +198,6 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset) const;
- bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
- SDValue &Offset, SDValue &CPol, SDValue &TFE,
- SDValue &SWZ) const;
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
SDValue &Offset) const;
@@ -1390,8 +1383,7 @@ bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
SDValue &SOffset, SDValue &Offset,
SDValue &Offen, SDValue &Idxen,
- SDValue &Addr64, SDValue &CPol,
- SDValue &TFE, SDValue &SWZ) const {
+ SDValue &Addr64) const {
// Subtarget prefers to use flat instruction
// FIXME: This should be a pattern predicate and not reach here
if (Subtarget->useFlatForGlobal())
@@ -1399,11 +1391,6 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
SDLoc DL(Addr);
- if (!CPol)
- CPol = CurDAG->getTargetConstant(0, DL, MVT::i32);
- TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
- SWZ = CurDAG->getTargetConstant(0, DL, MVT::i1);
-
Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
@@ -1480,8 +1467,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
SDValue &VAddr, SDValue &SOffset,
- SDValue &Offset, SDValue &CPol,
- SDValue &TFE, SDValue &SWZ) const {
+ SDValue &Offset) const {
SDValue Ptr, Offen, Idxen, Addr64;
// addr64 bit was removed for volcanic islands.
@@ -1489,8 +1475,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
if (!Subtarget->hasAddr64())
return false;
- if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
- CPol, TFE, SWZ))
+ if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
return false;
ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
@@ -1507,14 +1492,6 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
return false;
}
-bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
- SDValue &VAddr, SDValue &SOffset,
- SDValue &Offset) const {
- SDValue CPol, TFE, SWZ;
-
- return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, CPol, TFE, SWZ);
-}
-
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
return PSV && PSV->isStack();
@@ -1633,15 +1610,13 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
}
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
- SDValue &SOffset, SDValue &Offset,
- SDValue &CPol, SDValue &TFE,
- SDValue &SWZ) const {
+ SDValue &SOffset, SDValue &Offset
+ ) const {
SDValue Ptr, VAddr, Offen, Idxen, Addr64;
const SIInstrInfo *TII =
static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
- if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
- CPol, TFE, SWZ))
+ if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
return false;
if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
@@ -1660,14 +1635,6 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
return false;
}
-bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
- SDValue &Soffset, SDValue &Offset
- ) const {
- SDValue CPol, TFE, SWZ;
-
- return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, CPol, TFE, SWZ);
-}
-
// Find a load or store from corresponding pattern root.
// Roots may be build_vector, bitconvert or their combinations.
static MemSDNode* findMemSDNode(SDNode *N) {
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index d367969702e3..6a3e823e4ac3 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -6,15 +6,12 @@
//
//===----------------------------------------------------------------------===//
-def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
-def MUBUFAddr64Atomic : ComplexPattern<i64, 4, "SelectMUBUFAddr64">;
+def MUBUFAddr64 : ComplexPattern<i64, 4, "SelectMUBUFAddr64">;
+def MUBUFOffset : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
def MUBUFScratchOffen : ComplexPattern<i64, 4, "SelectMUBUFScratchOffen", [], [SDNPWantParent]>;
def MUBUFScratchOffset : ComplexPattern<i64, 3, "SelectMUBUFScratchOffset", [], [SDNPWantParent], 20>;
-def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
-def MUBUFOffsetAtomic : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
-
def BUFAddrKind {
int Offset = 0;
int OffEn = 1;
@@ -402,19 +399,19 @@ class getMUBUFInsDA<list<RegisterClass> vdataList,
RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret;
dag InsNoData = !if(!empty(vaddrList),
(ins SReg_128:$srsrc, SCSrc_b32:$soffset,
- offset:$offset, CPol:$cpol),
+ offset:$offset, CPol_0:$cpol),
(ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
- offset:$offset, CPol:$cpol)
+ offset:$offset, CPol_0:$cpol)
);
dag InsData = !if(!empty(vaddrList),
(ins vdata_op:$vdata, SReg_128:$srsrc,
- SCSrc_b32:$soffset, offset:$offset, CPol:$cpol),
+ SCSrc_b32:$soffset, offset:$offset, CPol_0:$cpol),
(ins vdata_op:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
- SCSrc_b32:$soffset, offset:$offset, CPol:$cpol)
+ SCSrc_b32:$soffset, offset:$offset, CPol_0:$cpol)
);
dag ret = !con(
!if(!empty(vdataList), InsNoData, InsData),
- !if(isLds, (ins SWZ:$swz), (ins TFE:$tfe, SWZ:$swz))
+ !if(isLds, (ins SWZ_0:$swz), (ins TFE_0:$tfe, SWZ_0:$swz))
);
}
@@ -506,15 +503,15 @@ class MUBUF_Load_Pseudo <string opName,
}
class MUBUF_Offset_Load_Pat <Instruction inst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> : Pat <
- (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))),
- (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))
+ (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset))),
+ (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset))
>;
class MUBUF_Addr64_Load_Pat <Instruction inst,
ValueType load_vt = i32,
SDPatternOperator ld = null_frag> : Pat <
- (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))),
- (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))
+ (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset))),
+ (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset))
>;
multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> {
@@ -585,12 +582,12 @@ multiclass MUBUF_Pseudo_Stores<string opName,
def _OFFSET : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, legal_store_vt,
[(st legal_store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
- i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))]>,
+ i16:$offset))]>,
MUBUFAddr64Table<0, NAME>;
def _ADDR64 : MUBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, legal_store_vt,
[(st legal_store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
- i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))]>,
+ i16:$offset))]>,
MUBUFAddr64Table<1, NAME>;
def _OFFEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, legal_store_vt>;
@@ -757,14 +754,14 @@ multiclass MUBUF_Pseudo_Atomics_RTN <string opName,
let FPAtomic = isFP in
def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
[(set vdataType:$vdata,
- (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset),
+ (atomic (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset),
vdataType:$vdata_in))]>,
MUBUFAddr64Table <0, NAME # "_RTN">;
let FPAtomic = isFP in
def _ADDR64_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
[(set vdataType:$vdata,
- (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset),
+ (atomic (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset),
vdataType:$vdata_in))]>,
MUBUFAddr64Table <1, NAME # "_RTN">;
@@ -1539,20 +1536,20 @@ def : GCNPat<
class MUBUFLoad_PatternADDR64 <MUBUF_Pseudo Instr_ADDR64, ValueType vt,
PatFrag constant_ld> : GCNPat <
(vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
- i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))),
- (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, CPol:$cpol, $tfe, $swz)
+ i16:$offset))),
+ (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset)
>;
multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
ValueType vt, PatFrag atomic_ld> {
def : GCNPat <
(vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset))),
- (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
+ (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset)
>;
def : GCNPat <
- (vt (atomic_ld (MUBUFOffsetAtomic v4i32:$rsrc, i32:$soffset, i16:$offset))),
- (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0)
+ (vt (atomic_ld (MUBUFOffset v4i32:$rsrc, i32:$soffset, i16:$offset))),
+ (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset))
>;
}
@@ -1572,9 +1569,8 @@ multiclass MUBUFLoad_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
PatFrag ld> {
def : GCNPat <
- (vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset,
- i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))),
- (Instr_OFFSET $srsrc, $soffset, $offset, CPol:$cpol, $tfe, $swz)
+ (vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset))),
+ (Instr_OFFSET $srsrc, $soffset, $offset)
>;
}
@@ -1612,12 +1608,12 @@ multiclass MUBUFScratchLoadPat_D16 <MUBUF_Pseudo InstrOffen,
ValueType vt, PatFrag ld_frag> {
def : GCNPat <
(ld_frag (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset), vt:$in),
- (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, $in)
+ (InstrOffen $vaddr, $srsrc, $soffset, $offset, $in)
>;
def : GCNPat <
(ld_frag (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset), vt:$in),
- (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, $in)
+ (InstrOffset $srsrc, $soffset, $offset, $in)
>;
}
@@ -1663,12 +1659,12 @@ multiclass MUBUFStore_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo In
// Store follows atomic op convention so address is first
def : GCNPat <
(atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset), vt:$val),
- (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
+ (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset)
>;
def : GCNPat <
- (atomic_st (MUBUFOffsetAtomic v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
- (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0)
+ (atomic_st (MUBUFOffset v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
+ (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset))
>;
}
let SubtargetPredicate = isGFX6GFX7 in {
@@ -1681,9 +1677,8 @@ multiclass MUBUFStore_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
PatFrag st> {
def : GCNPat <
- (st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
- i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz)),
- (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, CPol:$cpol, $tfe, $swz)
+ (st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset)),
+ (Instr_OFFSET $vdata, $srsrc, $soffset, $offset)
>;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index af51434514df..19ccb1e28088 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1116,7 +1116,9 @@ def CPol_0 : NamedOperandU32Default0<"CPol", NamedMatchClass<"CPol">>;
def CPol_GLC1 : NamedOperandU32Default1<"CPol", NamedMatchClass<"CPol">>;
def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
+def TFE_0 : NamedOperandBit_0<"TFE", NamedMatchClass<"TFE">>;
def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>;
+def SWZ_0 : NamedOperandBit_0<"SWZ", NamedMatchClass<"SWZ">>;
def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>;
diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
index b938ff52f21a..729a05c12c74 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
@@ -1,14 +1,17 @@
-; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
-; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GFX89 %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SIVI %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GFX89,SIVI %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s
; GCN-LABEL: {{^}}extract_vector_elt_v2i16:
; GCN: s_load_dword [[VEC:s[0-9]+]]
-; GCN: s_lshr_b32 [[ELT1:s[0-9]+]], [[VEC]], 16
-; GCN-DAG: v_mov_b32_e32 [[VELT0:v[0-9]+]], [[VEC]]
-; GCN-DAG: v_mov_b32_e32 [[VELT1:v[0-9]+]], [[ELT1]]
-; GCN-DAG: buffer_store_short [[VELT0]]
-; GCN-DAG: buffer_store_short [[VELT1]]
+; SIVI: s_lshr_b32 [[ELT1:s[0-9]+]], [[VEC]], 16
+; SIVI-DAG: v_mov_b32_e32 [[VELT0:v[0-9]+]], [[VEC]]
+; SIVI-DAG: v_mov_b32_e32 [[VELT1:v[0-9]+]], [[ELT1]]
+; SIVI-DAG: buffer_store_short [[VELT0]]
+; SIVI-DAG: buffer_store_short [[VELT1]]
+; GFX9: v_mov_b32_e32 [[VVEC:v[0-9]+]], [[VEC]]
+; GFX9: global_store_short_d16_hi v{{[0-9]+}}, [[VVEC]],
+; GFX9: buffer_store_short [[VVEC]],
define amdgpu_kernel void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr) #0 {
%vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
%p0 = extractelement <2 x i16> %vec, i32 0
More information about the llvm-commits
mailing list