[llvm] r282381 - [X86][avx512] Fix bug in masked compress store.
Ayman Musa via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 25 23:22:08 PDT 2016
Author: aymanmus
Date: Mon Sep 26 01:22:08 2016
New Revision: 282381
URL: http://llvm.org/viewvc/llvm-project?rev=282381&view=rev
Log:
[X86][avx512] Fix bug in masked compress store.
Differential Revision: https://reviews.llvm.org/D23984
Modified:
llvm/trunk/include/llvm/CodeGen/SelectionDAG.h
llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAG.h?rev=282381&r1=282380&r2=282381&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h (original)
+++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Mon Sep 26 01:22:08 2016
@@ -968,7 +968,8 @@ public:
MachineMemOperand *MMO, ISD::LoadExtType);
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val,
SDValue Ptr, SDValue Mask, EVT MemVT,
- MachineMemOperand *MMO, bool IsTrunc);
+ MachineMemOperand *MMO, bool IsTrunc,
+ bool isCompressing = false);
SDValue getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops, MachineMemOperand *MMO);
SDValue getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h?rev=282381&r1=282380&r2=282381&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h (original)
+++ llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h Mon Sep 26 01:22:08 2016
@@ -453,6 +453,7 @@ protected:
uint16_t : NumLSBaseSDNodeBits;
uint16_t IsTruncating : 1;
+ uint16_t IsCompressing : 1;
};
union {
@@ -1959,15 +1960,23 @@ class MaskedStoreSDNode : public MaskedL
public:
friend class SelectionDAG;
MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
- bool isTrunc, EVT MemVT, MachineMemOperand *MMO)
+ bool isTrunc, bool isCompressing, EVT MemVT,
+ MachineMemOperand *MMO)
: MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, MemVT, MMO) {
StoreSDNodeBits.IsTruncating = isTrunc;
+ StoreSDNodeBits.IsCompressing = isCompressing;
}
/// Return true if the op does a truncation before store.
/// For integers this is the same as doing a TRUNCATE and storing the result.
/// For floats, it is the same as doing an FP_ROUND and storing the result.
bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
+ /// Returns true if the op does a compression to the vector before storing.
+ /// The node contiguously stores the active elements (integers or floats)
+ /// in src (those with their respective bit set in writemask k) to unaligned
+ /// memory at base_addr.
+ bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
+
const SDValue &getValue() const { return getOperand(3); }
static bool classof(const SDNode *N) {
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=282381&r1=282380&r2=282381&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Mon Sep 26 01:22:08 2016
@@ -5340,7 +5340,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT
SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
SDValue Val, SDValue Ptr, SDValue Mask,
EVT MemVT, MachineMemOperand *MMO,
- bool isTrunc) {
+ bool isTrunc, bool isCompress) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
EVT VT = Val.getValueType();
@@ -5350,7 +5350,7 @@ SDValue SelectionDAG::getMaskedStore(SDV
AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>(
- dl.getIROrder(), VTs, isTrunc, MemVT, MMO));
+ dl.getIROrder(), VTs, isTrunc, isCompress, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -5358,7 +5358,7 @@ SDValue SelectionDAG::getMaskedStore(SDV
return SDValue(E, 0);
}
auto *N = newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
- isTrunc, MemVT, MMO);
+ isTrunc, isCompress, MemVT, MMO);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=282381&r1=282380&r2=282381&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Sep 26 01:22:08 2016
@@ -18818,11 +18818,11 @@ static SDValue LowerINTRINSIC_W_CHAIN(SD
return DAG.getStore(Chain, dl, DataToCompress, Addr,
MemIntr->getMemOperand());
- SDValue Compressed =
- getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress),
- Mask, DAG.getUNDEF(VT), Subtarget, DAG);
- return DAG.getStore(Chain, dl, Compressed, Addr,
- MemIntr->getMemOperand());
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+
+ return DAG.getMaskedStore(Chain, dl, DataToCompress, Addr, VMask, VT,
+ MemIntr->getMemOperand(), false, true);
}
case TRUNCATE_TO_MEM_VI8:
case TRUNCATE_TO_MEM_VI16:
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=282381&r1=282380&r2=282381&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Mon Sep 26 01:22:08 2016
@@ -7452,7 +7452,7 @@ defm VPMOVQ2M : avx512_convert_vector_to
// AVX-512 - COMPRESS and EXPAND
//
-multiclass compress_by_vec_width<bits<8> opc, X86VectorVTInfo _,
+multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr> {
defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
(ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
@@ -7467,19 +7467,28 @@ multiclass compress_by_vec_width<bits<8>
def mrk : AVX5128I<opc, MRMDestMem, (outs),
(ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
- [(store (_.VT (vselect _.KRCWM:$mask,
- (_.VT (X86compress _.RC:$src)), _.ImmAllZerosV)),
- addr:$dst)]>,
+ []>,
EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
}
+multiclass compress_by_vec_width_lowering<X86VectorVTInfo _ > {
+
+ def : Pat<(X86mCompressingStore addr:$dst, _.KRCWM:$mask,
+ (_.VT _.RC:$src)),
+ (!cast<Instruction>(NAME#_.ZSuffix##mrk)
+ addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
+}
+
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo> {
- defm Z : compress_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
+ defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr>,
+ compress_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;
let Predicates = [HasVLX] in {
- defm Z256 : compress_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
- defm Z128 : compress_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
+ defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr>,
+ compress_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
+ defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr>,
+ compress_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
}
}
Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=282381&r1=282380&r2=282381&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Mon Sep 26 01:22:08 2016
@@ -959,7 +959,8 @@ def masked_load_unaligned : PatFrag<(ops
// do not support vector types (llvm-tblgen will fail).
def X86mstore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_store node:$src1, node:$src2, node:$src3), [{
- return !cast<MaskedStoreSDNode>(N)->isTruncatingStore();
+ return (!cast<MaskedStoreSDNode>(N)->isTruncatingStore()) &&
+ (!cast<MaskedStoreSDNode>(N)->isCompressingStore());
}]>;
def masked_store_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
@@ -988,6 +989,11 @@ def masked_store_unaligned : PatFrag<(op
return isa<MaskedStoreSDNode>(N);
}]>;
+def X86mCompressingStore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_store node:$src1, node:$src2, node:$src3), [{
+ return cast<MaskedStoreSDNode>(N)->isCompressingStore();
+}]>;
+
// masked truncstore fragments
// X86mtruncstore can't be implemented in core DAG files because some targets
// doesn't support vector type ( llvm-tblgen will fail)
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll?rev=282381&r1=282380&r2=282381&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll Mon Sep 26 01:22:08 2016
@@ -893,6 +893,37 @@ define <4 x i32> @compr10(<4 x i32> %dat
ret <4 x i32> %res
}
+
+ at xmm = common global <4 x i32> zeroinitializer, align 16
+ at k8 = common global i8 0, align 1
+
+define i32 @compr11() {
+; CHECK-LABEL: compr11:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: movq _xmm@{{.*}}(%rip), %rax ## encoding: [0x48,0x8b,0x05,A,A,A,A]
+; CHECK-NEXT: ## fixup A - offset: 3, value: _xmm at GOTPCREL-4, kind: reloc_riprel_4byte_movq_load
+; CHECK-NEXT: vmovdqa32 (%rax), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6f,0x00]
+; CHECK-NEXT: movq _k8@{{.*}}(%rip), %rax ## encoding: [0x48,0x8b,0x05,A,A,A,A]
+; CHECK-NEXT: ## fixup A - offset: 3, value: _k8 at GOTPCREL-4, kind: reloc_riprel_4byte_movq_load
+; CHECK-NEXT: movzbl (%rax), %eax ## encoding: [0x0f,0xb6,0x00]
+; CHECK-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
+; CHECK-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
+; CHECK-NEXT: vpxord %xmm1, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xef,0xc9]
+; CHECK-NEXT: vmovdqa32 %xmm0, -{{[0-9]+}}(%rsp) ## encoding: [0x62,0xf1,0x7d,0x08,0x7f,0x84,0x24,0xd8,0xff,0xff,0xff]
+; CHECK-NEXT: vmovdqa32 %xmm1, -{{[0-9]+}}(%rsp) ## encoding: [0x62,0xf1,0x7d,0x08,0x7f,0x8c,0x24,0xe8,0xff,0xff,0xff]
+; CHECK-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+entry:
+ %.compoundliteral = alloca <2 x i64>, align 16
+ %res = alloca <4 x i32>, align 16
+ %a0 = load <4 x i32>, <4 x i32>* @xmm, align 16
+ %a2 = load i8, i8* @k8, align 1
+ %a21 = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %a0, <4 x i32> zeroinitializer, i8 %a2) #2
+ store volatile <4 x i32> %a21, <4 x i32>* %res, align 16
+ store <2 x i64> zeroinitializer, <2 x i64>* %.compoundliteral, align 16
+ ret i32 0
+}
+
declare <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)
; Expand
@@ -5219,9 +5250,9 @@ define <8 x i32>@test_int_x86_avx512_mas
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqa32 {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; CHECK-NEXT: ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x05,A,A,A,A]
-; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI308_0-4, kind: reloc_riprel_4byte
+; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI309_0-4, kind: reloc_riprel_4byte
; CHECK-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x46,0x05,A,A,A,A]
-; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI308_1-4, kind: reloc_riprel_4byte
+; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI309_1-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>, <8 x i32> zeroinitializer, i8 -1)
ret <8 x i32> %res
@@ -5252,9 +5283,9 @@ define <2 x i64>@test_int_x86_avx512_mas
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqa64 {{.*#+}} xmm0 = [2,18446744073709551607]
; CHECK-NEXT: ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x05,A,A,A,A]
-; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI310_0-4, kind: reloc_riprel_4byte
+; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI311_0-4, kind: reloc_riprel_4byte
; CHECK-NEXT: vpsravq {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A]
-; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI310_1-4, kind: reloc_riprel_4byte
+; CHECK-NEXT: ## fixup A - offset: 6, value: LCPI311_1-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq ## encoding: [0xc3]
%res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> <i64 2, i64 -9>, <2 x i64> <i64 1, i64 90>, <2 x i64> zeroinitializer, i8 -1)
ret <2 x i64> %res
More information about the llvm-commits
mailing list