[llvm] r321953 - [X86] Add 128 and 256-bit VPOPCNTD/Q instructions to load folding tables.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sat Jan 6 22:24:27 PST 2018


Author: ctopper
Date: Sat Jan  6 22:24:27 2018
New Revision: 321953

URL: http://llvm.org/viewvc/llvm-project?rev=321953&view=rev
Log:
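[X86] Add 128 and 256-bit VPOPCNTD/Q instructions to load folding tables.
Also adds the VPOPCNTB/W entries for the 128, 256 and 512-bit forms (unmasked, rrk and rrkz), and stack-folding tests for VPOPCNTD/Q at all three vector widths.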

Modified:
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
    llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=321953&r1=321952&r2=321953&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sat Jan  6 22:24:27 2018
@@ -976,8 +976,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPMOVZXDQZrr,     X86::VPMOVZXDQZrm,       0 },
     { X86::VPMOVZXWDZrr,     X86::VPMOVZXWDZrm,       0 },
     { X86::VPMOVZXWQZrr,     X86::VPMOVZXWQZrm,       0 },
+    { X86::VPOPCNTBZrr,      X86::VPOPCNTBZrm,        0 },
     { X86::VPOPCNTDZrr,      X86::VPOPCNTDZrm,        0 },
     { X86::VPOPCNTQZrr,      X86::VPOPCNTQZrm,        0 },
+    { X86::VPOPCNTWZrr,      X86::VPOPCNTWZrm,        0 },
     { X86::VPSHUFDZri,       X86::VPSHUFDZmi,         0 },
     { X86::VPSHUFHWZri,      X86::VPSHUFHWZmi,        0 },
     { X86::VPSHUFLWZri,      X86::VPSHUFLWZmi,        0 },
@@ -1033,6 +1035,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPMOVZXDQZ256rr,      X86::VPMOVZXDQZ256rm,      0 },
     { X86::VPMOVZXWDZ256rr,      X86::VPMOVZXWDZ256rm,      0 },
     { X86::VPMOVZXWQZ256rr,      X86::VPMOVZXWQZ256rm,      TB_NO_REVERSE },
+    { X86::VPOPCNTBZ256rr,       X86::VPOPCNTBZ256rm,       0 },
+    { X86::VPOPCNTDZ256rr,       X86::VPOPCNTDZ256rm,       0 },
+    { X86::VPOPCNTQZ256rr,       X86::VPOPCNTQZ256rm,       0 },
+    { X86::VPOPCNTWZ256rr,       X86::VPOPCNTWZ256rm,       0 },
     { X86::VPSHUFDZ256ri,        X86::VPSHUFDZ256mi,        0 },
     { X86::VPSHUFHWZ256ri,       X86::VPSHUFHWZ256mi,       0 },
     { X86::VPSHUFLWZ256ri,       X86::VPSHUFLWZ256mi,       0 },
@@ -1085,6 +1091,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPMOVZXDQZ128rr,      X86::VPMOVZXDQZ128rm,      TB_NO_REVERSE },
     { X86::VPMOVZXWDZ128rr,      X86::VPMOVZXWDZ128rm,      TB_NO_REVERSE },
     { X86::VPMOVZXWQZ128rr,      X86::VPMOVZXWQZ128rm,      TB_NO_REVERSE },
+    { X86::VPOPCNTBZ128rr,       X86::VPOPCNTBZ128rm,       0 },
+    { X86::VPOPCNTDZ128rr,       X86::VPOPCNTDZ128rm,       0 },
+    { X86::VPOPCNTQZ128rr,       X86::VPOPCNTQZ128rm,       0 },
+    { X86::VPOPCNTWZ128rr,       X86::VPOPCNTWZ128rm,       0 },
     { X86::VPSHUFDZ128ri,        X86::VPSHUFDZ128mi,        0 },
     { X86::VPSHUFHWZ128ri,       X86::VPSHUFHWZ128mi,       0 },
     { X86::VPSHUFLWZ128ri,       X86::VPSHUFLWZ128mi,       0 },
@@ -2415,8 +2425,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPMOVZXDQZrrkz,    X86::VPMOVZXDQZrmkz,      0 },
     { X86::VPMOVZXWDZrrkz,    X86::VPMOVZXWDZrmkz,      0 },
     { X86::VPMOVZXWQZrrkz,    X86::VPMOVZXWQZrmkz,      0 },
+    { X86::VPOPCNTBZrrkz,     X86::VPOPCNTBZrmkz,       0 },
     { X86::VPOPCNTDZrrkz,     X86::VPOPCNTDZrmkz,       0 },
     { X86::VPOPCNTQZrrkz,     X86::VPOPCNTQZrmkz,       0 },
+    { X86::VPOPCNTWZrrkz,     X86::VPOPCNTWZrmkz,       0 },
     { X86::VPSHUFDZrikz,      X86::VPSHUFDZmikz,        0 },
     { X86::VPSHUFHWZrikz,     X86::VPSHUFHWZmikz,       0 },
     { X86::VPSHUFLWZrikz,     X86::VPSHUFLWZmikz,       0 },
@@ -2457,6 +2469,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPMOVZXDQZ256rrkz, X86::VPMOVZXDQZ256rmkz,   0 },
     { X86::VPMOVZXWDZ256rrkz, X86::VPMOVZXWDZ256rmkz,   0 },
     { X86::VPMOVZXWQZ256rrkz, X86::VPMOVZXWQZ256rmkz,   TB_NO_REVERSE },
+    { X86::VPOPCNTBZ256rrkz,  X86::VPOPCNTBZ256rmkz,    0 },
+    { X86::VPOPCNTDZ256rrkz,  X86::VPOPCNTDZ256rmkz,    0 },
+    { X86::VPOPCNTQZ256rrkz,  X86::VPOPCNTQZ256rmkz,    0 },
+    { X86::VPOPCNTWZ256rrkz,  X86::VPOPCNTWZ256rmkz,    0 },
     { X86::VPSHUFDZ256rikz,   X86::VPSHUFDZ256mikz,     0 },
     { X86::VPSHUFHWZ256rikz,  X86::VPSHUFHWZ256mikz,    0 },
     { X86::VPSHUFLWZ256rikz,  X86::VPSHUFLWZ256mikz,    0 },
@@ -2494,6 +2510,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPMOVZXDQZ128rrkz, X86::VPMOVZXDQZ128rmkz,   TB_NO_REVERSE },
     { X86::VPMOVZXWDZ128rrkz, X86::VPMOVZXWDZ128rmkz,   TB_NO_REVERSE },
     { X86::VPMOVZXWQZ128rrkz, X86::VPMOVZXWQZ128rmkz,   TB_NO_REVERSE },
+    { X86::VPOPCNTBZ128rrkz,  X86::VPOPCNTBZ128rmkz,    0 },
+    { X86::VPOPCNTDZ128rrkz,  X86::VPOPCNTDZ128rmkz,    0 },
+    { X86::VPOPCNTQZ128rrkz,  X86::VPOPCNTQZ128rmkz,    0 },
+    { X86::VPOPCNTWZ128rrkz,  X86::VPOPCNTWZ128rmkz,    0 },
     { X86::VPSHUFDZ128rikz,   X86::VPSHUFDZ128mikz,     0 },
     { X86::VPSHUFHWZ128rikz,  X86::VPSHUFHWZ128mikz,    0 },
     { X86::VPSHUFLWZ128rikz,  X86::VPSHUFLWZ128mikz,    0 },
@@ -3056,8 +3076,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPMOVZXDQZrrk,         X86::VPMOVZXDQZrmk,         0 },
     { X86::VPMOVZXWDZrrk,         X86::VPMOVZXWDZrmk,         0 },
     { X86::VPMOVZXWQZrrk,         X86::VPMOVZXWQZrmk,         0 },
+    { X86::VPOPCNTBZrrk,          X86::VPOPCNTBZrmk,          0 },
     { X86::VPOPCNTDZrrk,          X86::VPOPCNTDZrmk,          0 },
     { X86::VPOPCNTQZrrk,          X86::VPOPCNTQZrmk,          0 },
+    { X86::VPOPCNTWZrrk,          X86::VPOPCNTWZrmk,          0 },
     { X86::VPSHUFDZrik,           X86::VPSHUFDZmik,           0 },
     { X86::VPSHUFHWZrik,          X86::VPSHUFHWZmik,          0 },
     { X86::VPSHUFLWZrik,          X86::VPSHUFLWZmik,          0 },
@@ -3098,6 +3120,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPMOVZXDQZ256rrk,      X86::VPMOVZXDQZ256rmk,      0 },
     { X86::VPMOVZXWDZ256rrk,      X86::VPMOVZXWDZ256rmk,      0 },
     { X86::VPMOVZXWQZ256rrk,      X86::VPMOVZXWQZ256rmk,      TB_NO_REVERSE },
+    { X86::VPOPCNTBZ256rrk,       X86::VPOPCNTBZ256rmk,       0 },
+    { X86::VPOPCNTDZ256rrk,       X86::VPOPCNTDZ256rmk,       0 },
+    { X86::VPOPCNTQZ256rrk,       X86::VPOPCNTQZ256rmk,       0 },
+    { X86::VPOPCNTWZ256rrk,       X86::VPOPCNTWZ256rmk,       0 },
     { X86::VPSHUFDZ256rik,        X86::VPSHUFDZ256mik,        0 },
     { X86::VPSHUFHWZ256rik,       X86::VPSHUFHWZ256mik,       0 },
     { X86::VPSHUFLWZ256rik,       X86::VPSHUFLWZ256mik,       0 },
@@ -3135,6 +3161,10 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPMOVZXDQZ128rrk,      X86::VPMOVZXDQZ128rmk,      TB_NO_REVERSE },
     { X86::VPMOVZXWDZ128rrk,      X86::VPMOVZXWDZ128rmk,      TB_NO_REVERSE },
     { X86::VPMOVZXWQZ128rrk,      X86::VPMOVZXWQZ128rmk,      TB_NO_REVERSE },
+    { X86::VPOPCNTBZ128rrk,       X86::VPOPCNTBZ128rmk,       0 },
+    { X86::VPOPCNTDZ128rrk,       X86::VPOPCNTDZ128rmk,       0 },
+    { X86::VPOPCNTQZ128rrk,       X86::VPOPCNTQZ128rmk,       0 },
+    { X86::VPOPCNTWZ128rrk,       X86::VPOPCNTWZ128rmk,       0 },
     { X86::VPSHUFDZ128rik,        X86::VPSHUFDZ128mik,        0 },
     { X86::VPSHUFHWZ128rik,       X86::VPSHUFHWZ128mik,       0 },
     { X86::VPSHUFLWZ128rik,       X86::VPSHUFLWZ128mik,       0 },

Modified: llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll?rev=321953&r1=321952&r2=321953&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll Sat Jan  6 22:24:27 2018
@@ -1,4 +1,4 @@
-; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vbmi,+avx512cd < %s | FileCheck %s
+; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vbmi,+avx512cd,+avx512vpopcntdq < %s | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-unknown"
@@ -1214,6 +1214,24 @@ define <8 x i64> @stack_fold_pmovzxwq_ma
   ret <8 x i64> %4
 }
 
+define <16 x i32> @stack_fold_vpopcntd(<16 x i32> %a0) {
+  ;CHECK-LABEL: stack_fold_vpopcntd
+  ;CHECK:       vpopcntd {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a0)
+  ret <16 x i32> %2
+}
+declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readonly
+
+define <8 x i64> @stack_fold_vpopcntq(<8 x i64> %a0) {
+  ;CHECK-LABEL: stack_fold_vpopcntq
+  ;CHECK:       vpopcntq {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a0)
+  ret <8 x i64> %2
+}
+declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>) nounwind readnone
+
 define <8 x i64> @stack_fold_psadbw(<64 x i8> %a0, <64 x i8> %a1) {
   ;CHECK-LABEL: stack_fold_psadbw
   ;CHECK:       vpsadbw {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 64-byte Folded Reload

Modified: llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll?rev=321953&r1=321952&r2=321953&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512vl.ll Sat Jan  6 22:24:27 2018
@@ -1,4 +1,4 @@
-; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+avx512dq,+avx512vbmi,+avx512cd < %s | FileCheck %s
+; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw,+avx512dq,+avx512vbmi,+avx512cd,+avx512vpopcntdq < %s | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-unknown"
@@ -1620,6 +1620,42 @@ define <4 x i64> @stack_fold_pmuludq_ymm
   ret <4 x i64> %5
 }
 
+define <4 x i32> @stack_fold_vpopcntd(<4 x i32> %a0) {
+  ;CHECK-LABEL: stack_fold_vpopcntd
+  ;CHECK:       vpopcntd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a0)
+  ret <4 x i32> %2
+}
+declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readonly
+
+define <8 x i32> @stack_fold_vpopcntd_ymm(<8 x i32> %a0) {
+  ;CHECK-LABEL: stack_fold_vpopcntd_ymm
+  ;CHECK:       vpopcntd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %a0)
+  ret <8 x i32> %2
+}
+declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readonly
+
+define <2 x i64> @stack_fold_vpopcntq(<2 x i64> %a0) {
+  ;CHECK-LABEL: stack_fold_vpopcntq
+  ;CHECK:       vpopcntq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a0)
+  ret <2 x i64> %2
+}
+declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) nounwind readnone
+
+define <4 x i64> @stack_fold_vpopcntq_ymm(<4 x i64> %a0) {
+  ;CHECK-LABEL: stack_fold_vpopcntq_ymm
+  ;CHECK:       vpopcntq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
+  %2 = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %a0)
+  ret <4 x i64> %2
+}
+declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>) nounwind readnone
+
 define <2 x i64> @stack_fold_psadbw(<16 x i8> %a0, <16 x i8> %a1) {
   ;CHECK-LABEL: stack_fold_psadbw
   ;CHECK:       vpsadbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload




More information about the llvm-commits mailing list