[llvm] ac82b91 - [X86] Use h-register for final XOR of __builtin_parity on 64-bit targets.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 3 10:10:33 PDT 2020
Author: Craig Topper
Date: 2020-08-03T10:10:17-07:00
New Revision: ac82b918c74f3fab8d4a7c1905277bda6b9bccb4
URL: https://github.com/llvm/llvm-project/commit/ac82b918c74f3fab8d4a7c1905277bda6b9bccb4
DIFF: https://github.com/llvm/llvm-project/commit/ac82b918c74f3fab8d4a7c1905277bda6b9bccb4.diff
LOG: [X86] Use h-register for final XOR of __builtin_parity on 64-bit targets.
This adds an isel pattern and a special XOR8rr_NOREX instruction
to enable the use of h-registers for __builtin_parity. This avoids
a copy and a shift instruction. The NOREX instruction guards against
register allocation picking something other than the matching
l-register: if an R8-R15 register were picked instead, we wouldn't
be able to encode the instruction, since an h-register can't be used
with a REX prefix.
Fixes PR46954
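For context, the no-popcount lowering computes parity by xor-folding the
value down to a single byte and then testing PF with setnp. A minimal C
sketch of the 32-bit shape (the helper name is illustrative, not from the
patch):

    #include <stdint.h>

    static inline unsigned parity32(uint32_t x) {
      x ^= x >> 16;                   /* fold high 16 bits into low 16 */
      uint8_t lo = (uint8_t)x;        /* sub_8bit,    e.g. %cl */
      uint8_t hi = (uint8_t)(x >> 8); /* sub_8bit_hi, e.g. %ch */
      uint8_t b  = lo ^ hi;           /* the final xor this patch improves:
                                         xorb %ch, %cl instead of a copy,
                                         a shift, and an xor */
      b ^= b >> 4;                    /* the rest of the byte reduction is */
      b ^= b >> 2;                    /* free on x86: setnp reads the      */
      b ^= b >> 1;                    /* xor's parity straight from PF     */
      return b & 1;
    }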
Added:
Modified:
llvm/lib/Target/X86/X86InstrArithmetic.td
llvm/lib/Target/X86/X86InstrCompiler.td
llvm/test/CodeGen/X86/parity.ll
llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index f7f22285bd15..e83e1e74ff52 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -1182,6 +1182,15 @@ defm SUB : ArithBinOp_RF<0x28, 0x2A, 0x2C, "sub", MRM5r, MRM5m,
X86sub_flag, sub, 0, 1, 0>;
}
+// Version of XOR8rr that uses GR8_NOREX. This is used by the handling of
+// __builtin_parity where the last step xors an h-register with an l-register.
+let isCodeGenOnly = 1, hasSideEffects = 0, Constraints = "$src1 = $dst",
+ Defs = [EFLAGS], isCommutable = 1 in
+def XOR8rr_NOREX : I<0x30, MRMDestReg, (outs GR8_NOREX:$dst),
+ (ins GR8_NOREX:$src1, GR8_NOREX:$src2),
+ "xor{b}\t{$src2, $dst|$dst, $src2}", []>,
+ Sched<[WriteALU]>;
+
// Arithmetic.
defm ADC : ArithBinOp_RFF<0x10, 0x12, 0x14, "adc", MRM2r, MRM2m, X86adc_flag,
1, 0>;
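The separate _NOREX opcode exists because of an x86-64 encoding rule: a REX
prefix re-purposes the encodings of AH/CH/DH/BH to mean SPL/BPL/SIL/DIL,
while R8B-R15B can only be named with a REX prefix, so the two can never
appear in one instruction. A minimal sketch of that rule in C (the register
enumeration and helper names are illustrative):

    #include <stdbool.h>

    enum Reg8 { AL, CL, DL, BL, AH, CH, DH, BH,
                SPL, BPL, SIL, DIL,
                R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B };

    /* SPL/BPL/SIL/DIL and R8B-R15B are only reachable with a REX prefix... */
    static bool needs_rex(enum Reg8 r) { return r >= SPL; }

    /* ...while AH/CH/DH/BH lose their encodings once any REX prefix is set. */
    static bool is_high_byte(enum Reg8 r) { return r >= AH && r <= BH; }

    /* An 8-bit reg-reg xor is encodable iff it doesn't simultaneously
       require and forbid a REX prefix. */
    static bool encodable_xor8(enum Reg8 a, enum Reg8 b) {
      return !((needs_rex(a) || needs_rex(b)) &&
               (is_high_byte(a) || is_high_byte(b)));
    }

Restricting both operands to GR8_NOREX keeps encodable_xor8 trivially true,
which is exactly what the register class enforces.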
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 4df93fb2ed60..d78d9f7c80c7 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1698,6 +1698,16 @@ def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
(EXTRACT_SUBREG GR16:$src, sub_8bit_hi))>,
Requires<[In64BitMode]>;
+// Special pattern to catch the last step of __builtin_parity handling. Our
+// goal is to use an xor of an h-register with the corresponding l-register.
+// The above patterns would handle this on non-64-bit targets, but for 64-bit
+// we need to be more careful. We use a NOREX instruction here in case
+// register allocation fails to place both operands in sub-registers of the
+// same register, ensuring we can't accidentally mix R8-R15 with an h-register.
+def : Pat<(X86xor_flag (i8 (trunc GR32:$src)),
+ (i8 (trunc (srl_su GR32:$src, (i8 8))))),
+ (XOR8rr_NOREX (EXTRACT_SUBREG GR32:$src, sub_8bit),
+ (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>;
// (shl x, 1) ==> (add x, x)
// Note that if x is undef (immediate or otherwise), we could theoretically
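In C terms, the DAG shape this pattern matches is the low byte of a 32-bit
value xor'ed with its second byte, where both truncations come from the same
source (a minimal sketch; the function name is illustrative):

    #include <stdint.h>

    /* Both i8 operands come from one GR32, one of them shifted right by 8,
       so they are exactly sub_8bit and sub_8bit_hi of that register and the
       whole thing can lower to a single "xorb %ch, %cl". */
    static inline uint8_t fold_low_bytes(uint32_t x) {
      return (uint8_t)x ^ (uint8_t)(x >> 8);
    }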
diff --git a/llvm/test/CodeGen/X86/parity.ll b/llvm/test/CodeGen/X86/parity.ll
index 869ee552c67d..6289ab482426 100644
--- a/llvm/test/CodeGen/X86/parity.ll
+++ b/llvm/test/CodeGen/X86/parity.ll
@@ -21,10 +21,8 @@ define i32 @parity_32(i32 %x) {
; X64-NOPOPCNT-NEXT: movl %edi, %ecx
; X64-NOPOPCNT-NEXT: shrl $16, %ecx
; X64-NOPOPCNT-NEXT: xorl %edi, %ecx
-; X64-NOPOPCNT-NEXT: movl %ecx, %edx
-; X64-NOPOPCNT-NEXT: shrl $8, %edx
; X64-NOPOPCNT-NEXT: xorl %eax, %eax
-; X64-NOPOPCNT-NEXT: xorb %cl, %dl
+; X64-NOPOPCNT-NEXT: xorb %ch, %cl
; X64-NOPOPCNT-NEXT: setnp %al
; X64-NOPOPCNT-NEXT: retq
;
@@ -66,10 +64,8 @@ define i64 @parity_64(i64 %x) {
; X64-NOPOPCNT-NEXT: movl %eax, %ecx
; X64-NOPOPCNT-NEXT: shrl $16, %ecx
; X64-NOPOPCNT-NEXT: xorl %eax, %ecx
-; X64-NOPOPCNT-NEXT: movl %ecx, %edx
-; X64-NOPOPCNT-NEXT: shrl $8, %edx
; X64-NOPOPCNT-NEXT: xorl %eax, %eax
-; X64-NOPOPCNT-NEXT: xorb %cl, %dl
+; X64-NOPOPCNT-NEXT: xorb %ch, %cl
; X64-NOPOPCNT-NEXT: setnp %al
; X64-NOPOPCNT-NEXT: retq
;
@@ -113,10 +109,8 @@ define i32 @parity_64_trunc(i64 %x) {
; X64-NOPOPCNT-NEXT: movl %eax, %ecx
; X64-NOPOPCNT-NEXT: shrl $16, %ecx
; X64-NOPOPCNT-NEXT: xorl %eax, %ecx
-; X64-NOPOPCNT-NEXT: movl %ecx, %edx
-; X64-NOPOPCNT-NEXT: shrl $8, %edx
; X64-NOPOPCNT-NEXT: xorl %eax, %eax
-; X64-NOPOPCNT-NEXT: xorb %cl, %dl
+; X64-NOPOPCNT-NEXT: xorb %ch, %cl
; X64-NOPOPCNT-NEXT: setnp %al
; X64-NOPOPCNT-NEXT: retq
;
@@ -156,9 +150,7 @@ define i8 @parity_32_trunc(i32 %x) {
; X64-NOPOPCNT-NEXT: movl %edi, %eax
; X64-NOPOPCNT-NEXT: shrl $16, %eax
; X64-NOPOPCNT-NEXT: xorl %edi, %eax
-; X64-NOPOPCNT-NEXT: movl %eax, %ecx
-; X64-NOPOPCNT-NEXT: shrl $8, %ecx
-; X64-NOPOPCNT-NEXT: xorb %al, %cl
+; X64-NOPOPCNT-NEXT: xorb %ah, %al
; X64-NOPOPCNT-NEXT: setnp %al
; X64-NOPOPCNT-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
index 8e50cfc4e4b2..b28aa43affb7 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
@@ -181,9 +181,7 @@ define i1 @trunc_v16i8_v16i1(<16 x i8>) {
; SSE: # %bb.0:
; SSE-NEXT: psllw $7, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: movl %eax, %ecx
-; SSE-NEXT: shrl $8, %ecx
-; SSE-NEXT: xorb %al, %cl
+; SSE-NEXT: xorb %ah, %al
; SSE-NEXT: setnp %al
; SSE-NEXT: retq
;
@@ -191,9 +189,7 @@ define i1 @trunc_v16i8_v16i1(<16 x i8>) {
; AVX: # %bb.0:
; AVX-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX-NEXT: vpmovmskb %xmm0, %eax
-; AVX-NEXT: movl %eax, %ecx
-; AVX-NEXT: shrl $8, %ecx
-; AVX-NEXT: xorb %al, %cl
+; AVX-NEXT: xorb %ah, %al
; AVX-NEXT: setnp %al
; AVX-NEXT: retq
;
@@ -201,9 +197,7 @@ define i1 @trunc_v16i8_v16i1(<16 x i8>) {
; AVX512: # %bb.0:
; AVX512-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX512-NEXT: vpmovmskb %xmm0, %eax
-; AVX512-NEXT: movl %eax, %ecx
-; AVX512-NEXT: shrl $8, %ecx
-; AVX512-NEXT: xorb %al, %cl
+; AVX512-NEXT: xorb %ah, %al
; AVX512-NEXT: setnp %al
; AVX512-NEXT: retq
%a = trunc <16 x i8> %0 to <16 x i1>
@@ -364,9 +358,7 @@ define i1 @trunc_v16i16_v16i1(<16 x i16>) {
; SSE-NEXT: packuswb %xmm1, %xmm0
; SSE-NEXT: psllw $7, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: movl %eax, %ecx
-; SSE-NEXT: shrl $8, %ecx
-; SSE-NEXT: xorb %al, %cl
+; SSE-NEXT: xorb %ah, %al
; SSE-NEXT: setnp %al
; SSE-NEXT: retq
;
@@ -377,9 +369,7 @@ define i1 @trunc_v16i16_v16i1(<16 x i16>) {
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $8, %ecx
-; AVX1-NEXT: xorb %al, %cl
+; AVX1-NEXT: xorb %ah, %al
; AVX1-NEXT: setnp %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -391,9 +381,7 @@ define i1 @trunc_v16i16_v16i1(<16 x i16>) {
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
-; AVX2-NEXT: movl %eax, %ecx
-; AVX2-NEXT: shrl $8, %ecx
-; AVX2-NEXT: xorb %al, %cl
+; AVX2-NEXT: xorb %ah, %al
; AVX2-NEXT: setnp %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -445,9 +433,7 @@ define i1 @trunc_v32i8_v32i1(<32 x i8>) {
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: psllw $7, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: movl %eax, %ecx
-; SSE-NEXT: shrl $8, %ecx
-; SSE-NEXT: xorb %al, %cl
+; SSE-NEXT: xorb %ah, %al
; SSE-NEXT: setnp %al
; SSE-NEXT: retq
;
@@ -457,9 +443,7 @@ define i1 @trunc_v32i8_v32i1(<32 x i8>) {
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $8, %ecx
-; AVX1-NEXT: xorb %al, %cl
+; AVX1-NEXT: xorb %ah, %al
; AVX1-NEXT: setnp %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -471,9 +455,7 @@ define i1 @trunc_v32i8_v32i1(<32 x i8>) {
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $16, %ecx
; AVX2-NEXT: xorl %eax, %ecx
-; AVX2-NEXT: movl %ecx, %eax
-; AVX2-NEXT: shrl $8, %eax
-; AVX2-NEXT: xorb %cl, %al
+; AVX2-NEXT: xorb %ch, %cl
; AVX2-NEXT: setnp %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -505,9 +487,7 @@ define i1 @trunc_v32i8_v32i1(<32 x i8>) {
; AVX512BW-NEXT: movl %eax, %ecx
; AVX512BW-NEXT: shrl $16, %ecx
; AVX512BW-NEXT: xorl %eax, %ecx
-; AVX512BW-NEXT: movl %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: xorb %cl, %al
+; AVX512BW-NEXT: xorb %ch, %cl
; AVX512BW-NEXT: setnp %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@@ -519,9 +499,7 @@ define i1 @trunc_v32i8_v32i1(<32 x i8>) {
; AVX512VL-NEXT: movl %eax, %ecx
; AVX512VL-NEXT: shrl $16, %ecx
; AVX512VL-NEXT: xorl %eax, %ecx
-; AVX512VL-NEXT: movl %ecx, %eax
-; AVX512VL-NEXT: shrl $8, %eax
-; AVX512VL-NEXT: xorb %cl, %al
+; AVX512VL-NEXT: xorb %ch, %cl
; AVX512VL-NEXT: setnp %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@@ -648,9 +626,7 @@ define i1 @trunc_v16i32_v16i1(<16 x i32>) {
; SSE2-NEXT: packuswb %xmm2, %xmm0
; SSE2-NEXT: psllw $7, %xmm0
; SSE2-NEXT: pmovmskb %xmm0, %eax
-; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $8, %ecx
-; SSE2-NEXT: xorb %al, %cl
+; SSE2-NEXT: xorb %ah, %al
; SSE2-NEXT: setnp %al
; SSE2-NEXT: retq
;
@@ -666,9 +642,7 @@ define i1 @trunc_v16i32_v16i1(<16 x i32>) {
; SSE41-NEXT: packuswb %xmm2, %xmm0
; SSE41-NEXT: psllw $7, %xmm0
; SSE41-NEXT: pmovmskb %xmm0, %eax
-; SSE41-NEXT: movl %eax, %ecx
-; SSE41-NEXT: shrl $8, %ecx
-; SSE41-NEXT: xorb %al, %cl
+; SSE41-NEXT: xorb %ah, %al
; SSE41-NEXT: setnp %al
; SSE41-NEXT: retq
;
@@ -684,9 +658,7 @@ define i1 @trunc_v16i32_v16i1(<16 x i32>) {
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $8, %ecx
-; AVX1-NEXT: xorb %al, %cl
+; AVX1-NEXT: xorb %ah, %al
; AVX1-NEXT: setnp %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -704,9 +676,7 @@ define i1 @trunc_v16i32_v16i1(<16 x i32>) {
; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
-; AVX2-NEXT: movl %eax, %ecx
-; AVX2-NEXT: shrl $8, %ecx
-; AVX2-NEXT: xorb %al, %cl
+; AVX2-NEXT: xorb %ah, %al
; AVX2-NEXT: setnp %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -764,9 +734,7 @@ define i1 @trunc_v32i16_v32i1(<32 x i16>) {
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: psllw $7, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: movl %eax, %ecx
-; SSE-NEXT: shrl $8, %ecx
-; SSE-NEXT: xorb %al, %cl
+; SSE-NEXT: xorb %ah, %al
; SSE-NEXT: setnp %al
; SSE-NEXT: retq
;
@@ -778,9 +746,7 @@ define i1 @trunc_v32i16_v32i1(<32 x i16>) {
; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $8, %ecx
-; AVX1-NEXT: xorb %al, %cl
+; AVX1-NEXT: xorb %ah, %al
; AVX1-NEXT: setnp %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -798,9 +764,7 @@ define i1 @trunc_v32i16_v32i1(<32 x i16>) {
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $16, %ecx
; AVX2-NEXT: xorl %eax, %ecx
-; AVX2-NEXT: movl %ecx, %eax
-; AVX2-NEXT: shrl $8, %eax
-; AVX2-NEXT: xorb %cl, %al
+; AVX2-NEXT: xorb %ch, %cl
; AVX2-NEXT: setnp %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -833,9 +797,7 @@ define i1 @trunc_v32i16_v32i1(<32 x i16>) {
; AVX512BW-NEXT: movl %eax, %ecx
; AVX512BW-NEXT: shrl $16, %ecx
; AVX512BW-NEXT: xorl %eax, %ecx
-; AVX512BW-NEXT: movl %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: xorb %cl, %al
+; AVX512BW-NEXT: xorb %ch, %cl
; AVX512BW-NEXT: setnp %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@@ -848,9 +810,7 @@ define i1 @trunc_v32i16_v32i1(<32 x i16>) {
; AVX512VL-NEXT: movl %eax, %ecx
; AVX512VL-NEXT: shrl $16, %ecx
; AVX512VL-NEXT: xorl %eax, %ecx
-; AVX512VL-NEXT: movl %ecx, %eax
-; AVX512VL-NEXT: shrl $8, %eax
-; AVX512VL-NEXT: xorb %cl, %al
+; AVX512VL-NEXT: xorb %ch, %cl
; AVX512VL-NEXT: setnp %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@@ -867,9 +827,7 @@ define i1 @trunc_v64i8_v64i1(<64 x i8>) {
; SSE-NEXT: pxor %xmm0, %xmm1
; SSE-NEXT: psllw $7, %xmm1
; SSE-NEXT: pmovmskb %xmm1, %eax
-; SSE-NEXT: movl %eax, %ecx
-; SSE-NEXT: shrl $8, %ecx
-; SSE-NEXT: xorb %al, %cl
+; SSE-NEXT: xorb %ah, %al
; SSE-NEXT: setnp %al
; SSE-NEXT: retq
;
@@ -880,9 +838,7 @@ define i1 @trunc_v64i8_v64i1(<64 x i8>) {
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsllw $7, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $8, %ecx
-; AVX1-NEXT: xorb %al, %cl
+; AVX1-NEXT: xorb %ah, %al
; AVX1-NEXT: setnp %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -895,9 +851,7 @@ define i1 @trunc_v64i8_v64i1(<64 x i8>) {
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $16, %ecx
; AVX2-NEXT: xorl %eax, %ecx
-; AVX2-NEXT: movl %ecx, %eax
-; AVX2-NEXT: shrl $8, %eax
-; AVX2-NEXT: xorb %cl, %al
+; AVX2-NEXT: xorb %ch, %cl
; AVX2-NEXT: setnp %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -937,9 +891,7 @@ define i1 @trunc_v64i8_v64i1(<64 x i8>) {
; AVX512BW-NEXT: movl %ecx, %eax
; AVX512BW-NEXT: shrl $16, %eax
; AVX512BW-NEXT: xorl %ecx, %eax
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: xorb %al, %cl
+; AVX512BW-NEXT: xorb %ah, %al
; AVX512BW-NEXT: setnp %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@@ -955,9 +907,7 @@ define i1 @trunc_v64i8_v64i1(<64 x i8>) {
; AVX512VL-NEXT: movl %ecx, %eax
; AVX512VL-NEXT: shrl $16, %eax
; AVX512VL-NEXT: xorl %ecx, %eax
-; AVX512VL-NEXT: movl %eax, %ecx
-; AVX512VL-NEXT: shrl $8, %ecx
-; AVX512VL-NEXT: xorb %al, %cl
+; AVX512VL-NEXT: xorb %ah, %al
; AVX512VL-NEXT: setnp %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@@ -1157,9 +1107,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
; SSE-NEXT: pxor %xmm1, %xmm1
; SSE-NEXT: pcmpeqb %xmm0, %xmm1
; SSE-NEXT: pmovmskb %xmm1, %eax
-; SSE-NEXT: movl %eax, %ecx
-; SSE-NEXT: shrl $8, %ecx
-; SSE-NEXT: xorb %al, %cl
+; SSE-NEXT: xorb %ah, %al
; SSE-NEXT: setnp %al
; SSE-NEXT: retq
;
@@ -1168,9 +1116,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpmovmskb %xmm0, %eax
-; AVX-NEXT: movl %eax, %ecx
-; AVX-NEXT: shrl $8, %ecx
-; AVX-NEXT: xorb %al, %cl
+; AVX-NEXT: xorb %ah, %al
; AVX-NEXT: setnp %al
; AVX-NEXT: retq
;
@@ -1179,9 +1125,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpmovmskb %xmm0, %eax
-; AVX512F-NEXT: movl %eax, %ecx
-; AVX512F-NEXT: shrl $8, %ecx
-; AVX512F-NEXT: xorb %al, %cl
+; AVX512F-NEXT: xorb %ah, %al
; AVX512F-NEXT: setnp %al
; AVX512F-NEXT: retq
;
@@ -1371,9 +1315,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
; SSE-NEXT: pcmpeqw %xmm2, %xmm0
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: movl %eax, %ecx
-; SSE-NEXT: shrl $8, %ecx
-; SSE-NEXT: xorb %al, %cl
+; SSE-NEXT: xorb %ah, %al
; SSE-NEXT: setnp %al
; SSE-NEXT: retq
;
@@ -1385,9 +1327,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $8, %ecx
-; AVX1-NEXT: xorb %al, %cl
+; AVX1-NEXT: xorb %ah, %al
; AVX1-NEXT: setnp %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1399,9 +1339,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpmovmskb %xmm0, %eax
-; AVX2-NEXT: movl %eax, %ecx
-; AVX2-NEXT: shrl $8, %ecx
-; AVX2-NEXT: xorb %al, %cl
+; AVX2-NEXT: xorb %ah, %al
; AVX2-NEXT: setnp %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -1455,9 +1393,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
; SSE-NEXT: pcmpeqb %xmm2, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: movl %eax, %ecx
-; SSE-NEXT: shrl $8, %ecx
-; SSE-NEXT: xorb %al, %cl
+; SSE-NEXT: xorb %ah, %al
; SSE-NEXT: setnp %al
; SSE-NEXT: retq
;
@@ -1469,9 +1405,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $8, %ecx
-; AVX1-NEXT: xorb %al, %cl
+; AVX1-NEXT: xorb %ah, %al
; AVX1-NEXT: setnp %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1484,9 +1418,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $16, %ecx
; AVX2-NEXT: xorl %eax, %ecx
-; AVX2-NEXT: movl %ecx, %eax
-; AVX2-NEXT: shrl $8, %eax
-; AVX2-NEXT: xorb %cl, %al
+; AVX2-NEXT: xorb %ch, %cl
; AVX2-NEXT: setnp %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -1520,9 +1452,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
; AVX512BW-NEXT: movl %eax, %ecx
; AVX512BW-NEXT: shrl $16, %ecx
; AVX512BW-NEXT: xorl %eax, %ecx
-; AVX512BW-NEXT: movl %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: xorb %cl, %al
+; AVX512BW-NEXT: xorb %ch, %cl
; AVX512BW-NEXT: setnp %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@@ -1534,9 +1464,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
; AVX512VL-NEXT: movl %eax, %ecx
; AVX512VL-NEXT: shrl $16, %ecx
; AVX512VL-NEXT: xorl %eax, %ecx
-; AVX512VL-NEXT: movl %ecx, %eax
-; AVX512VL-NEXT: shrl $8, %eax
-; AVX512VL-NEXT: xorb %cl, %al
+; AVX512VL-NEXT: xorb %ch, %cl
; AVX512VL-NEXT: setnp %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@@ -1660,9 +1588,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: packsswb %xmm2, %xmm0
; SSE-NEXT: pmovmskb %xmm0, %eax
-; SSE-NEXT: movl %eax, %ecx
-; SSE-NEXT: shrl $8, %ecx
-; SSE-NEXT: xorb %al, %cl
+; SSE-NEXT: xorb %ah, %al
; SSE-NEXT: setnp %al
; SSE-NEXT: retq
;
@@ -1679,9 +1605,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $8, %ecx
-; AVX1-NEXT: xorb %al, %cl
+; AVX1-NEXT: xorb %ah, %al
; AVX1-NEXT: setnp %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1696,9 +1620,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT: vpmovmskb %xmm0, %eax
-; AVX2-NEXT: movl %eax, %ecx
-; AVX2-NEXT: shrl $8, %ecx
-; AVX2-NEXT: xorb %al, %cl
+; AVX2-NEXT: xorb %ah, %al
; AVX2-NEXT: setnp %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -1752,9 +1674,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
; SSE-NEXT: packsswb %xmm3, %xmm2
; SSE-NEXT: pxor %xmm0, %xmm2
; SSE-NEXT: pmovmskb %xmm2, %eax
-; SSE-NEXT: movl %eax, %ecx
-; SSE-NEXT: shrl $8, %ecx
-; SSE-NEXT: xorb %al, %cl
+; SSE-NEXT: xorb %ah, %al
; SSE-NEXT: setnp %al
; SSE-NEXT: retq
;
@@ -1771,9 +1691,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $8, %ecx
-; AVX1-NEXT: xorb %al, %cl
+; AVX1-NEXT: xorb %ah, %al
; AVX1-NEXT: setnp %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1789,9 +1707,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $16, %ecx
; AVX2-NEXT: xorl %eax, %ecx
-; AVX2-NEXT: movl %ecx, %eax
-; AVX2-NEXT: shrl $8, %eax
-; AVX2-NEXT: xorb %cl, %al
+; AVX2-NEXT: xorb %ch, %cl
; AVX2-NEXT: setnp %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -1825,9 +1741,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
; AVX512BW-NEXT: movl %eax, %ecx
; AVX512BW-NEXT: shrl $16, %ecx
; AVX512BW-NEXT: xorl %eax, %ecx
-; AVX512BW-NEXT: movl %ecx, %eax
-; AVX512BW-NEXT: shrl $8, %eax
-; AVX512BW-NEXT: xorb %cl, %al
+; AVX512BW-NEXT: xorb %ch, %cl
; AVX512BW-NEXT: setnp %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@@ -1839,9 +1753,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
; AVX512VL-NEXT: movl %eax, %ecx
; AVX512VL-NEXT: shrl $16, %ecx
; AVX512VL-NEXT: xorl %eax, %ecx
-; AVX512VL-NEXT: movl %ecx, %eax
-; AVX512VL-NEXT: shrl $8, %eax
-; AVX512VL-NEXT: xorb %cl, %al
+; AVX512VL-NEXT: xorb %ch, %cl
; AVX512VL-NEXT: setnp %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@@ -1862,9 +1774,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
; SSE-NEXT: pxor %xmm2, %xmm1
; SSE-NEXT: pxor %xmm0, %xmm1
; SSE-NEXT: pmovmskb %xmm1, %eax
-; SSE-NEXT: movl %eax, %ecx
-; SSE-NEXT: shrl $8, %ecx
-; SSE-NEXT: xorb %al, %cl
+; SSE-NEXT: xorb %ah, %al
; SSE-NEXT: setnp %al
; SSE-NEXT: retq
;
@@ -1881,9 +1791,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
; AVX1-NEXT: vpxor %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vpxor %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpmovmskb %xmm0, %eax
-; AVX1-NEXT: movl %eax, %ecx
-; AVX1-NEXT: shrl $8, %ecx
-; AVX1-NEXT: xorb %al, %cl
+; AVX1-NEXT: xorb %ah, %al
; AVX1-NEXT: setnp %al
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -1898,9 +1806,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $16, %ecx
; AVX2-NEXT: xorl %eax, %ecx
-; AVX2-NEXT: movl %ecx, %eax
-; AVX2-NEXT: shrl $8, %eax
-; AVX2-NEXT: xorb %cl, %al
+; AVX2-NEXT: xorb %ch, %cl
; AVX2-NEXT: setnp %al
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -1942,9 +1848,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
; AVX512BW-NEXT: movl %ecx, %eax
; AVX512BW-NEXT: shrl $16, %eax
; AVX512BW-NEXT: xorl %ecx, %eax
-; AVX512BW-NEXT: movl %eax, %ecx
-; AVX512BW-NEXT: shrl $8, %ecx
-; AVX512BW-NEXT: xorb %al, %cl
+; AVX512BW-NEXT: xorb %ah, %al
; AVX512BW-NEXT: setnp %al
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@@ -1959,9 +1863,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
; AVX512VL-NEXT: movl %ecx, %eax
; AVX512VL-NEXT: shrl $16, %eax
; AVX512VL-NEXT: xorl %ecx, %eax
-; AVX512VL-NEXT: movl %eax, %ecx
-; AVX512VL-NEXT: shrl $8, %ecx
-; AVX512VL-NEXT: xorb %al, %cl
+; AVX512VL-NEXT: xorb %ah, %al
; AVX512VL-NEXT: setnp %al
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq