[llvm] [TargetLowering] Add and (rot X, Y), Z ==/!= -1 --> (and X, Z) ==/!= -1 to foldSetCCWithRotate (PR #147068)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 4 08:46:15 PDT 2025
https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/147068
>From c1c2e710de02265a8208cfebc028d57e38e85e10 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Fri, 4 Jul 2025 10:45:23 -0400
Subject: [PATCH 1/2] Pre-commit test (NFC)
---
llvm/test/CodeGen/X86/setcc-fsh.ll | 349 +++++++++++++++++++++++++++++
1 file changed, 349 insertions(+)
diff --git a/llvm/test/CodeGen/X86/setcc-fsh.ll b/llvm/test/CodeGen/X86/setcc-fsh.ll
index 7ab63959f58b0..4ac0f242775f4 100644
--- a/llvm/test/CodeGen/X86/setcc-fsh.ll
+++ b/llvm/test/CodeGen/X86/setcc-fsh.ll
@@ -520,3 +520,352 @@ define i1 @fshl_or_ne_2(i32 %x, i32 %y) {
%r = icmp ne i32 %f, 2
ret i1 %r
}
+
+define i1 @and_rotl_eq_neg_1(i8 %x, i8 %y, i8 %z) nounwind {
+; CHECK-LABEL: and_rotl_eq_neg_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edx, %ecx
+; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT: rolb %cl, %dil
+; CHECK-NEXT: andb %sil, %dil
+; CHECK-NEXT: cmpb $-1, %dil
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+ %rot = tail call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 %z)
+ %and = and i8 %rot, %y
+ %r = icmp eq i8 %and, -1
+ ret i1 %r
+}
+
+define i1 @and_rotr_ne_neg_1(i64 %x, i64 %y, i64 %z) nounwind {
+; CHECK-LABEL: and_rotr_ne_neg_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq %rdx, %rcx
+; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx
+; CHECK-NEXT: rorq %cl, %rdi
+; CHECK-NEXT: testq %rdi, %rsi
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %rot = tail call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
+ %and = and i64 %y, %rot
+ %r = icmp ne i64 %and, 0
+ ret i1 %r
+}
+
+; negative test - wrong constant (compares against 0, not -1)
+
+define i1 @or_rotl_ne_neg_1(i32 %x, i32 %y, i32 %z) nounwind {
+; CHECK-LABEL: or_rotl_ne_neg_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edx, %ecx
+; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT: roll %cl, %edi
+; CHECK-NEXT: testl %edi, %esi
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %rot = tail call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z)
+ %and = and i32 %y, %rot
+ %r = icmp ne i32 %and, 0
+ ret i1 %r
+}
+
+; negative test - extra use
+
+define i1 @and_rotl_ne_neg_1_use(i32 %x, i32 %y, i32 %z) nounwind {
+; CHECK-LABEL: and_rotl_ne_neg_1_use:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: movl %edx, %ecx
+; CHECK-NEXT: movl %edi, %ebx
+; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT: roll %cl, %ebx
+; CHECK-NEXT: andl %esi, %ebx
+; CHECK-NEXT: movl %ebx, %edi
+; CHECK-NEXT: callq use32 at PLT
+; CHECK-NEXT: cmpl $-1, %ebx
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: retq
+ %rot = tail call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z)
+ %and = and i32 %y, %rot
+ call void @use32(i32 %and)
+ %r = icmp ne i32 %and, -1
+ ret i1 %r
+}
+
+define <4 x i1> @and_rotl_ne_eq_neg_1(<4 x i32> %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: and_rotl_ne_eq_neg_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movdqa %xmm1, %xmm2
+; CHECK-NEXT: pslld $23, %xmm2
+; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; CHECK-NEXT: cvttps2dq %xmm2, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-NEXT: pmuludq %xmm2, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
+; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-NEXT: pmuludq %xmm3, %xmm2
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,3,2,3]
+; CHECK-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
+; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,2,2,3]
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
+; CHECK-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
+; CHECK-NEXT: por %xmm4, %xmm3
+; CHECK-NEXT: pand %xmm1, %xmm3
+; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm3, %xmm0
+; CHECK-NEXT: retq
+ %rot = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32>%x, <4 x i32> %x, <4 x i32> %y)
+ %and = and <4 x i32> %y, %rot
+ %r = icmp eq <4 x i32> %and, <i32 -1, i32 -1, i32 -1, i32 poison>
+ ret <4 x i1> %r
+}
+
+define i1 @fshl_or_eq_neg_1(i32 %x, i32 %y) {
+; CHECK-LABEL: fshl_or_eq_neg_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andl %edi, %esi
+; CHECK-NEXT: shldl $5, %edi, %esi
+; CHECK-NEXT: cmpl $-1, %esi
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+ %and = and i32 %x, %y
+ %f = call i32 @llvm.fshl.i32(i32 %and, i32 %x, i32 5)
+ %r = icmp eq i32 %f, -1
+ ret i1 %r
+}
+
+define i1 @fshl_and_commute_eq_neg_1(i32 %x, i32 %y) {
+; CHECK-LABEL: fshl_and_commute_eq_neg_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andl %edi, %esi
+; CHECK-NEXT: shldl $5, %edi, %esi
+; CHECK-NEXT: cmpl $-1, %esi
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+ %and = and i32 %y, %x
+ %f = call i32 @llvm.fshl.i32(i32 %and, i32 %x, i32 5)
+ %r = icmp eq i32 %f, -1
+ ret i1 %r
+}
+
+define <4 x i1> @fshl_and2_eq_neg_1(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: fshl_and2_eq_neg_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pand %xmm0, %xmm1
+; CHECK-NEXT: psrld $7, %xmm1
+; CHECK-NEXT: pslld $25, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %and = and <4 x i32> %x, %y
+ %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %and, <4 x i32> <i32 25, i32 25, i32 25, i32 25>)
+ %r = icmp eq <4 x i32> %f, <i32 -1, i32 -1, i32 -1, i32 -1>
+ ret <4 x i1> %r
+}
+
+define <4 x i1> @fshl_and2_commute_eq_neg_1(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: fshl_and2_commute_eq_neg_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pand %xmm0, %xmm1
+; CHECK-NEXT: psrld $7, %xmm1
+; CHECK-NEXT: pslld $25, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %and = and <4 x i32> %y, %x
+ %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %and, <4 x i32> <i32 25, i32 25, i32 25, i32 25>)
+ %r = icmp eq <4 x i32> %f, <i32 -1, i32 -1, i32 -1, i32 -1>
+ ret <4 x i1> %r
+}
+
+define i1 @fshr_and_eq_neg_1(i16 %x, i16 %y) {
+; CHECK-LABEL: fshr_and_eq_neg_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andl %edi, %esi
+; CHECK-NEXT: shldw $8, %di, %si
+; CHECK-NEXT: cmpw $-1, %si
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+ %and = and i16 %x, %y
+ %f = call i16 @llvm.fshr.i16(i16 %and, i16 %x, i16 8)
+ %r = icmp eq i16 %f, -1
+ ret i1 %r
+}
+
+define i1 @fshr_and_commute_eq_neg_1(i16 %x, i16 %y) {
+; CHECK-LABEL: fshr_and_commute_eq_neg_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andl %edi, %esi
+; CHECK-NEXT: shldw $8, %di, %si
+; CHECK-NEXT: cmpw $-1, %si
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+ %and = and i16 %y, %x
+ %f = call i16 @llvm.fshr.i16(i16 %and, i16 %x, i16 8)
+ %r = icmp eq i16 %f, -1
+ ret i1 %r
+}
+
+define i1 @fshr_and2_eq_neg_1(i64 %x, i64 %y) {
+; CHECK-LABEL: fshr_and2_eq_neg_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andq %rdi, %rsi
+; CHECK-NEXT: shrdq $3, %rdi, %rsi
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+ %and = and i64 %x, %y
+ %f = call i64 @llvm.fshr.i64(i64 %x, i64 %and, i64 3)
+ %r = icmp eq i64 %f, 0
+ ret i1 %r
+}
+
+define i1 @fshr_and2_commute_eq_neg_1(i64 %x, i64 %y) {
+; CHECK-LABEL: fshr_and2_commute_eq_neg_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andq %rdi, %rsi
+; CHECK-NEXT: shrdq $3, %rdi, %rsi
+; CHECK-NEXT: cmpq $-1, %rsi
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+ %and = and i64 %y, %x
+ %f = call i64 @llvm.fshr.i64(i64 %x, i64 %and, i64 3)
+ %r = icmp eq i64 %f, -1
+ ret i1 %r
+}
+
+define i1 @fshl_and_ne_neg_1(i32 %x, i32 %y) {
+; CHECK-LABEL: fshl_and_ne_neg_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andl %edi, %esi
+; CHECK-NEXT: shldl $7, %edi, %esi
+; CHECK-NEXT: cmpl $-1, %esi
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %and = and i32 %x, %y
+ %f = call i32 @llvm.fshl.i32(i32 %and, i32 %x, i32 7)
+ %r = icmp ne i32 %f, -1
+ ret i1 %r
+}
+
+define i1 @fshl_and_commute_ne_neg_1(i32 %x, i32 %y) {
+; CHECK-LABEL: fshl_and_commute_ne_neg_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andl %edi, %esi
+; CHECK-NEXT: shldl $7, %edi, %esi
+; CHECK-NEXT: cmpl $-1, %esi
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %and = and i32 %y, %x
+ %f = call i32 @llvm.fshl.i32(i32 %and, i32 %x, i32 7)
+ %r = icmp ne i32 %f, -1
+ ret i1 %r
+}
+
+define <4 x i1> @fshl_and2_ne_neg_1(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: fshl_and2_ne_neg_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pand %xmm0, %xmm1
+; CHECK-NEXT: psrld $27, %xmm1
+; CHECK-NEXT: pslld $5, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %and = and <4 x i32> %x, %y
+ %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %and, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
+ %r = icmp ne <4 x i32> %f, <i32 -1, i32 -1, i32 -1, i32 -1>
+ ret <4 x i1> %r
+}
+
+define <4 x i1> @fshl_and2_commute_ne_neg_1(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: fshl_and2_commute_ne_neg_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pand %xmm0, %xmm1
+; CHECK-NEXT: psrld $27, %xmm1
+; CHECK-NEXT: pslld $5, %xmm0
+; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %and = and <4 x i32> %y, %x
+ %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %and, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
+ %r = icmp ne <4 x i32> %f, <i32 -1, i32 -1, i32 -1, i32 -1>
+ ret <4 x i1> %r
+}
+
+define i1 @fshr_and_ne_neg_1(i64 %x, i64 %y) {
+; CHECK-LABEL: fshr_and_ne_neg_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andl %edi, %esi
+; CHECK-NEXT: shldq $63, %rdi, %rsi
+; CHECK-NEXT: cmpq $-1, %rsi
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %and = and i64 %x, %y
+ %f = call i64 @llvm.fshr.i64(i64 %and, i64 %x, i64 1)
+ %r = icmp ne i64 %f, -1
+ ret i1 %r
+}
+
+define i1 @fshr_and_commute_ne_neg_1(i64 %x, i64 %y) {
+; CHECK-LABEL: fshr_and_commute_ne_neg_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andl %edi, %esi
+; CHECK-NEXT: shldq $63, %rdi, %rsi
+; CHECK-NEXT: cmpq $-1, %rsi
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %and = and i64 %y, %x
+ %f = call i64 @llvm.fshr.i64(i64 %and, i64 %x, i64 1)
+ %r = icmp ne i64 %f, -1
+ ret i1 %r
+}
+
+define i1 @fshr_and2_ne_neg_1(i16 %x, i16 %y) {
+; CHECK-LABEL: fshr_and2_ne_neg_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andl %edi, %esi
+; CHECK-NEXT: shrdw $2, %di, %si
+; CHECK-NEXT: cmpw $-1, %si
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %and = and i16 %x, %y
+ %f = call i16 @llvm.fshr.i16(i16 %x, i16 %and, i16 2)
+ %r = icmp ne i16 %f, -1
+ ret i1 %r
+}
+
+define i1 @fshr_and2_commute_ne_neg_1(i16 %x, i16 %y) {
+; CHECK-LABEL: fshr_and2_commute_ne_neg_1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andl %edi, %esi
+; CHECK-NEXT: shrdw $2, %di, %si
+; CHECK-NEXT: cmpw $-1, %si
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %and = and i16 %y, %x
+ %f = call i16 @llvm.fshr.i16(i16 %x, i16 %and, i16 2)
+ %r = icmp ne i16 %f, -1
+ ret i1 %r
+}
+
+define i1 @fshl_and_ne_neg_1_2(i32 %x, i32 %y) {
+; CHECK-LABEL: fshl_and_ne_neg_1_2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andl %edi, %esi
+; CHECK-NEXT: shldl $2, %edi, %esi
+; CHECK-NEXT: cmpl $2, %esi
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %and = and i32 %x, %y
+ %f = call i32 @llvm.fshl.i32(i32 %and, i32 %x, i32 2)
+ %r = icmp ne i32 %f, 2
+ ret i1 %r
+}
+
>From b892ec441af046fe4fee43323aae7b6c6497424d Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Fri, 4 Jul 2025 10:31:10 -0400
Subject: [PATCH 2/2] [TargetLowering] Add and (rot X, Y), Z ==/!= -1 --> (and
X, Z) ==/!= -1 to foldSetCCWithRotate
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 16 ++++++++++-
llvm/test/CodeGen/X86/setcc-fsh.ll | 27 +++----------------
2 files changed, 19 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 000f8cc6786a5..0d62533f9218d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -4567,7 +4567,6 @@ static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
// or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
// or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
//
- // TODO: Add the 'and' with -1 sibling.
// TODO: Recurse through a series of 'or' ops to find the rotate.
EVT OpVT = N0.getValueType();
if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
@@ -4581,6 +4580,21 @@ static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
}
}
+ // and (rot X, Y), Z ==/!= -1 --> (and X, Z) ==/!= -1
+ // and Z, (rot X, Y) ==/!= -1 --> (and X, Z) ==/!= -1
+ //
+ // TODO: Recurse through a series of 'and' ops to find the rotate.
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::AND && C1->isAllOnes()) {
+ if (SDValue R = getRotateSource(N0.getOperand(0))) {
+ SDValue NewAnd = DAG.getNode(ISD::AND, dl, OpVT, R, N0.getOperand(1));
+ return DAG.getSetCC(dl, VT, NewAnd, N1, Cond);
+ }
+ if (SDValue R = getRotateSource(N0.getOperand(1))) {
+ SDValue NewAnd = DAG.getNode(ISD::AND, dl, OpVT, R, N0.getOperand(0));
+ return DAG.getSetCC(dl, VT, NewAnd, N1, Cond);
+ }
+ }
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/X86/setcc-fsh.ll b/llvm/test/CodeGen/X86/setcc-fsh.ll
index 4ac0f242775f4..c9d661fee3863 100644
--- a/llvm/test/CodeGen/X86/setcc-fsh.ll
+++ b/llvm/test/CodeGen/X86/setcc-fsh.ll
@@ -524,10 +524,7 @@ define i1 @fshl_or_ne_2(i32 %x, i32 %y) {
define i1 @and_rotl_eq_neg_1(i8 %x, i8 %y, i8 %z) nounwind {
; CHECK-LABEL: and_rotl_eq_neg_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edx, %ecx
-; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT: rolb %cl, %dil
-; CHECK-NEXT: andb %sil, %dil
+; CHECK-NEXT: andl %esi, %edi
; CHECK-NEXT: cmpb $-1, %dil
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
@@ -596,25 +593,9 @@ define i1 @and_rotl_ne_neg_1_use(i32 %x, i32 %y, i32 %z) nounwind {
define <4 x i1> @and_rotl_ne_eq_neg_1(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: and_rotl_ne_eq_neg_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: movdqa %xmm1, %xmm2
-; CHECK-NEXT: pslld $23, %xmm2
-; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
-; CHECK-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
-; CHECK-NEXT: cvttps2dq %xmm2, %xmm2
-; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; CHECK-NEXT: pmuludq %xmm2, %xmm0
-; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
-; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; CHECK-NEXT: pmuludq %xmm3, %xmm2
-; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,3,2,3]
-; CHECK-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
-; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,2,2,3]
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
-; CHECK-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
-; CHECK-NEXT: por %xmm4, %xmm3
-; CHECK-NEXT: pand %xmm1, %xmm3
-; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT: pcmpeqd %xmm3, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT: pand %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm0
; CHECK-NEXT: retq
%rot = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32>%x, <4 x i32> %x, <4 x i32> %y)
%and = and <4 x i32> %y, %rot
More information about the llvm-commits mailing list