[llvm] [X86] Pre-SSE42 v2i64 sgt lowering - check if representable as v2i32 (PR #83560)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 1 04:07:53 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
Without PCMPGTQ, if the i64 elements are sign-extended enough to be representable as i32, then we can compare the lower i32 bits with PCMPGTD and splat the results into the upper i32 elements.
Value tracking already gets us pretty close to this, but handling the case explicitly lets us remove a lot of unnecessary bit flipping.
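A minimal sketch of when the new path applies (my own illustration, not part of the patch): both operands below are sign extensions from i32, so `ComputeNumSignBits` reports more than 32 sign bits on each, and the signed i64 compare reduces to a signed i32 compare of the low halves. Assuming an SSE2-only target, this should now lower to a single `pcmpgtd` plus a `pshufd` splat rather than the old constant-biased sequence.

```llvm
; Hypothetical reproducer (assumes llc -mtriple=x86_64-- -mattr=+sse2).
define <2 x i64> @sgt_v2i64_sext(<2 x i32> %a, <2 x i32> %b) {
  ; Sign-extend both operands: every element now has at least 33 sign bits.
  %ea = sext <2 x i32> %a to <2 x i64>
  %eb = sext <2 x i32> %b to <2 x i64>
  ; For sign-extended values, i64 sgt is equivalent to i32 sgt on the low
  ; halves, which is what the PCMPGTD + {0,0,2,2} splat in the patch exploits.
  %c = icmp sgt <2 x i64> %ea, %eb
  %r = sext <2 x i1> %c to <2 x i64>
  ret <2 x i64> %r
}
```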
---
Patch is 194.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/83560.diff
2 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+14)
- (modified) llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll (+244-732)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b807a97d6e4851..0162bb65afe3b0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -23422,6 +23422,20 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getBitcast(VT, Result);
}
+ // If the i64 elements are sign-extended enough to be representable as i32
+ // then we can compare the lower i32 bits and splat.
+ if (!FlipSigns && !Invert && DAG.ComputeNumSignBits(Op0) > 32 &&
+ DAG.ComputeNumSignBits(Op1) > 32) {
+ Op0 = DAG.getBitcast(MVT::v4i32, Op0);
+ Op1 = DAG.getBitcast(MVT::v4i32, Op1);
+
+ SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
+ static const int MaskLo[] = {0, 0, 2, 2};
+ SDValue Result = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo);
+
+ return DAG.getBitcast(VT, Result);
+ }
+
// Since SSE has no unsigned integer comparisons, we need to flip the sign
// bits of the inputs before performing those operations. The lower
// compare is always unsigned.
diff --git a/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll b/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll
index 8aafec7427b4f3..c3d5a4b32edbc7 100644
--- a/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll
+++ b/llvm/test/CodeGen/X86/vector-popcnt-128-ult-ugt.ll
@@ -16972,7 +16972,6 @@ define <2 x i64> @ugt_2_v2i64(<2 x i64> %0) {
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: psadbw %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
-; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: retq
;
@@ -16995,7 +16994,6 @@ define <2 x i64> @ugt_2_v2i64(<2 x i64> %0) {
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: psadbw %xmm1, %xmm0
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
-; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT: retq
;
@@ -17014,7 +17012,6 @@ define <2 x i64> @ugt_2_v2i64(<2 x i64> %0) {
; SSSE3-NEXT: pxor %xmm0, %xmm0
; SSSE3-NEXT: psadbw %xmm3, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
-; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
@@ -17033,7 +17030,6 @@ define <2 x i64> @ugt_2_v2i64(<2 x i64> %0) {
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: psadbw %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
-; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: retq
;
@@ -17126,8 +17122,7 @@ define <2 x i64> @ult_3_v2i64(<2 x i64> %0) {
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: psadbw %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483651,2147483651,2147483651,2147483651]
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3]
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: retq
;
@@ -17150,8 +17145,7 @@ define <2 x i64> @ult_3_v2i64(<2 x i64> %0) {
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: psadbw %xmm1, %xmm0
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483651,2147483651,2147483651,2147483651]
+; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3]
; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
; SSE3-NEXT: retq
;
@@ -17170,8 +17164,7 @@ define <2 x i64> @ult_3_v2i64(<2 x i64> %0) {
; SSSE3-NEXT: pxor %xmm0, %xmm0
; SSSE3-NEXT: psadbw %xmm3, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483651,2147483651,2147483651,2147483651]
+; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3]
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
@@ -17190,8 +17183,7 @@ define <2 x i64> @ult_3_v2i64(<2 x i64> %0) {
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: psadbw %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483651,2147483651,2147483651,2147483651]
+; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [3,3,3,3]
; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
; SSE41-NEXT: retq
;
@@ -17288,7 +17280,6 @@ define <2 x i64> @ugt_3_v2i64(<2 x i64> %0) {
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: psadbw %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
-; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: retq
;
@@ -17311,7 +17302,6 @@ define <2 x i64> @ugt_3_v2i64(<2 x i64> %0) {
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: psadbw %xmm1, %xmm0
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
-; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT: retq
;
@@ -17330,7 +17320,6 @@ define <2 x i64> @ugt_3_v2i64(<2 x i64> %0) {
; SSSE3-NEXT: pxor %xmm0, %xmm0
; SSSE3-NEXT: psadbw %xmm3, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
-; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
@@ -17349,7 +17338,6 @@ define <2 x i64> @ugt_3_v2i64(<2 x i64> %0) {
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: psadbw %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
-; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: retq
;
@@ -17442,8 +17430,7 @@ define <2 x i64> @ult_4_v2i64(<2 x i64> %0) {
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: psadbw %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483652,2147483652,2147483652,2147483652]
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4]
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: retq
;
@@ -17466,8 +17453,7 @@ define <2 x i64> @ult_4_v2i64(<2 x i64> %0) {
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: psadbw %xmm1, %xmm0
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483652,2147483652,2147483652,2147483652]
+; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4]
; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
; SSE3-NEXT: retq
;
@@ -17486,8 +17472,7 @@ define <2 x i64> @ult_4_v2i64(<2 x i64> %0) {
; SSSE3-NEXT: pxor %xmm0, %xmm0
; SSSE3-NEXT: psadbw %xmm3, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483652,2147483652,2147483652,2147483652]
+; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [4,4,4,4]
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
@@ -17506,8 +17491,7 @@ define <2 x i64> @ult_4_v2i64(<2 x i64> %0) {
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: psadbw %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483652,2147483652,2147483652,2147483652]
+; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [4,4,4,4]
; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
; SSE41-NEXT: retq
;
@@ -17604,7 +17588,6 @@ define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) {
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: psadbw %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
-; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: retq
;
@@ -17627,7 +17610,6 @@ define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) {
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: psadbw %xmm1, %xmm0
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
-; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT: retq
;
@@ -17646,7 +17628,6 @@ define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) {
; SSSE3-NEXT: pxor %xmm0, %xmm0
; SSSE3-NEXT: psadbw %xmm3, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
-; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
@@ -17665,7 +17646,6 @@ define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) {
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: psadbw %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
-; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: retq
;
@@ -17758,8 +17738,7 @@ define <2 x i64> @ult_5_v2i64(<2 x i64> %0) {
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: psadbw %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483653,2147483653,2147483653,2147483653]
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5]
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: retq
;
@@ -17782,8 +17761,7 @@ define <2 x i64> @ult_5_v2i64(<2 x i64> %0) {
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: psadbw %xmm1, %xmm0
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483653,2147483653,2147483653,2147483653]
+; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5]
; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
; SSE3-NEXT: retq
;
@@ -17802,8 +17780,7 @@ define <2 x i64> @ult_5_v2i64(<2 x i64> %0) {
; SSSE3-NEXT: pxor %xmm0, %xmm0
; SSSE3-NEXT: psadbw %xmm3, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483653,2147483653,2147483653,2147483653]
+; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [5,5,5,5]
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
@@ -17822,8 +17799,7 @@ define <2 x i64> @ult_5_v2i64(<2 x i64> %0) {
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: psadbw %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483653,2147483653,2147483653,2147483653]
+; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [5,5,5,5]
; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
; SSE41-NEXT: retq
;
@@ -17920,7 +17896,6 @@ define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) {
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: psadbw %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
-; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: retq
;
@@ -17943,7 +17918,6 @@ define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) {
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: psadbw %xmm1, %xmm0
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
-; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT: retq
;
@@ -17962,7 +17936,6 @@ define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) {
; SSSE3-NEXT: pxor %xmm0, %xmm0
; SSSE3-NEXT: psadbw %xmm3, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
-; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
@@ -17981,7 +17954,6 @@ define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) {
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: psadbw %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
-; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: retq
;
@@ -18074,8 +18046,7 @@ define <2 x i64> @ult_6_v2i64(<2 x i64> %0) {
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: psadbw %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483654,2147483654,2147483654,2147483654]
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6]
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: retq
;
@@ -18098,8 +18069,7 @@ define <2 x i64> @ult_6_v2i64(<2 x i64> %0) {
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: psadbw %xmm1, %xmm0
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483654,2147483654,2147483654,2147483654]
+; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6]
; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
; SSE3-NEXT: retq
;
@@ -18118,8 +18088,7 @@ define <2 x i64> @ult_6_v2i64(<2 x i64> %0) {
; SSSE3-NEXT: pxor %xmm0, %xmm0
; SSSE3-NEXT: psadbw %xmm3, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483654,2147483654,2147483654,2147483654]
+; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [6,6,6,6]
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
@@ -18138,8 +18107,7 @@ define <2 x i64> @ult_6_v2i64(<2 x i64> %0) {
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: psadbw %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483654,2147483654,2147483654,2147483654]
+; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [6,6,6,6]
; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
; SSE41-NEXT: retq
;
@@ -18236,7 +18204,6 @@ define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) {
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: psadbw %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
-; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: retq
;
@@ -18259,7 +18226,6 @@ define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) {
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: psadbw %xmm1, %xmm0
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
-; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT: retq
;
@@ -18278,7 +18244,6 @@ define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) {
; SSSE3-NEXT: pxor %xmm0, %xmm0
; SSSE3-NEXT: psadbw %xmm3, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
-; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
@@ -18297,7 +18262,6 @@ define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) {
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: psadbw %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
-; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: retq
;
@@ -18390,8 +18354,7 @@ define <2 x i64> @ult_7_v2i64(<2 x i64> %0) {
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: psadbw %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483655,2147483655,2147483655,2147483655]
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7]
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: retq
;
@@ -18414,8 +18377,7 @@ define <2 x i64> @ult_7_v2i64(<2 x i64> %0) {
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: psadbw %xmm1, %xmm0
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483655,2147483655,2147483655,2147483655]
+; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7]
; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
; SSE3-NEXT: retq
;
@@ -18434,8 +18396,7 @@ define <2 x i64> @ult_7_v2i64(<2 x i64> %0) {
; SSSE3-NEXT: pxor %xmm0, %xmm0
; SSSE3-NEXT: psadbw %xmm3, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483655,2147483655,2147483655,2147483655]
+; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [7,7,7,7]
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
@@ -18454,8 +18415,7 @@ define <2 x i64> @ult_7_v2i64(<2 x i64> %0) {
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: psadbw %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483655,2147483655,2147483655,2147483655]
+; SSE41-NEXT: pmovsxbd {{.*#+}} xmm0 = [7,7,7,7]
; SSE41-NEXT: pcmpgtd %xmm1, %xmm0
; SSE41-NEXT: retq
;
@@ -18552,7 +18512,6 @@ define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) {
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: psadbw %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
-; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: retq
;
@@ -18575,7 +18534,6 @@ define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) {
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: psadbw %xmm1, %xmm0
; SSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
-; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT: retq
;
@@ -18594,7 +18552,6 @@ define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) {
; SSSE3-NEXT: pxor %xmm0, %xmm0
; SSSE3-NEXT: psadbw %xmm3, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
-; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
@@ -18613,7 +18570,6 @@ define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) {
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: psadbw %xmm3, %xmm0
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
-; SSE41-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: retq
;
@@ -18706,8 +18662,7 @@ define <2 x i64> @ult_8_v2i64(<2 x i64> %0) {
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: psadbw %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [2147483656,2147483656,2147483656,2147483656]
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; SSE2-NEXT: retq
;
@@ -18730,8 +18685,7 @@ define <2 x i64> @ult_8_v2i64(<2 x i64> %0) {
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: psadbw %xmm1, %xmm0
; SSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; SSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483656,2147483656,2147483656,2147483656]
+; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; SSE3-NEXT: pcmpgtd %xmm1, %xmm0
; SSE3-NEXT: retq
;
@@ -18750,8 +18704,7 @@ define <2 x i64> @ult_8_v2i64(<2 x i64> %0) {
; SSSE3-NEXT: pxor %xmm0, %xmm0
; SSSE3-NEXT: psadbw %xmm3, %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
-; SSSE3-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [2147483656,2147483656,2147483656,2147483656]
+; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [8,8,8,8]
; SSSE3-NEXT: pcmpgtd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
@@ -18770,8 +18723,7 @@ define <2 x i64> @ult_8_v2i64(<2 x i64> %0) {
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: psadbw %xmm3, %xmm0
; SSE41-NEXT: ...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/83560