[llvm] [PowerPC] replace vector compare equal to with vector compare greater than (PR #150422)

via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 28 21:15:21 PDT 2025


https://github.com/Himadhith updated https://github.com/llvm/llvm-project/pull/150422

>From 9c6127ae6269f4fa287eeee159e73d7a9df7fa85 Mon Sep 17 00:00:00 2001
From: himadhith <himadhith.v at ibm.com>
Date: Thu, 24 Jul 2025 13:40:10 +0000
Subject: [PATCH] [PowerPC] vector compare greater than support for Zero vector
 comparisons

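Lower a vector 'not equal to zero-vector' comparison to a single unsigned
vector compare greater than (vcmpgtub/vcmpgtuh/vcmpgtuw) instead of a vector
compare equal to (vcmpequ*) followed by a negation (xxlnor/vnot).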
---
 llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp   |  11 ++
 .../test/CodeGen/PowerPC/check-zero-vector.ll | 120 +++++++++---------
 llvm/test/CodeGen/PowerPC/pr61315.ll          |  61 +--------
 llvm/test/CodeGen/PowerPC/recipest.ll         |   2 +-
 llvm/test/CodeGen/PowerPC/setcc-logic.ll      |   3 +-
 .../PowerPC/vector-popcnt-128-ult-ugt.ll      |  30 ++---
 6 files changed, 85 insertions(+), 142 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 415164fc9e2cb..e10890087bf06 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -4570,6 +4570,17 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
     if (Subtarget->hasSPE())
       return false;
 
+    // Optimise 'not equal to zero-vector' comparisons into unsigned 'greater
+    // than' / 'less than' ones: as an unsigned integer, any non-zero k is > 0.
+    // NOTE(review): SETNE on FP vectors also reaches here - confirm intended.
+    // for k != 0, change SETNE to SETUGT (k > 0)
+    // for 0 != k, change SETNE to SETULT (0 < k)
+    if (CC == ISD::SETNE) {
+      if (ISD::isBuildVectorAllZeros(RHS.getNode()))
+        CC = ISD::SETUGT;
+      else if (ISD::isBuildVectorAllZeros(LHS.getNode()))
+        CC = ISD::SETULT;
+    }
     EVT VecVT = LHS.getValueType();
     bool Swap, Negate;
     unsigned int VCmpInst =
diff --git a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll b/llvm/test/CodeGen/PowerPC/check-zero-vector.ll
index d8e66d6500f5f..8180bf2f7d844 100644
--- a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll
+++ b/llvm/test/CodeGen/PowerPC/check-zero-vector.ll
@@ -1,84 +1,80 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
-; RUN:    < %s | FileCheck %s --check-prefix=POWERPC_64LE
+; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr   < %s | FileCheck %s --check-prefix=POWERPC_64LE
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64-ibm-aix \
-; RUN:  < %s | FileCheck %s --check-prefix=POWERPC_64
+; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=POWERPC_64
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc-ibm-aix \
-; RUN:   < %s | FileCheck %s --check-prefix=POWERPC_32
+; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr  < %s | FileCheck %s --check-prefix=POWERPC_32
 
 define i32 @test_Greater_than(ptr %colauths) {
 ; This testcase is for the special case of zero-vector comparisons.
-; Currently the generated code does a comparison (vcmpequh) and then a negation (xxlnor).
-; This pattern is expected to be optimized in a future patch.
+; Optimised version using vcmpgtuh.
 ; POWERPC_64LE-LABEL: test_Greater_than:
 ; POWERPC_64LE:       # %bb.0: # %entry
-; POWERPC_64LE-NEXT:    lfd 0, 0(3)
-; POWERPC_64LE-NEXT:    xxlxor 35, 35, 35
-; POWERPC_64LE-NEXT:    li 4, 0
-; POWERPC_64LE-NEXT:    li 3, 4
-; POWERPC_64LE-NEXT:    xxswapd 34, 0
-; POWERPC_64LE-NEXT:    vcmpequh 2, 2, 3
-; POWERPC_64LE-NEXT:    xxlnor 34, 34, 34
-; POWERPC_64LE-NEXT:    vmrglh 3, 2, 2
-; POWERPC_64LE-NEXT:    vextuwrx 4, 4, 2
-; POWERPC_64LE-NEXT:    vextuwrx 3, 3, 3
-; POWERPC_64LE-NEXT:    clrlwi 4, 4, 31
-; POWERPC_64LE-NEXT:    rlwimi 4, 3, 1, 30, 30
-; POWERPC_64LE-NEXT:    mfvsrwz 3, 35
-; POWERPC_64LE-NEXT:    rlwimi 4, 3, 2, 29, 29
-; POWERPC_64LE-NEXT:    li 3, 12
-; POWERPC_64LE-NEXT:    vextuwrx 3, 3, 3
-; POWERPC_64LE-NEXT:    rlwimi 4, 3, 3, 28, 28
-; POWERPC_64LE-NEXT:    stb 4, -1(1)
-; POWERPC_64LE-NEXT:    lbz 3, -1(1)
-; POWERPC_64LE-NEXT:    popcntd 3, 3
+; POWERPC_64LE-NEXT:    lfd f0, 0(r3)
+; POWERPC_64LE-NEXT:    xxlxor v3, v3, v3
+; POWERPC_64LE-NEXT:    li r4, 0
+; POWERPC_64LE-NEXT:    li r3, 4
+; POWERPC_64LE-NEXT:    xxswapd v2, f0
+; POWERPC_64LE-NEXT:    vcmpgtuh v2, v2, v3
+; POWERPC_64LE-NEXT:    vmrglh v3, v2, v2
+; POWERPC_64LE-NEXT:    vextuwrx r4, r4, v2
+; POWERPC_64LE-NEXT:    vextuwrx r3, r3, v3
+; POWERPC_64LE-NEXT:    clrlwi r4, r4, 31
+; POWERPC_64LE-NEXT:    rlwimi r4, r3, 1, 30, 30
+; POWERPC_64LE-NEXT:    mfvsrwz r3, v3
+; POWERPC_64LE-NEXT:    rlwimi r4, r3, 2, 29, 29
+; POWERPC_64LE-NEXT:    li r3, 12
+; POWERPC_64LE-NEXT:    vextuwrx r3, r3, v3
+; POWERPC_64LE-NEXT:    rlwimi r4, r3, 3, 28, 28
+; POWERPC_64LE-NEXT:    stb r4, -1(r1)
+; POWERPC_64LE-NEXT:    lbz r3, -1(r1)
+; POWERPC_64LE-NEXT:    popcntd r3, r3
 ; POWERPC_64LE-NEXT:    blr
 ;
 ; POWERPC_64-LABEL: test_Greater_than:
 ; POWERPC_64:       # %bb.0: # %entry
-; POWERPC_64-NEXT:    lxsd 2, 0(3)
-; POWERPC_64-NEXT:    xxlxor 35, 35, 35
-; POWERPC_64-NEXT:    li 4, 12
-; POWERPC_64-NEXT:    li 3, 8
-; POWERPC_64-NEXT:    vcmpequh 2, 2, 3
-; POWERPC_64-NEXT:    xxlnor 34, 34, 34
-; POWERPC_64-NEXT:    vmrghh 2, 2, 2
-; POWERPC_64-NEXT:    vextuwlx 4, 4, 2
-; POWERPC_64-NEXT:    vextuwlx 3, 3, 2
-; POWERPC_64-NEXT:    clrlwi 4, 4, 31
-; POWERPC_64-NEXT:    rlwimi 4, 3, 1, 30, 30
-; POWERPC_64-NEXT:    mfvsrwz 3, 34
-; POWERPC_64-NEXT:    rlwimi 4, 3, 2, 29, 29
-; POWERPC_64-NEXT:    li 3, 0
-; POWERPC_64-NEXT:    vextuwlx 3, 3, 2
-; POWERPC_64-NEXT:    rlwimi 4, 3, 3, 28, 28
-; POWERPC_64-NEXT:    stb 4, -1(1)
-; POWERPC_64-NEXT:    lbz 3, -1(1)
-; POWERPC_64-NEXT:    popcntd 3, 3
+; POWERPC_64-NEXT:    lxsd v2, 0(r3)
+; POWERPC_64-NEXT:    xxlxor v3, v3, v3
+; POWERPC_64-NEXT:    li r4, 12
+; POWERPC_64-NEXT:    li r3, 8
+; POWERPC_64-NEXT:    vcmpgtuh v2, v2, v3
+; POWERPC_64-NEXT:    vmrghh v2, v2, v2
+; POWERPC_64-NEXT:    vextuwlx r4, r4, v2
+; POWERPC_64-NEXT:    vextuwlx r3, r3, v2
+; POWERPC_64-NEXT:    clrlwi r4, r4, 31
+; POWERPC_64-NEXT:    rlwimi r4, r3, 1, 30, 30
+; POWERPC_64-NEXT:    mfvsrwz r3, v2
+; POWERPC_64-NEXT:    rlwimi r4, r3, 2, 29, 29
+; POWERPC_64-NEXT:    li r3, 0
+; POWERPC_64-NEXT:    vextuwlx r3, r3, v2
+; POWERPC_64-NEXT:    rlwimi r4, r3, 3, 28, 28
+; POWERPC_64-NEXT:    stb r4, -1(r1)
+; POWERPC_64-NEXT:    lbz r3, -1(r1)
+; POWERPC_64-NEXT:    popcntd r3, r3
 ; POWERPC_64-NEXT:    blr
 ;
 ; POWERPC_32-LABEL: test_Greater_than:
 ; POWERPC_32:       # %bb.0: # %entry
-; POWERPC_32-NEXT:    li 4, 4
-; POWERPC_32-NEXT:    lxvwsx 1, 0, 3
-; POWERPC_32-NEXT:    xxlxor 35, 35, 35
-; POWERPC_32-NEXT:    lxvwsx 0, 3, 4
-; POWERPC_32-NEXT:    xxmrghw 34, 1, 0
-; POWERPC_32-NEXT:    vcmpequh 2, 2, 3
-; POWERPC_32-NEXT:    xxlnor 34, 34, 34
-; POWERPC_32-NEXT:    vmrghh 2, 2, 2
-; POWERPC_32-NEXT:    stxv 34, -32(1)
-; POWERPC_32-NEXT:    lwz 3, -20(1)
-; POWERPC_32-NEXT:    lwz 4, -24(1)
-; POWERPC_32-NEXT:    clrlwi 3, 3, 31
-; POWERPC_32-NEXT:    rlwimi 3, 4, 1, 30, 30
-; POWERPC_32-NEXT:    lwz 4, -28(1)
-; POWERPC_32-NEXT:    rlwimi 3, 4, 2, 29, 29
-; POWERPC_32-NEXT:    lwz 4, -32(1)
-; POWERPC_32-NEXT:    rlwimi 3, 4, 3, 28, 28
-; POWERPC_32-NEXT:    popcntw 3, 3
+; POWERPC_32-NEXT:    li r4, 4
+; POWERPC_32-NEXT:    lxvwsx vs1, 0, r3
+; POWERPC_32-NEXT:    xxlxor v3, v3, v3
+; POWERPC_32-NEXT:    lxvwsx vs0, r3, r4
+; POWERPC_32-NEXT:    xxmrghw v2, vs1, vs0
+; POWERPC_32-NEXT:    vcmpgtuh v2, v2, v3
+; POWERPC_32-NEXT:    vmrghh v2, v2, v2
+; POWERPC_32-NEXT:    stxv v2, -32(r1)
+; POWERPC_32-NEXT:    lwz r3, -20(r1)
+; POWERPC_32-NEXT:    lwz r4, -24(r1)
+; POWERPC_32-NEXT:    clrlwi r3, r3, 31
+; POWERPC_32-NEXT:    rlwimi r3, r4, 1, 30, 30
+; POWERPC_32-NEXT:    lwz r4, -28(r1)
+; POWERPC_32-NEXT:    rlwimi r3, r4, 2, 29, 29
+; POWERPC_32-NEXT:    lwz r4, -32(r1)
+; POWERPC_32-NEXT:    rlwimi r3, r4, 3, 28, 28
+; POWERPC_32-NEXT:    popcntw r3, r3
 ; POWERPC_32-NEXT:    blr
 entry:
   %0 = load <4 x i16>, ptr %colauths, align 2, !tbaa !5
diff --git a/llvm/test/CodeGen/PowerPC/pr61315.ll b/llvm/test/CodeGen/PowerPC/pr61315.ll
index 87208691eb047..b8a1d46afc3b2 100644
--- a/llvm/test/CodeGen/PowerPC/pr61315.ll
+++ b/llvm/test/CodeGen/PowerPC/pr61315.ll
@@ -1,23 +1,7 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
 ; RUN:   -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s
 define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0) local_unnamed_addr #0 {
-; CHECK: .LCPI0_0:
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
 ; CHECK-LABEL: ConvertExtractedMaskBitsToVect:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addis r3, r2, .LCPI0_0 at toc@ha
@@ -29,8 +13,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0)
 ; CHECK-NEXT:    xxperm v2, v3, vs0
 ; CHECK-NEXT:    lxv vs0, 0(r3)
 ; CHECK-NEXT:    xxland v2, v2, vs0
-; CHECK-NEXT:    vcmpequb v2, v2, v3
-; CHECK-NEXT:    xxlnor v2, v2, v2
+; CHECK-NEXT:    vcmpgtub v2, v2, v3
 ; CHECK-NEXT:    blr
   %a4 = extractelement <16 x i8> %0, i64 7
   %a5 = zext i8 %a4 to i16
@@ -44,23 +27,6 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0)
 }
 
 define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0) local_unnamed_addr #0 {
-; CHECK: .LCPI1_0:
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
 ; CHECK-LABEL: ConvertExtractedMaskBitsToVect2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addis r3, r2, .LCPI1_0 at toc@ha
@@ -72,8 +38,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0
 ; CHECK-NEXT:    xxperm v2, v3, vs0
 ; CHECK-NEXT:    lxv vs0, 0(r3)
 ; CHECK-NEXT:    xxland v2, v2, vs0
-; CHECK-NEXT:    vcmpequb v2, v2, v3
-; CHECK-NEXT:    xxlnor v2, v2, v2
+; CHECK-NEXT:    vcmpgtub v2, v2, v3
 ; CHECK-NEXT:    blr
   %a4 = extractelement <16 x i8> %0, i64 7
   %a5 = zext i8 %a4 to i32
@@ -87,23 +52,6 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0
 }
 
 define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect3(<8 x i16> noundef %0) local_unnamed_addr #0 {
-; CHECK: .LCPI2_0:
-; CHECK-NEXT: .byte	22                                # 0x16
-; CHECK-NEXT: .byte	23                                # 0x17
-; CHECK-NEXT: .byte	22                                # 0x16
-; CHECK-NEXT: .byte	23                                # 0x17
-; CHECK-NEXT: .byte	22                                # 0x16
-; CHECK-NEXT: .byte	23                                # 0x17
-; CHECK-NEXT: .byte	22                                # 0x16
-; CHECK-NEXT: .byte	23                                # 0x17
-; CHECK-NEXT: .byte	0                                 # 0x0
-; CHECK-NEXT: .byte	0                                 # 0x0
-; CHECK-NEXT: .byte	0                                 # 0x0
-; CHECK-NEXT: .byte	0                                 # 0x0
-; CHECK-NEXT: .byte	0                                 # 0x0
-; CHECK-NEXT: .byte	0                                 # 0x0
-; CHECK-NEXT: .byte	0                                 # 0x0
-; CHECK-NEXT: .byte	0                                 # 0x0
 ; CHECK-LABEL: ConvertExtractedMaskBitsToVect3:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addis r3, r2, .LCPI2_0 at toc@ha
@@ -115,8 +63,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect3(<8 x i16> noundef %0
 ; CHECK-NEXT:    xxperm v2, v3, vs0
 ; CHECK-NEXT:    lxv vs0, 0(r3)
 ; CHECK-NEXT:    xxland v2, v2, vs0
-; CHECK-NEXT:    vcmpequb v2, v2, v3
-; CHECK-NEXT:    xxlnor v2, v2, v2
+; CHECK-NEXT:    vcmpgtub v2, v2, v3
 ; CHECK-NEXT:    blr
   %a4 = extractelement <8 x i16> %0, i64 3
   %a5 = zext i16 %a4 to i32
diff --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll
index 2598a410b8761..c3ee1c553eebe 100644
--- a/llvm/test/CodeGen/PowerPC/recipest.ll
+++ b/llvm/test/CodeGen/PowerPC/recipest.ll
@@ -1031,7 +1031,7 @@ define <4 x float> @hoo3_fmf(<4 x float> %a) #1 {
 ; CHECK-P7-NEXT:    vmaddfp 5, 5, 0, 3
 ; CHECK-P7-NEXT:    vmaddfp 3, 5, 4, 3
 ; CHECK-P7-NEXT:    vxor 4, 4, 4
-; CHECK-P7-NEXT:    vcmpeqfp 2, 2, 4
+; CHECK-P7-NEXT:    vcmpgefp 2, 4, 2
 ; CHECK-P7-NEXT:    vnot 2, 2
 ; CHECK-P7-NEXT:    vand 2, 2, 3
 ; CHECK-P7-NEXT:    blr
diff --git a/llvm/test/CodeGen/PowerPC/setcc-logic.ll b/llvm/test/CodeGen/PowerPC/setcc-logic.ll
index 1c3ac17666e26..90f963de428e9 100644
--- a/llvm/test/CodeGen/PowerPC/setcc-logic.ll
+++ b/llvm/test/CodeGen/PowerPC/setcc-logic.ll
@@ -366,8 +366,7 @@ define <4 x i1> @any_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xxlor 34, 34, 35
 ; CHECK-NEXT:    xxlxor 35, 35, 35
-; CHECK-NEXT:    vcmpequw 2, 2, 3
-; CHECK-NEXT:    xxlnor 34, 34, 34
+; CHECK-NEXT:    vcmpgtuw 2, 2, 3
 ; CHECK-NEXT:    blr
   %a = icmp ne <4 x i32> %P, zeroinitializer
   %b = icmp ne <4 x i32> %Q, zeroinitializer
diff --git a/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll b/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll
index 43cbc62e0bb1c..986f255df4bd6 100644
--- a/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll
@@ -13,8 +13,7 @@ define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) {
 ; PWR5-NEXT:    vaddubm 3, 2, 3
 ; PWR5-NEXT:    vand 2, 2, 3
 ; PWR5-NEXT:    vxor 3, 3, 3
-; PWR5-NEXT:    vcmpequb 2, 2, 3
-; PWR5-NEXT:    vnot 2, 2
+; PWR5-NEXT:    vcmpgtub 2, 2, 3
 ; PWR5-NEXT:    blr
 ;
 ; PWR6-LABEL: ugt_1_v16i8:
@@ -23,8 +22,7 @@ define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) {
 ; PWR6-NEXT:    vaddubm 3, 2, 3
 ; PWR6-NEXT:    vand 2, 2, 3
 ; PWR6-NEXT:    vxor 3, 3, 3
-; PWR6-NEXT:    vcmpequb 2, 2, 3
-; PWR6-NEXT:    vnot 2, 2
+; PWR6-NEXT:    vcmpgtub 2, 2, 3
 ; PWR6-NEXT:    blr
 ;
 ; PWR7-LABEL: ugt_1_v16i8:
@@ -33,8 +31,7 @@ define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) {
 ; PWR7-NEXT:    vaddubm 3, 2, 3
 ; PWR7-NEXT:    xxland 34, 34, 35
 ; PWR7-NEXT:    xxlxor 35, 35, 35
-; PWR7-NEXT:    vcmpequb 2, 2, 3
-; PWR7-NEXT:    xxlnor 34, 34, 34
+; PWR7-NEXT:    vcmpgtub 2, 2, 3
 ; PWR7-NEXT:    blr
 ;
 ; PWR8-LABEL: ugt_1_v16i8:
@@ -1081,8 +1078,7 @@ define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) {
 ; PWR5-NEXT:    vadduhm 3, 2, 3
 ; PWR5-NEXT:    vand 2, 2, 3
 ; PWR5-NEXT:    vxor 3, 3, 3
-; PWR5-NEXT:    vcmpequh 2, 2, 3
-; PWR5-NEXT:    vnot 2, 2
+; PWR5-NEXT:    vcmpgtuh 2, 2, 3
 ; PWR5-NEXT:    blr
 ;
 ; PWR6-LABEL: ugt_1_v8i16:
@@ -1091,8 +1087,7 @@ define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) {
 ; PWR6-NEXT:    vadduhm 3, 2, 3
 ; PWR6-NEXT:    vand 2, 2, 3
 ; PWR6-NEXT:    vxor 3, 3, 3
-; PWR6-NEXT:    vcmpequh 2, 2, 3
-; PWR6-NEXT:    vnot 2, 2
+; PWR6-NEXT:    vcmpgtuh 2, 2, 3
 ; PWR6-NEXT:    blr
 ;
 ; PWR7-LABEL: ugt_1_v8i16:
@@ -1101,8 +1096,7 @@ define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) {
 ; PWR7-NEXT:    vadduhm 3, 2, 3
 ; PWR7-NEXT:    xxland 34, 34, 35
 ; PWR7-NEXT:    xxlxor 35, 35, 35
-; PWR7-NEXT:    vcmpequh 2, 2, 3
-; PWR7-NEXT:    xxlnor 34, 34, 34
+; PWR7-NEXT:    vcmpgtuh 2, 2, 3
 ; PWR7-NEXT:    blr
 ;
 ; PWR8-LABEL: ugt_1_v8i16:
@@ -4101,8 +4095,7 @@ define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) {
 ; PWR5-NEXT:    vadduwm 3, 2, 3
 ; PWR5-NEXT:    vand 2, 2, 3
 ; PWR5-NEXT:    vxor 3, 3, 3
-; PWR5-NEXT:    vcmpequw 2, 2, 3
-; PWR5-NEXT:    vnot 2, 2
+; PWR5-NEXT:    vcmpgtuw 2, 2, 3
 ; PWR5-NEXT:    blr
 ;
 ; PWR6-LABEL: ugt_1_v4i32:
@@ -4111,8 +4104,7 @@ define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) {
 ; PWR6-NEXT:    vadduwm 3, 2, 3
 ; PWR6-NEXT:    vand 2, 2, 3
 ; PWR6-NEXT:    vxor 3, 3, 3
-; PWR6-NEXT:    vcmpequw 2, 2, 3
-; PWR6-NEXT:    vnot 2, 2
+; PWR6-NEXT:    vcmpgtuw 2, 2, 3
 ; PWR6-NEXT:    blr
 ;
 ; PWR7-LABEL: ugt_1_v4i32:
@@ -4121,8 +4113,7 @@ define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) {
 ; PWR7-NEXT:    vadduwm 3, 2, 3
 ; PWR7-NEXT:    xxland 34, 34, 35
 ; PWR7-NEXT:    xxlxor 35, 35, 35
-; PWR7-NEXT:    vcmpequw 2, 2, 3
-; PWR7-NEXT:    xxlnor 34, 34, 34
+; PWR7-NEXT:    vcmpgtuw 2, 2, 3
 ; PWR7-NEXT:    blr
 ;
 ; PWR8-LABEL: ugt_1_v4i32:
@@ -11967,9 +11958,8 @@ define <2 x i64> @ugt_1_v2i64(<2 x i64> %0) {
 ; PWR7-NEXT:    addis 3, 2, .LCPI100_0 at toc@ha
 ; PWR7-NEXT:    addi 3, 3, .LCPI100_0 at toc@l
 ; PWR7-NEXT:    xxland 34, 34, 0
-; PWR7-NEXT:    vcmpequw 2, 2, 3
+; PWR7-NEXT:    vcmpgtuw 2, 2, 3
 ; PWR7-NEXT:    lxvw4x 35, 0, 3
-; PWR7-NEXT:    xxlnor 34, 34, 34
 ; PWR7-NEXT:    vperm 3, 2, 2, 3
 ; PWR7-NEXT:    xxlor 34, 35, 34
 ; PWR7-NEXT:    blr



More information about the llvm-commits mailing list