[llvm] [PowerPC] vector compare greater than support (PR #150422)

Wed Aug 6 07:59:29 PDT 2025

https://github.com/Himadhith updated https://github.com/llvm/llvm-project/pull/150422

>From 24a2fb131b03769133b4462fe30261ce775d49b7 Mon Sep 17 00:00:00 2001
From: himadhith <himadhith.v at ibm.com>
Date: Thu, 24 Jul 2025 13:40:10 +0000
Subject: [PATCH] [PowerPC] vector compare greater than support for Zero vector
 comparisons

---
 llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp   | 10 +++
 .../test/CodeGen/PowerPC/check-zero-vector.ll |  9 +--
 llvm/test/CodeGen/PowerPC/pr61315.ll          | 61 ++-----------------
 llvm/test/CodeGen/PowerPC/recipest.ll         |  2 +-
 llvm/test/CodeGen/PowerPC/setcc-logic.ll      |  3 +-
 .../PowerPC/vector-popcnt-128-ult-ugt.ll      | 30 +++------
 6 files changed, 29 insertions(+), 86 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 415164fc9e2cb..cc30aedd03011 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -4570,6 +4570,16 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
     if (Subtarget->hasSPE())
       return false;
 
+    // For unsigned integer elements `x != 0` is equivalent to `x >u 0`, so a
+    // SETNE against an all-zeros vector becomes SETUGT (SETULT when the zero
+    // vector is the LHS). This selects vcmpgtu[b|h|w] without a negation step.
+    // NOTE(review): the element type is not checked, so FP vectors match too.
+    if (CC == ISD::SETNE) {
+      if (ISD::isBuildVectorAllZeros(RHS.getNode()))
+        CC = ISD::SETUGT;
+      else if (ISD::isBuildVectorAllZeros(LHS.getNode()))
+        CC = ISD::SETULT;
+    }
     EVT VecVT = LHS.getValueType();
     bool Swap, Negate;
     unsigned int VCmpInst =
diff --git a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll b/llvm/test/CodeGen/PowerPC/check-zero-vector.ll
index d8e66d6500f5f..665f4b16d1bd6 100644
--- a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll
+++ b/llvm/test/CodeGen/PowerPC/check-zero-vector.ll
@@ -19,8 +19,7 @@ define i32 @test_Greater_than(ptr %colauths) {
 ; POWERPC_64LE-NEXT:    li 4, 0
 ; POWERPC_64LE-NEXT:    li 3, 4
 ; POWERPC_64LE-NEXT:    xxswapd 34, 0
-; POWERPC_64LE-NEXT:    vcmpequh 2, 2, 3
-; POWERPC_64LE-NEXT:    xxlnor 34, 34, 34
+; POWERPC_64LE-NEXT:    vcmpgtuh 2, 2, 3
 ; POWERPC_64LE-NEXT:    vmrglh 3, 2, 2
 ; POWERPC_64LE-NEXT:    vextuwrx 4, 4, 2
 ; POWERPC_64LE-NEXT:    vextuwrx 3, 3, 3
@@ -42,8 +41,7 @@ define i32 @test_Greater_than(ptr %colauths) {
 ; POWERPC_64-NEXT:    xxlxor 35, 35, 35
 ; POWERPC_64-NEXT:    li 4, 12
 ; POWERPC_64-NEXT:    li 3, 8
-; POWERPC_64-NEXT:    vcmpequh 2, 2, 3
-; POWERPC_64-NEXT:    xxlnor 34, 34, 34
+; POWERPC_64-NEXT:    vcmpgtuh 2, 2, 3
 ; POWERPC_64-NEXT:    vmrghh 2, 2, 2
 ; POWERPC_64-NEXT:    vextuwlx 4, 4, 2
 ; POWERPC_64-NEXT:    vextuwlx 3, 3, 2
@@ -66,8 +64,7 @@ define i32 @test_Greater_than(ptr %colauths) {
 ; POWERPC_32-NEXT:    xxlxor 35, 35, 35
 ; POWERPC_32-NEXT:    lxvwsx 0, 3, 4
 ; POWERPC_32-NEXT:    xxmrghw 34, 1, 0
-; POWERPC_32-NEXT:    vcmpequh 2, 2, 3
-; POWERPC_32-NEXT:    xxlnor 34, 34, 34
+; POWERPC_32-NEXT:    vcmpgtuh 2, 2, 3
 ; POWERPC_32-NEXT:    vmrghh 2, 2, 2
 ; POWERPC_32-NEXT:    stxv 34, -32(1)
 ; POWERPC_32-NEXT:    lwz 3, -20(1)
diff --git a/llvm/test/CodeGen/PowerPC/pr61315.ll b/llvm/test/CodeGen/PowerPC/pr61315.ll
index 87208691eb047..b8a1d46afc3b2 100644
--- a/llvm/test/CodeGen/PowerPC/pr61315.ll
+++ b/llvm/test/CodeGen/PowerPC/pr61315.ll
@@ -1,23 +1,7 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
 ; RUN:   -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s
 define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0) local_unnamed_addr #0 {
-; CHECK: .LCPI0_0:
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
 ; CHECK-LABEL: ConvertExtractedMaskBitsToVect:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
@@ -29,8 +13,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0)
 ; CHECK-NEXT:    xxperm v2, v3, vs0
 ; CHECK-NEXT:    lxv vs0, 0(r3)
 ; CHECK-NEXT:    xxland v2, v2, vs0
-; CHECK-NEXT:    vcmpequb v2, v2, v3
-; CHECK-NEXT:    xxlnor v2, v2, v2
+; CHECK-NEXT:    vcmpgtub v2, v2, v3
 ; CHECK-NEXT:    blr
   %a4 = extractelement <16 x i8> %0, i64 7
   %a5 = zext i8 %a4 to i16
@@ -44,23 +27,6 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0)
 }
 
 define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0) local_unnamed_addr #0 {
-; CHECK: .LCPI1_0:
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	23                               # 0x17
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
-; CHECK-NEXT: .byte	0                                # 0x0
 ; CHECK-LABEL: ConvertExtractedMaskBitsToVect2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
@@ -72,8 +38,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0
 ; CHECK-NEXT:    xxperm v2, v3, vs0
 ; CHECK-NEXT:    lxv vs0, 0(r3)
 ; CHECK-NEXT:    xxland v2, v2, vs0
-; CHECK-NEXT:    vcmpequb v2, v2, v3
-; CHECK-NEXT:    xxlnor v2, v2, v2
+; CHECK-NEXT:    vcmpgtub v2, v2, v3
 ; CHECK-NEXT:    blr
   %a4 = extractelement <16 x i8> %0, i64 7
   %a5 = zext i8 %a4 to i32
@@ -87,23 +52,6 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0
 }
 
 define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect3(<8 x i16> noundef %0) local_unnamed_addr #0 {
-; CHECK: .LCPI2_0:
-; CHECK-NEXT: .byte	22                                # 0x16
-; CHECK-NEXT: .byte	23                                # 0x17
-; CHECK-NEXT: .byte	22                                # 0x16
-; CHECK-NEXT: .byte	23                                # 0x17
-; CHECK-NEXT: .byte	22                                # 0x16
-; CHECK-NEXT: .byte	23                                # 0x17
-; CHECK-NEXT: .byte	22                                # 0x16
-; CHECK-NEXT: .byte	23                                # 0x17
-; CHECK-NEXT: .byte	0                                 # 0x0
-; CHECK-NEXT: .byte	0                                 # 0x0
-; CHECK-NEXT: .byte	0                                 # 0x0
-; CHECK-NEXT: .byte	0                                 # 0x0
-; CHECK-NEXT: .byte	0                                 # 0x0
-; CHECK-NEXT: .byte	0                                 # 0x0
-; CHECK-NEXT: .byte	0                                 # 0x0
-; CHECK-NEXT: .byte	0                                 # 0x0
 ; CHECK-LABEL: ConvertExtractedMaskBitsToVect3:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
@@ -115,8 +63,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect3(<8 x i16> noundef %0
 ; CHECK-NEXT:    xxperm v2, v3, vs0
 ; CHECK-NEXT:    lxv vs0, 0(r3)
 ; CHECK-NEXT:    xxland v2, v2, vs0
-; CHECK-NEXT:    vcmpequb v2, v2, v3
-; CHECK-NEXT:    xxlnor v2, v2, v2
+; CHECK-NEXT:    vcmpgtub v2, v2, v3
 ; CHECK-NEXT:    blr
   %a4 = extractelement <8 x i16> %0, i64 3
   %a5 = zext i16 %a4 to i32
diff --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll
index 2598a410b8761..c3ee1c553eebe 100644
--- a/llvm/test/CodeGen/PowerPC/recipest.ll
+++ b/llvm/test/CodeGen/PowerPC/recipest.ll
@@ -1031,7 +1031,7 @@ define <4 x float> @hoo3_fmf(<4 x float> %a) #1 {
 ; CHECK-P7-NEXT:    vmaddfp 5, 5, 0, 3
 ; CHECK-P7-NEXT:    vmaddfp 3, 5, 4, 3
 ; CHECK-P7-NEXT:    vxor 4, 4, 4
-; CHECK-P7-NEXT:    vcmpeqfp 2, 2, 4
+; CHECK-P7-NEXT:    vcmpgefp 2, 4, 2
 ; CHECK-P7-NEXT:    vnot 2, 2
 ; CHECK-P7-NEXT:    vand 2, 2, 3
 ; CHECK-P7-NEXT:    blr
diff --git a/llvm/test/CodeGen/PowerPC/setcc-logic.ll b/llvm/test/CodeGen/PowerPC/setcc-logic.ll
index 1c3ac17666e26..90f963de428e9 100644
--- a/llvm/test/CodeGen/PowerPC/setcc-logic.ll
+++ b/llvm/test/CodeGen/PowerPC/setcc-logic.ll
@@ -366,8 +366,7 @@ define <4 x i1> @any_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xxlor 34, 34, 35
 ; CHECK-NEXT:    xxlxor 35, 35, 35
-; CHECK-NEXT:    vcmpequw 2, 2, 3
-; CHECK-NEXT:    xxlnor 34, 34, 34
+; CHECK-NEXT:    vcmpgtuw 2, 2, 3
 ; CHECK-NEXT:    blr
   %a = icmp ne <4 x i32> %P, zeroinitializer
   %b = icmp ne <4 x i32> %Q, zeroinitializer
diff --git a/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll b/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll
index 43cbc62e0bb1c..986f255df4bd6 100644
--- a/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll
@@ -13,8 +13,7 @@ define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) {
 ; PWR5-NEXT:    vaddubm 3, 2, 3
 ; PWR5-NEXT:    vand 2, 2, 3
 ; PWR5-NEXT:    vxor 3, 3, 3
-; PWR5-NEXT:    vcmpequb 2, 2, 3
-; PWR5-NEXT:    vnot 2, 2
+; PWR5-NEXT:    vcmpgtub 2, 2, 3
 ; PWR5-NEXT:    blr
 ;
 ; PWR6-LABEL: ugt_1_v16i8:
@@ -23,8 +22,7 @@ define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) {
 ; PWR6-NEXT:    vaddubm 3, 2, 3
 ; PWR6-NEXT:    vand 2, 2, 3
 ; PWR6-NEXT:    vxor 3, 3, 3
-; PWR6-NEXT:    vcmpequb 2, 2, 3
-; PWR6-NEXT:    vnot 2, 2
+; PWR6-NEXT:    vcmpgtub 2, 2, 3
 ; PWR6-NEXT:    blr
 ;
 ; PWR7-LABEL: ugt_1_v16i8:
@@ -33,8 +31,7 @@ define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) {
 ; PWR7-NEXT:    vaddubm 3, 2, 3
 ; PWR7-NEXT:    xxland 34, 34, 35
 ; PWR7-NEXT:    xxlxor 35, 35, 35
-; PWR7-NEXT:    vcmpequb 2, 2, 3
-; PWR7-NEXT:    xxlnor 34, 34, 34
+; PWR7-NEXT:    vcmpgtub 2, 2, 3
 ; PWR7-NEXT:    blr
 ;
 ; PWR8-LABEL: ugt_1_v16i8:
@@ -1081,8 +1078,7 @@ define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) {
 ; PWR5-NEXT:    vadduhm 3, 2, 3
 ; PWR5-NEXT:    vand 2, 2, 3
 ; PWR5-NEXT:    vxor 3, 3, 3
-; PWR5-NEXT:    vcmpequh 2, 2, 3
-; PWR5-NEXT:    vnot 2, 2
+; PWR5-NEXT:    vcmpgtuh 2, 2, 3
 ; PWR5-NEXT:    blr
 ;
 ; PWR6-LABEL: ugt_1_v8i16:
@@ -1091,8 +1087,7 @@ define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) {
 ; PWR6-NEXT:    vadduhm 3, 2, 3
 ; PWR6-NEXT:    vand 2, 2, 3
 ; PWR6-NEXT:    vxor 3, 3, 3
-; PWR6-NEXT:    vcmpequh 2, 2, 3
-; PWR6-NEXT:    vnot 2, 2
+; PWR6-NEXT:    vcmpgtuh 2, 2, 3
 ; PWR6-NEXT:    blr
 ;
 ; PWR7-LABEL: ugt_1_v8i16:
@@ -1101,8 +1096,7 @@ define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) {
 ; PWR7-NEXT:    vadduhm 3, 2, 3
 ; PWR7-NEXT:    xxland 34, 34, 35
 ; PWR7-NEXT:    xxlxor 35, 35, 35
-; PWR7-NEXT:    vcmpequh 2, 2, 3
-; PWR7-NEXT:    xxlnor 34, 34, 34
+; PWR7-NEXT:    vcmpgtuh 2, 2, 3
 ; PWR7-NEXT:    blr
 ;
 ; PWR8-LABEL: ugt_1_v8i16:
@@ -4101,8 +4095,7 @@ define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) {
 ; PWR5-NEXT:    vadduwm 3, 2, 3
 ; PWR5-NEXT:    vand 2, 2, 3
 ; PWR5-NEXT:    vxor 3, 3, 3
-; PWR5-NEXT:    vcmpequw 2, 2, 3
-; PWR5-NEXT:    vnot 2, 2
+; PWR5-NEXT:    vcmpgtuw 2, 2, 3
 ; PWR5-NEXT:    blr
 ;
 ; PWR6-LABEL: ugt_1_v4i32:
@@ -4111,8 +4104,7 @@ define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) {
 ; PWR6-NEXT:    vadduwm 3, 2, 3
 ; PWR6-NEXT:    vand 2, 2, 3
 ; PWR6-NEXT:    vxor 3, 3, 3
-; PWR6-NEXT:    vcmpequw 2, 2, 3
-; PWR6-NEXT:    vnot 2, 2
+; PWR6-NEXT:    vcmpgtuw 2, 2, 3
 ; PWR6-NEXT:    blr
 ;
 ; PWR7-LABEL: ugt_1_v4i32:
@@ -4121,8 +4113,7 @@ define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) {
 ; PWR7-NEXT:    vadduwm 3, 2, 3
 ; PWR7-NEXT:    xxland 34, 34, 35
 ; PWR7-NEXT:    xxlxor 35, 35, 35
-; PWR7-NEXT:    vcmpequw 2, 2, 3
-; PWR7-NEXT:    xxlnor 34, 34, 34
+; PWR7-NEXT:    vcmpgtuw 2, 2, 3
 ; PWR7-NEXT:    blr
 ;
 ; PWR8-LABEL: ugt_1_v4i32:
@@ -11967,9 +11958,8 @@ define <2 x i64> @ugt_1_v2i64(<2 x i64> %0) {
 ; PWR7-NEXT:    addis 3, 2, .LCPI100_0@toc@ha
 ; PWR7-NEXT:    addi 3, 3, .LCPI100_0@toc@l
 ; PWR7-NEXT:    xxland 34, 34, 0
-; PWR7-NEXT:    vcmpequw 2, 2, 3
+; PWR7-NEXT:    vcmpgtuw 2, 2, 3
 ; PWR7-NEXT:    lxvw4x 35, 0, 3
-; PWR7-NEXT:    xxlnor 34, 34, 34
 ; PWR7-NEXT:    vperm 3, 2, 2, 3
 ; PWR7-NEXT:    xxlor 34, 35, 34
 ; PWR7-NEXT:    blr