[llvm] [PowerPC] vector compare greater than support (PR #150422)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 6 07:59:29 PDT 2025
https://github.com/Himadhith updated https://github.com/llvm/llvm-project/pull/150422
From 24a2fb131b03769133b4462fe30261ce775d49b7 Mon Sep 17 00:00:00 2001
From: himadhith <himadhith.v at ibm.com>
Date: Thu, 24 Jul 2025 13:40:10 +0000
Subject: [PATCH] [PowerPC] vector compare greater than support for Zero vector
comparisons
---
llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 10 +++
.../test/CodeGen/PowerPC/check-zero-vector.ll | 9 +--
llvm/test/CodeGen/PowerPC/pr61315.ll | 61 ++-----------------
llvm/test/CodeGen/PowerPC/recipest.ll | 2 +-
llvm/test/CodeGen/PowerPC/setcc-logic.ll | 3 +-
.../PowerPC/vector-popcnt-128-ult-ugt.ll | 30 +++------
6 files changed, 29 insertions(+), 86 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 415164fc9e2cb..cc30aedd03011 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -4570,6 +4570,16 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
if (Subtarget->hasSPE())
return false;
+ // Optimize integer `x != zero-vector` into an unsigned greater-than compare
+ // (and `zero-vector != x` into unsigned less-than): every non-zero unsigned
+ // element is > 0, so a single vcmpgtu[b|h|w] replaces vcmpequ* plus a negate.
+ // NOTE(review): FP vectors are rewritten too (see recipest.ll diff) - confirm.
+ if (CC == ISD::SETNE) {
+ if (ISD::isBuildVectorAllZeros(RHS.getNode()))
+ CC = ISD::SETUGT;
+ else if (ISD::isBuildVectorAllZeros(LHS.getNode()))
+ CC = ISD::SETULT;
+ }
EVT VecVT = LHS.getValueType();
bool Swap, Negate;
unsigned int VCmpInst =
diff --git a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll b/llvm/test/CodeGen/PowerPC/check-zero-vector.ll
index d8e66d6500f5f..665f4b16d1bd6 100644
--- a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll
+++ b/llvm/test/CodeGen/PowerPC/check-zero-vector.ll
@@ -19,8 +19,7 @@ define i32 @test_Greater_than(ptr %colauths) {
; POWERPC_64LE-NEXT: li 4, 0
; POWERPC_64LE-NEXT: li 3, 4
; POWERPC_64LE-NEXT: xxswapd 34, 0
-; POWERPC_64LE-NEXT: vcmpequh 2, 2, 3
-; POWERPC_64LE-NEXT: xxlnor 34, 34, 34
+; POWERPC_64LE-NEXT: vcmpgtuh 2, 2, 3
; POWERPC_64LE-NEXT: vmrglh 3, 2, 2
; POWERPC_64LE-NEXT: vextuwrx 4, 4, 2
; POWERPC_64LE-NEXT: vextuwrx 3, 3, 3
@@ -42,8 +41,7 @@ define i32 @test_Greater_than(ptr %colauths) {
; POWERPC_64-NEXT: xxlxor 35, 35, 35
; POWERPC_64-NEXT: li 4, 12
; POWERPC_64-NEXT: li 3, 8
-; POWERPC_64-NEXT: vcmpequh 2, 2, 3
-; POWERPC_64-NEXT: xxlnor 34, 34, 34
+; POWERPC_64-NEXT: vcmpgtuh 2, 2, 3
; POWERPC_64-NEXT: vmrghh 2, 2, 2
; POWERPC_64-NEXT: vextuwlx 4, 4, 2
; POWERPC_64-NEXT: vextuwlx 3, 3, 2
@@ -66,8 +64,7 @@ define i32 @test_Greater_than(ptr %colauths) {
; POWERPC_32-NEXT: xxlxor 35, 35, 35
; POWERPC_32-NEXT: lxvwsx 0, 3, 4
; POWERPC_32-NEXT: xxmrghw 34, 1, 0
-; POWERPC_32-NEXT: vcmpequh 2, 2, 3
-; POWERPC_32-NEXT: xxlnor 34, 34, 34
+; POWERPC_32-NEXT: vcmpgtuh 2, 2, 3
; POWERPC_32-NEXT: vmrghh 2, 2, 2
; POWERPC_32-NEXT: stxv 34, -32(1)
; POWERPC_32-NEXT: lwz 3, -20(1)
diff --git a/llvm/test/CodeGen/PowerPC/pr61315.ll b/llvm/test/CodeGen/PowerPC/pr61315.ll
index 87208691eb047..b8a1d46afc3b2 100644
--- a/llvm/test/CodeGen/PowerPC/pr61315.ll
+++ b/llvm/test/CodeGen/PowerPC/pr61315.ll
@@ -1,23 +1,7 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s
define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0) local_unnamed_addr #0 {
-; CHECK: .LCPI0_0:
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
; CHECK-LABEL: ConvertExtractedMaskBitsToVect:
; CHECK: # %bb.0:
; CHECK-NEXT: addis r3, r2, .LCPI0_0@toc@ha
@@ -29,8 +13,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0)
; CHECK-NEXT: xxperm v2, v3, vs0
; CHECK-NEXT: lxv vs0, 0(r3)
; CHECK-NEXT: xxland v2, v2, vs0
-; CHECK-NEXT: vcmpequb v2, v2, v3
-; CHECK-NEXT: xxlnor v2, v2, v2
+; CHECK-NEXT: vcmpgtub v2, v2, v3
; CHECK-NEXT: blr
%a4 = extractelement <16 x i8> %0, i64 7
%a5 = zext i8 %a4 to i16
@@ -44,23 +27,6 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0)
}
define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0) local_unnamed_addr #0 {
-; CHECK: .LCPI1_0:
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
; CHECK-LABEL: ConvertExtractedMaskBitsToVect2:
; CHECK: # %bb.0:
; CHECK-NEXT: addis r3, r2, .LCPI1_0@toc@ha
@@ -72,8 +38,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0
; CHECK-NEXT: xxperm v2, v3, vs0
; CHECK-NEXT: lxv vs0, 0(r3)
; CHECK-NEXT: xxland v2, v2, vs0
-; CHECK-NEXT: vcmpequb v2, v2, v3
-; CHECK-NEXT: xxlnor v2, v2, v2
+; CHECK-NEXT: vcmpgtub v2, v2, v3
; CHECK-NEXT: blr
%a4 = extractelement <16 x i8> %0, i64 7
%a5 = zext i8 %a4 to i32
@@ -87,23 +52,6 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0
}
define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect3(<8 x i16> noundef %0) local_unnamed_addr #0 {
-; CHECK: .LCPI2_0:
-; CHECK-NEXT: .byte 22 # 0x16
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 22 # 0x16
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 22 # 0x16
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 22 # 0x16
-; CHECK-NEXT: .byte 23 # 0x17
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
-; CHECK-NEXT: .byte 0 # 0x0
; CHECK-LABEL: ConvertExtractedMaskBitsToVect3:
; CHECK: # %bb.0:
; CHECK-NEXT: addis r3, r2, .LCPI2_0@toc@ha
@@ -115,8 +63,7 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect3(<8 x i16> noundef %0
; CHECK-NEXT: xxperm v2, v3, vs0
; CHECK-NEXT: lxv vs0, 0(r3)
; CHECK-NEXT: xxland v2, v2, vs0
-; CHECK-NEXT: vcmpequb v2, v2, v3
-; CHECK-NEXT: xxlnor v2, v2, v2
+; CHECK-NEXT: vcmpgtub v2, v2, v3
; CHECK-NEXT: blr
%a4 = extractelement <8 x i16> %0, i64 3
%a5 = zext i16 %a4 to i32
diff --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll
index 2598a410b8761..c3ee1c553eebe 100644
--- a/llvm/test/CodeGen/PowerPC/recipest.ll
+++ b/llvm/test/CodeGen/PowerPC/recipest.ll
@@ -1031,7 +1031,7 @@ define <4 x float> @hoo3_fmf(<4 x float> %a) #1 {
; CHECK-P7-NEXT: vmaddfp 5, 5, 0, 3
; CHECK-P7-NEXT: vmaddfp 3, 5, 4, 3
; CHECK-P7-NEXT: vxor 4, 4, 4
-; CHECK-P7-NEXT: vcmpeqfp 2, 2, 4
+; CHECK-P7-NEXT: vcmpgefp 2, 4, 2
; CHECK-P7-NEXT: vnot 2, 2
; CHECK-P7-NEXT: vand 2, 2, 3
; CHECK-P7-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/setcc-logic.ll b/llvm/test/CodeGen/PowerPC/setcc-logic.ll
index 1c3ac17666e26..90f963de428e9 100644
--- a/llvm/test/CodeGen/PowerPC/setcc-logic.ll
+++ b/llvm/test/CodeGen/PowerPC/setcc-logic.ll
@@ -366,8 +366,7 @@ define <4 x i1> @any_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK: # %bb.0:
; CHECK-NEXT: xxlor 34, 34, 35
; CHECK-NEXT: xxlxor 35, 35, 35
-; CHECK-NEXT: vcmpequw 2, 2, 3
-; CHECK-NEXT: xxlnor 34, 34, 34
+; CHECK-NEXT: vcmpgtuw 2, 2, 3
; CHECK-NEXT: blr
%a = icmp ne <4 x i32> %P, zeroinitializer
%b = icmp ne <4 x i32> %Q, zeroinitializer
diff --git a/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll b/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll
index 43cbc62e0bb1c..986f255df4bd6 100644
--- a/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll
@@ -13,8 +13,7 @@ define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) {
; PWR5-NEXT: vaddubm 3, 2, 3
; PWR5-NEXT: vand 2, 2, 3
; PWR5-NEXT: vxor 3, 3, 3
-; PWR5-NEXT: vcmpequb 2, 2, 3
-; PWR5-NEXT: vnot 2, 2
+; PWR5-NEXT: vcmpgtub 2, 2, 3
; PWR5-NEXT: blr
;
; PWR6-LABEL: ugt_1_v16i8:
@@ -23,8 +22,7 @@ define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) {
; PWR6-NEXT: vaddubm 3, 2, 3
; PWR6-NEXT: vand 2, 2, 3
; PWR6-NEXT: vxor 3, 3, 3
-; PWR6-NEXT: vcmpequb 2, 2, 3
-; PWR6-NEXT: vnot 2, 2
+; PWR6-NEXT: vcmpgtub 2, 2, 3
; PWR6-NEXT: blr
;
; PWR7-LABEL: ugt_1_v16i8:
@@ -33,8 +31,7 @@ define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) {
; PWR7-NEXT: vaddubm 3, 2, 3
; PWR7-NEXT: xxland 34, 34, 35
; PWR7-NEXT: xxlxor 35, 35, 35
-; PWR7-NEXT: vcmpequb 2, 2, 3
-; PWR7-NEXT: xxlnor 34, 34, 34
+; PWR7-NEXT: vcmpgtub 2, 2, 3
; PWR7-NEXT: blr
;
; PWR8-LABEL: ugt_1_v16i8:
@@ -1081,8 +1078,7 @@ define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) {
; PWR5-NEXT: vadduhm 3, 2, 3
; PWR5-NEXT: vand 2, 2, 3
; PWR5-NEXT: vxor 3, 3, 3
-; PWR5-NEXT: vcmpequh 2, 2, 3
-; PWR5-NEXT: vnot 2, 2
+; PWR5-NEXT: vcmpgtuh 2, 2, 3
; PWR5-NEXT: blr
;
; PWR6-LABEL: ugt_1_v8i16:
@@ -1091,8 +1087,7 @@ define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) {
; PWR6-NEXT: vadduhm 3, 2, 3
; PWR6-NEXT: vand 2, 2, 3
; PWR6-NEXT: vxor 3, 3, 3
-; PWR6-NEXT: vcmpequh 2, 2, 3
-; PWR6-NEXT: vnot 2, 2
+; PWR6-NEXT: vcmpgtuh 2, 2, 3
; PWR6-NEXT: blr
;
; PWR7-LABEL: ugt_1_v8i16:
@@ -1101,8 +1096,7 @@ define <8 x i16> @ugt_1_v8i16(<8 x i16> %0) {
; PWR7-NEXT: vadduhm 3, 2, 3
; PWR7-NEXT: xxland 34, 34, 35
; PWR7-NEXT: xxlxor 35, 35, 35
-; PWR7-NEXT: vcmpequh 2, 2, 3
-; PWR7-NEXT: xxlnor 34, 34, 34
+; PWR7-NEXT: vcmpgtuh 2, 2, 3
; PWR7-NEXT: blr
;
; PWR8-LABEL: ugt_1_v8i16:
@@ -4101,8 +4095,7 @@ define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) {
; PWR5-NEXT: vadduwm 3, 2, 3
; PWR5-NEXT: vand 2, 2, 3
; PWR5-NEXT: vxor 3, 3, 3
-; PWR5-NEXT: vcmpequw 2, 2, 3
-; PWR5-NEXT: vnot 2, 2
+; PWR5-NEXT: vcmpgtuw 2, 2, 3
; PWR5-NEXT: blr
;
; PWR6-LABEL: ugt_1_v4i32:
@@ -4111,8 +4104,7 @@ define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) {
; PWR6-NEXT: vadduwm 3, 2, 3
; PWR6-NEXT: vand 2, 2, 3
; PWR6-NEXT: vxor 3, 3, 3
-; PWR6-NEXT: vcmpequw 2, 2, 3
-; PWR6-NEXT: vnot 2, 2
+; PWR6-NEXT: vcmpgtuw 2, 2, 3
; PWR6-NEXT: blr
;
; PWR7-LABEL: ugt_1_v4i32:
@@ -4121,8 +4113,7 @@ define <4 x i32> @ugt_1_v4i32(<4 x i32> %0) {
; PWR7-NEXT: vadduwm 3, 2, 3
; PWR7-NEXT: xxland 34, 34, 35
; PWR7-NEXT: xxlxor 35, 35, 35
-; PWR7-NEXT: vcmpequw 2, 2, 3
-; PWR7-NEXT: xxlnor 34, 34, 34
+; PWR7-NEXT: vcmpgtuw 2, 2, 3
; PWR7-NEXT: blr
;
; PWR8-LABEL: ugt_1_v4i32:
@@ -11967,9 +11958,8 @@ define <2 x i64> @ugt_1_v2i64(<2 x i64> %0) {
; PWR7-NEXT: addis 3, 2, .LCPI100_0@toc@ha
; PWR7-NEXT: addi 3, 3, .LCPI100_0@toc@l
; PWR7-NEXT: xxland 34, 34, 0
-; PWR7-NEXT: vcmpequw 2, 2, 3
+; PWR7-NEXT: vcmpgtuw 2, 2, 3
; PWR7-NEXT: lxvw4x 35, 0, 3
-; PWR7-NEXT: xxlnor 34, 34, 34
; PWR7-NEXT: vperm 3, 2, 2, 3
; PWR7-NEXT: xxlor 34, 35, 34
; PWR7-NEXT: blr
More information about the llvm-commits
mailing list