[llvm] [PowerPC] [Draft] Emit xxeval instruction for Ternary operation with v2i64 operand types (PR #145574)
Tony Varghese via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 24 12:13:33 PDT 2025
https://github.com/tonykuttai created https://github.com/llvm/llvm-project/pull/145574
`xxeval` instruction can be used for supporting the ternary patterns.
Supporting the following patterns in this change with `v2i64` operands. Since the binary operations `xor` & `and` get promoted to `v4i32`, the original pattern becomes:
`ternary(v2i64:A, xor(v2i64:B, v2i64:C), and(v2i64:B, v2i64:C))` to `ternary(v2i64:A, bitcast(v2i64, xor(v4i32:B, v4i32:C)), bitcast(v2i64, and(v4i32:B, v4i32:C)) )`.
- To lower this pattern to the `xxeval` instruction, custom lowering is used and the `ISD::VSELECT` node is custom lowered during `PPCIselLowering`.
- If the pattern matches `ternary(v2i64:A, bitcast(v2i64, xor(v4i32:B, v4i32:C)), bitcast(v2i64, and(v4i32:B, v4i32:C)) )`, then a new node `PPCISD::VSELECT` is emitted as shown. All other nodes will follow the default lowering.
```
SelectionDAG has 15 nodes:
t0: ch,glue = EntryToken
t4: v2i64,ch = CopyFromReg t0, Register:v2i64 %1
t6: v2i64,ch = CopyFromReg t0, Register:v2i64 %2
t2: v2i64,ch = CopyFromReg t0, Register:v2i64 %0
t16: v2i64 = sign_extend_inreg t2, ValueType:ch:v2i1
t8: v2i64 = xor t4, t6
t9: v2i64 = and t4, t6
t23: v2i64 = PPCISD::VSELECT t16, t8, t9
t12: ch,glue = CopyToReg t0, Register:v2i64 $v2, t23
t13: ch = PPCISD::RET_GLUE t12, Register:v2i64 $v2, t12:1
```
- Now, I have added the pattern matching for `PPCISD::VSELECT` in `llvm/lib/Target/PowerPC/PPCInstrP10.td`.
- Sample test file used `xxeval-vselect-x-and-v2i64.ll`:
```
; Function to test ternary(A, xor(B, C), and(B, C)) for <2 x i64>
define <2 x i64> @ternary_A_xor_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) {
; CHECK-LABEL: ternary_A_xor_BC_and_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
; CHECK-NEXT: xxlxor vs0, v3, v4
; CHECK-NEXT: xxland vs1, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%xor = xor <2 x i64> %B, %C
%and = and <2 x i64> %B, %C
%res = select <2 x i1> %A, <2 x i64> %xor, <2 x i64> %and
ret <2 x i64> %res
}
```
`doit.sh` script:
```
$LLVM_BUILD/bin/llc -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr -debug-only=isel -debug-only=legalize-types xxeval-vselect-x-and-v2i64.ll > isel_debug_x_and_2x64.ll 2>&1
$LLVM_BUILD/bin/llc -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr -debug-only=ppc-lowering xxeval-vselect-x-and-v2i64.ll > ppc-lowering_debug_x_and_2x64.ll 2>&1
```
- This is resulting in a crash during legalize vector types.
>From 3ebc44298670bdfb054a4c34fbbc7dab2e2e8ded Mon Sep 17 00:00:00 2001
From: Tony Varghese <tony.varghese at ibm.com>
Date: Thu, 29 May 2025 16:33:21 +0000
Subject: [PATCH 1/4] [PowerPC][XXEVAL] Exploit xxeval instruction for cases of
the ternary(A,X, and(B,C)), ternary(A,X,B), ternary(A,X,C),
ternary(A,X,xor(B,C)) forms.
---
llvm/lib/Target/PowerPC/PPCInstrP10.td | 200 +++++++++++++++---
.../CodeGen/PowerPC/xxeval-vselect-x-and.ll | 42 +---
.../CodeGen/PowerPC/xxeval-vselect-x-b.ll | 32 +--
.../CodeGen/PowerPC/xxeval-vselect-x-c.ll | 26 +--
.../CodeGen/PowerPC/xxeval-vselect-x-or.ll | 28 ++-
.../CodeGen/PowerPC/xxeval-vselect-x-xor.ll | 38 +---
6 files changed, 222 insertions(+), 144 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td
index d295f35fb1dd0..d0985f4a3a9bb 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrP10.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td
@@ -2159,8 +2159,131 @@ let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in {
(COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
}
-class XXEvalPattern <dag pattern, bits<8> imm> :
- Pat<(v4i32 pattern), (XXEVAL $vA, $vB, $vC, imm)> {}
+class XXEvalPattern <ValueType vt, dag pattern, bits<8> imm> :
+ Pat<(vt pattern), (XXEVAL $vA, $vB, $vC, imm)> {}
+
+class DagCondVNot<dag d, bit negate> {
+ // Utility to define a vnot around the dag.
+ dag res = !if(!ne(negate, 0),
+ (vnot d),
+ d);
+}
+
+class XXEvalUnaryPattern<ValueType vt> {
+ // vnot Operand B
+ dag vnotB = !cond(
+ !eq(vt, v4i32) : (vnot v4i32:$vB),
+ !eq(vt, v2i64) : (v2i64 (bitconvert (vnot (v4i32 (bitconvert v2i64:$vB)))))
+ );
+ // vnot Operand C
+ dag vnotC = !cond(
+ !eq(vt, v4i32) : (vnot v4i32:$vC),
+ !eq(vt, v2i64) : (v2i64 (bitconvert (vnot (v4i32 (bitconvert v2i64:$vC)))))
+ );
+}
+
+class XXEvalBinaryPattern<ValueType vt, SDPatternOperator op, bit notResult = 0> {
+ // Defines a wrapper class for binary patterns with optional NOT on result.
+ // Generate op pattern with optional NOT wrapping for result depending on "notResult".
+ dag opPat = !cond(
+ !eq(vt, v4i32) : DagCondVNot<(op v4i32:$vB, v4i32:$vC), notResult>.res,
+ !eq(vt, v2i64) : (v2i64 (bitconvert DagCondVNot<(op
+ (v4i32 (bitconvert v2i64:$vB)),
+ (v4i32 (bitconvert v2i64:$vC))), notResult>.res))
+ );
+}
+
+multiclass XXEvalVSelectWithXAnd<ValueType vt, bits<8> baseImm> {
+ // Multiclass for Ternary(A, X, and(B, C)) style patterns.
+ // Ternary(A, xor(B,C), and(B,C)) => imm: baseImm = 22
+ def : XXEvalPattern<vt,
+ (vselect vt:$vA, XXEvalBinaryPattern<vt, xor>.opPat, XXEvalBinaryPattern<vt, and>.opPat),
+ baseImm>;
+ // Ternary(A, nor(B,C), and(B,C)) => imm: baseImm + 2 = 24
+ def : XXEvalPattern<vt,
+ (vselect vt:$vA, XXEvalBinaryPattern<vt, or, 1>.opPat, XXEvalBinaryPattern<vt, and>.opPat),
+ !add(baseImm, 2)>;
+ // Ternary(A, eqv(B,C), and(B,C)) => imm: baseImm + 3 = 25
+ def : XXEvalPattern<vt,
+ (vselect vt:$vA, XXEvalBinaryPattern<vt, xor, 1>.opPat, XXEvalBinaryPattern<vt, and>.opPat),
+ !add(baseImm, 3)>;
+ // Ternary(A, not(C), and(B,C)) => imm: baseImm + 4 = 26
+ def : XXEvalPattern<vt,
+ (vselect vt:$vA, XXEvalUnaryPattern<vt>.vnotC, XXEvalBinaryPattern<vt, and>.opPat),
+ !add(baseImm, 4)>;
+ // Ternary(A, not(B), and(B,C)) => imm: baseImm + 6 = 28
+ def : XXEvalPattern<vt,
+ (vselect vt:$vA, XXEvalUnaryPattern<vt>.vnotB, XXEvalBinaryPattern<vt, and>.opPat),
+ !add(baseImm, 6)>;
+}
+
+multiclass XXEvalVSelectWithXB<ValueType vt, bits<8> baseImm>{
+ // Multiclass for Ternary(A, X, B) style patterns
+ // Ternary(A, and(B,C), B) => imm: baseImm = 49
+ def : XXEvalPattern<vt,
+ (vselect vt:$vA, XXEvalBinaryPattern<vt, and>.opPat, vt:$vB),
+ baseImm>;
+ // Ternary(A, nor(B,C), B) => imm: baseImm + 7 = 56
+ def : XXEvalPattern<vt,
+ (vselect vt:$vA, XXEvalBinaryPattern<vt, or, 1>.opPat, vt:$vB),
+ !add(baseImm, 7)>;
+ // Ternary(A, eqv(B,C), B) => imm: baseImm + 8 = 57
+ def : XXEvalPattern<vt,
+ (vselect vt:$vA, XXEvalBinaryPattern<vt, xor, 1>.opPat, vt:$vB),
+ !add(baseImm, 8)>;
+ // Ternary(A, not(C), B) => imm: baseImm + 9 = 58
+ def : XXEvalPattern<vt,
+ (vselect vt:$vA, XXEvalUnaryPattern<vt>.vnotC, vt:$vB),
+ !add(baseImm, 9)>;
+ // Ternary(A, nand(B,C), B) => imm: baseImm + 13 = 62
+ def : XXEvalPattern<vt,
+ (vselect vt:$vA, XXEvalBinaryPattern<vt, and, 1>.opPat, vt:$vB),
+ !add(baseImm, 13)>;
+}
+
+multiclass XXEvalVSelectWithXC<ValueType vt, bits<8> baseImm>{
+ // Multiclass for Ternary(A, X, C) style patterns
+ // Ternary(A, and(B,C), C) => imm: baseImm = 81
+ def : XXEvalPattern<vt,
+ (vselect vt:$vA, XXEvalBinaryPattern<vt, and>.opPat, vt:$vC),
+ baseImm>;
+ // Ternary(A, nor(B,C), C) => imm: baseImm + 7 = 88
+ def : XXEvalPattern<vt,
+ (vselect vt:$vA, XXEvalBinaryPattern<vt, or, 1>.opPat, vt:$vC),
+ !add(baseImm, 7)>;
+ // Ternary(A, eqv(B,C), C) => imm: baseImm + 8 = 89
+ def : XXEvalPattern<vt,
+ (vselect vt:$vA, XXEvalBinaryPattern<vt, xor, 1>.opPat, vt:$vC),
+ !add(baseImm, 8)>;
+ // Ternary(A, nand(B,C), C) => imm: baseImm + 13 = 94
+ def : XXEvalPattern<vt,
+ (vselect vt:$vA, XXEvalBinaryPattern<vt, and, 1>.opPat, vt:$vC),
+ !add(baseImm, 13)>;
+}
+
+multiclass XXEvalVSelectWithXXor<ValueType vt, bits<8> baseImm>{
+ // Multiclass for Ternary(A, X, xor(B,C)) style patterns
+ // Ternary(A, and(B,C), xor(B,C)) => imm: baseImm = 97
+ def : XXEvalPattern<vt,
+ (vselect vt:$vA, XXEvalBinaryPattern<vt, and>.opPat, XXEvalBinaryPattern<vt, xor>.opPat),
+ baseImm>;
+ // Ternary(A, B, xor(B,C)) => imm: baseImm + 2 = 99
+ def : XXEvalPattern<vt,
+ (vselect vt:$vA, vt:$vB, XXEvalBinaryPattern<vt, xor>.opPat),
+ !add(baseImm, 2)>;
+ // Ternary(A, C, xor(B,C)) => imm: baseImm + 4 = 101
+ def : XXEvalPattern<vt,
+ (vselect vt:$vA, vt:$vC, XXEvalBinaryPattern<vt, xor>.opPat),
+ !add(baseImm, 4)>;
+ // Ternary(A, or(B,C), xor(B,C)) => imm: baseImm + 6 = 103
+ def : XXEvalPattern<vt,
+ (vselect vt:$vA, XXEvalBinaryPattern<vt, or>.opPat, XXEvalBinaryPattern<vt, xor>.opPat),
+ !add(baseImm, 6)>;
+ // Ternary(A, nor(B,C), xor(B,C)) => imm: baseImm + 7 = 104
+ def : XXEvalPattern<vt,
+ (vselect vt:$vA, XXEvalBinaryPattern<vt, or, 1>.opPat, XXEvalBinaryPattern<vt, xor>.opPat),
+ !add(baseImm, 7)>;
+}
let Predicates = [PrefixInstrs, HasP10Vector] in {
let AddedComplexity = 400 in {
@@ -2192,83 +2315,96 @@ let Predicates = [PrefixInstrs, HasP10Vector] in {
// Anonymous patterns for XXEVAL
// AND
// and(A, B, C)
- def : XXEvalPattern<(and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 1>;
+ def : XXEvalPattern<v4i32, (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 1>;
// and(A, xor(B, C))
- def : XXEvalPattern<(and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 6>;
+ def : XXEvalPattern<v4i32, (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 6>;
// and(A, or(B, C))
- def : XXEvalPattern<(and v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 7>;
+ def : XXEvalPattern<v4i32, (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 7>;
// and(A, nor(B, C))
- def : XXEvalPattern<(and v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 8>;
+ def : XXEvalPattern<v4i32, (and v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 8>;
// and(A, eqv(B, C))
- def : XXEvalPattern<(and v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 9>;
+ def : XXEvalPattern<v4i32, (and v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 9>;
// and(A, nand(B, C))
- def : XXEvalPattern<(and v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 14>;
+ def : XXEvalPattern<v4i32, (and v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 14>;
// NAND
// nand(A, B, C)
- def : XXEvalPattern<(vnot (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC))),
+ def : XXEvalPattern<v4i32, (vnot (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC))),
!sub(255, 1)>;
// nand(A, xor(B, C))
- def : XXEvalPattern<(vnot (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))),
+ def : XXEvalPattern<v4i32, (vnot (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))),
!sub(255, 6)>;
// nand(A, or(B, C))
- def : XXEvalPattern<(vnot (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC))),
+ def : XXEvalPattern<v4i32, (vnot (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC))),
!sub(255, 7)>;
// nand(A, nor(B, C))
- def : XXEvalPattern<(or (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)),
+ def : XXEvalPattern<v4i32, (or (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)),
!sub(255, 8)>;
// nand(A, eqv(B, C))
- def : XXEvalPattern<(or (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)),
+ def : XXEvalPattern<v4i32, (or (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)),
!sub(255, 9)>;
// nand(A, nand(B, C))
- def : XXEvalPattern<(or (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)),
+ def : XXEvalPattern<v4i32, (or (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)),
!sub(255, 14)>;
// EQV
// (eqv A, B, C)
- def : XXEvalPattern<(or (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)),
+ def : XXEvalPattern<v4i32, (or (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)),
(vnot (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC)))),
150>;
// (eqv A, (and B, C))
- def : XXEvalPattern<(vnot (xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), 225>;
+ def : XXEvalPattern<v4i32, (vnot (xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), 225>;
// (eqv A, (or B, C))
- def : XXEvalPattern<(vnot (xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), 135>;
+ def : XXEvalPattern<v4i32, (vnot (xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), 135>;
// NOR
// (nor A, B, C)
- def : XXEvalPattern<(vnot (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), 128>;
+ def : XXEvalPattern<v4i32, (vnot (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), 128>;
// (nor A, (and B, C))
- def : XXEvalPattern<(vnot (or v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), 224>;
+ def : XXEvalPattern<v4i32, (vnot (or v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), 224>;
// (nor A, (eqv B, C))
- def : XXEvalPattern<(and (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)), 96>;
+ def : XXEvalPattern<v4i32, (and (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)), 96>;
// (nor A, (nand B, C))
- def : XXEvalPattern<(and (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)), 16>;
+ def : XXEvalPattern<v4i32, (and (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)), 16>;
// (nor A, (nor B, C))
- def : XXEvalPattern<(and (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)), 112>;
+ def : XXEvalPattern<v4i32, (and (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)), 112>;
// (nor A, (xor B, C))
- def : XXEvalPattern<(vnot (or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))), 144>;
+ def : XXEvalPattern<v4i32, (vnot (or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))), 144>;
// OR
// (or A, B, C)
- def : XXEvalPattern<(or v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 127>;
+ def : XXEvalPattern<v4i32, (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 127>;
// (or A, (and B, C))
- def : XXEvalPattern<(or v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 31>;
+ def : XXEvalPattern<v4i32, (or v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 31>;
// (or A, (eqv B, C))
- def : XXEvalPattern<(or v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 159>;
+ def : XXEvalPattern<v4i32, (or v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 159>;
// (or A, (nand B, C))
- def : XXEvalPattern<(or v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 239>;
+ def : XXEvalPattern<v4i32, (or v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 239>;
// (or A, (nor B, C))
- def : XXEvalPattern<(or v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 143>;
+ def : XXEvalPattern<v4i32, (or v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 143>;
// (or A, (xor B, C))
- def : XXEvalPattern<(or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 111>;
+ def : XXEvalPattern<v4i32, (or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 111>;
// XOR
// (xor A, B, C)
- def : XXEvalPattern<(xor v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 105>;
+ def : XXEvalPattern<v4i32, (xor v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 105>;
// (xor A, (and B, C))
- def : XXEvalPattern<(xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 30>;
+ def : XXEvalPattern<v4i32, (xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 30>;
// (xor A, (or B, C))
- def : XXEvalPattern<(xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 120>;
+ def : XXEvalPattern<v4i32, (xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 120>;
+
+ // Utilize xxeval instruction for ternary vector expressions.
+ defm : XXEvalVSelectWithXAnd<v4i32, 22>;
+ defm : XXEvalVSelectWithXAnd<v2i64, 22>;
+
+ defm : XXEvalVSelectWithXB<v4i32, 49>;
+ defm : XXEvalVSelectWithXB<v2i64, 49>;
+
+ defm : XXEvalVSelectWithXC<v4i32, 81>;
+ defm : XXEvalVSelectWithXC<v2i64, 81>;
+
+ defm : XXEvalVSelectWithXXor<v4i32, 97>;
+ defm : XXEvalVSelectWithXXor<v2i64, 97>;
// Anonymous patterns to select prefixed VSX loads and stores.
// Load / Store f128
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
index 2868669c52ce6..19305336f78df 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; Test file to verify the emission of Vector selection instructions when ternary operators are used.
+; Test file to verify the emission of Vector Evaluate instructions when ternary operators are used.
; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \
; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s
@@ -15,11 +15,9 @@ define <4 x i32> @ternary_A_xor_BC_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
; CHECK-LABEL: ternary_A_xor_BC_and_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxlxor vs0, v3, v4
-; CHECK-NEXT: xxland vs1, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 22
; CHECK-NEXT: blr
entry:
%xor = xor <4 x i32> %B, %C
@@ -33,12 +31,10 @@ define <2 x i64> @ternary_A_xor_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
; CHECK-LABEL: ternary_A_xor_BC_and_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxlxor vs0, v3, v4
-; CHECK-NEXT: xxland vs1, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 22
; CHECK-NEXT: blr
entry:
%xor = xor <2 x i64> %B, %C
@@ -52,11 +48,9 @@ define <4 x i32> @ternary_A_nor_BC_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
; CHECK-LABEL: ternary_A_nor_BC_and_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxlnor vs0, v3, v4
-; CHECK-NEXT: xxland vs1, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 24
; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %B, %C
@@ -71,12 +65,10 @@ define <2 x i64> @ternary_A_nor_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
; CHECK-LABEL: ternary_A_nor_BC_and_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxlnor vs0, v3, v4
-; CHECK-NEXT: xxland vs1, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 24
; CHECK-NEXT: blr
entry:
%or = or <2 x i64> %B, %C
@@ -91,11 +83,9 @@ define <4 x i32> @ternary_A_eqv_BC_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
; CHECK-LABEL: ternary_A_eqv_BC_and_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxleqv vs0, v3, v4
-; CHECK-NEXT: xxland vs1, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 25
; CHECK-NEXT: blr
entry:
%xor = xor <4 x i32> %B, %C
@@ -110,12 +100,10 @@ define <2 x i64> @ternary_A_eqv_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
; CHECK-LABEL: ternary_A_eqv_BC_and_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxleqv vs0, v3, v4
-; CHECK-NEXT: xxland vs1, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 25
; CHECK-NEXT: blr
entry:
%xor = xor <2 x i64> %B, %C
@@ -130,11 +118,9 @@ define <4 x i32> @ternary_A_not_C_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3
; CHECK-LABEL: ternary_A_not_C_and_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxlnor vs0, v4, v4
-; CHECK-NEXT: xxland vs1, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 26
; CHECK-NEXT: blr
entry:
%not = xor <4 x i32> %C, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector not operation
@@ -148,12 +134,10 @@ define <2 x i64> @ternary_A_not_C_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6
; CHECK-LABEL: ternary_A_not_C_and_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxlnor vs0, v4, v4
-; CHECK-NEXT: xxland vs1, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 26
; CHECK-NEXT: blr
entry:
%not = xor <2 x i64> %C, <i64 -1, i64 -1> ; Vector not operation
@@ -167,11 +151,9 @@ define <4 x i32> @ternary_A_not_B_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3
; CHECK-LABEL: ternary_A_not_B_and_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxlnor vs0, v3, v3
-; CHECK-NEXT: xxland vs1, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 28
; CHECK-NEXT: blr
entry:
%not = xor <4 x i32> %B, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector not operation
@@ -185,12 +167,10 @@ define <2 x i64> @ternary_A_not_B_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6
; CHECK-LABEL: ternary_A_not_B_and_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxlnor vs0, v3, v3
-; CHECK-NEXT: xxland vs1, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 28
; CHECK-NEXT: blr
entry:
%not = xor <2 x i64> %B, <i64 -1, i64 -1> ; Vector not operation
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll
index 37a0edb14b78f..c36fd68ba0ece 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; Test file to verify the emission of Vector selection instructions when ternary operators are used.
+; Test file to verify the emission of Vector Evaluate instructions when ternary operators are used.
; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \
; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s
@@ -15,10 +15,9 @@ define <4 x i32> @ternary_A_and_BC_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
; CHECK-LABEL: ternary_A_and_BC_B_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxland vs0, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, v3, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 49
; CHECK-NEXT: blr
entry:
%and = and <4 x i32> %B, %C
@@ -31,11 +30,10 @@ define <2 x i64> @ternary_A_and_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
; CHECK-LABEL: ternary_A_and_BC_B_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxland vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, v3, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 49
; CHECK-NEXT: blr
entry:
%and = and <2 x i64> %B, %C
@@ -48,10 +46,9 @@ define <4 x i32> @ternary_A_nor_BC_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
; CHECK-LABEL: ternary_A_nor_BC_B_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxlnor vs0, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, v3, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 56
; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %B, %C
@@ -65,11 +62,10 @@ define <2 x i64> @ternary_A_nor_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
; CHECK-LABEL: ternary_A_nor_BC_B_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxlnor vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, v3, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 56
; CHECK-NEXT: blr
entry:
%or = or <2 x i64> %B, %C
@@ -83,10 +79,9 @@ define <4 x i32> @ternary_A_eqv_BC_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
; CHECK-LABEL: ternary_A_eqv_BC_B_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxleqv vs0, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, v3, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 57
; CHECK-NEXT: blr
entry:
%xor = xor <4 x i32> %B, %C
@@ -100,11 +95,10 @@ define <2 x i64> @ternary_A_eqv_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
; CHECK-LABEL: ternary_A_eqv_BC_B_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxleqv vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, v3, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 57
; CHECK-NEXT: blr
entry:
%xor = xor <2 x i64> %B, %C
@@ -118,10 +112,9 @@ define <4 x i32> @ternary_A_not_C_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C
; CHECK-LABEL: ternary_A_not_C_B_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxlnor vs0, v4, v4
; CHECK-NEXT: vslw v2, v2, v5
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, v3, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 58
; CHECK-NEXT: blr
entry:
%not = xor <4 x i32> %C, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector not operation
@@ -134,11 +127,10 @@ define <2 x i64> @ternary_A_not_C_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C
; CHECK-LABEL: ternary_A_not_C_B_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxlnor vs0, v4, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, v3, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 58
; CHECK-NEXT: blr
entry:
%not = xor <2 x i64> %C, <i64 -1, i64 -1> ; Vector not operation
@@ -151,10 +143,9 @@ define <4 x i32> @ternary_A_nand_BC_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32>
; CHECK-LABEL: ternary_A_nand_BC_B_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxlnand vs0, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, v3, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 62
; CHECK-NEXT: blr
entry:
%and = and <4 x i32> %B, %C
@@ -168,11 +159,10 @@ define <2 x i64> @ternary_A_nand_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64>
; CHECK-LABEL: ternary_A_nand_BC_B_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxlnand vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, v3, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 62
; CHECK-NEXT: blr
entry:
%and = and <2 x i64> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll
index 411aa27a61861..54fda6063bfac 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; Test file to verify the emission of Vector selection instructions when ternary operators are used.
+; Test file to verify the emission of Vector Evaluate instructions when ternary operators are used.
; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \
; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s
@@ -15,10 +15,9 @@ define <4 x i32> @ternary_A_and_BC_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
; CHECK-LABEL: ternary_A_and_BC_C_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxland vs0, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, v4, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 81
; CHECK-NEXT: blr
entry:
%and = and <4 x i32> %B, %C
@@ -31,11 +30,10 @@ define <2 x i64> @ternary_A_and_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
; CHECK-LABEL: ternary_A_and_BC_C_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxland vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, v4, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 81
; CHECK-NEXT: blr
entry:
%and = and <2 x i64> %B, %C
@@ -48,10 +46,9 @@ define <4 x i32> @ternary_A_nor_BC_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
; CHECK-LABEL: ternary_A_nor_BC_C_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxlnor vs0, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, v4, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 88
; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %B, %C
@@ -65,11 +62,10 @@ define <2 x i64> @ternary_A_nor_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
; CHECK-LABEL: ternary_A_nor_BC_C_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxlnor vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, v4, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 88
; CHECK-NEXT: blr
entry:
%or = or <2 x i64> %B, %C
@@ -83,10 +79,9 @@ define <4 x i32> @ternary_A_eqv_BC_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
; CHECK-LABEL: ternary_A_eqv_BC_C_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxleqv vs0, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, v4, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 89
; CHECK-NEXT: blr
entry:
%xor = xor <4 x i32> %B, %C
@@ -100,11 +95,10 @@ define <2 x i64> @ternary_A_eqv_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
; CHECK-LABEL: ternary_A_eqv_BC_C_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxleqv vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, v4, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 89
; CHECK-NEXT: blr
entry:
%xor = xor <2 x i64> %B, %C
@@ -118,10 +112,9 @@ define <4 x i32> @ternary_A_nand_BC_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32>
; CHECK-LABEL: ternary_A_nand_BC_C_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxlnand vs0, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, v4, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 94
; CHECK-NEXT: blr
entry:
%and = and <4 x i32> %B, %C
@@ -135,11 +128,10 @@ define <2 x i64> @ternary_A_nand_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64>
; CHECK-LABEL: ternary_A_nand_BC_C_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxlnand vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, v4, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 94
; CHECK-NEXT: blr
entry:
%and = and <2 x i64> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-or.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-or.ll
index 1ad7e95e3682e..c956785a757ca 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-or.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-or.ll
@@ -120,11 +120,11 @@ define <4 x i32> @ternary_A_eqv_BC_or_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3
; CHECK-LABEL: ternary_A_eqv_BC_or_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxleqv vs0, v3, v4
+; CHECK-NEXT: xxlxor vs0, v3, v4
; CHECK-NEXT: xxlor vs1, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, vs1, vs0, 58
; CHECK-NEXT: blr
entry:
%xor = xor <4 x i32> %B, %C
@@ -159,11 +159,10 @@ define <4 x i32> @ternary_A_not_C_or_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32
; CHECK-LABEL: ternary_A_not_C_or_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxlnor vs0, v4, v4
-; CHECK-NEXT: xxlor vs1, v3, v4
+; CHECK-NEXT: xxlor vs0, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, vs0, v4, 58
; CHECK-NEXT: blr
entry:
%not = xor <4 x i32> %C, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector not operation
@@ -177,12 +176,11 @@ define <2 x i64> @ternary_A_not_C_or_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64
; CHECK-LABEL: ternary_A_not_C_or_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxlnor vs0, v4, v4
-; CHECK-NEXT: xxlor vs1, v3, v4
+; CHECK-NEXT: xxlor vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, vs0, v4, 58
; CHECK-NEXT: blr
entry:
%not = xor <2 x i64> %C, <i64 -1, i64 -1> ; Vector not operation
@@ -196,11 +194,10 @@ define <4 x i32> @ternary_A_not_B_or_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32
; CHECK-LABEL: ternary_A_not_B_or_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxlnor vs0, v3, v3
-; CHECK-NEXT: xxlor vs1, v3, v4
+; CHECK-NEXT: xxlor vs0, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, vs0, v3, 58
; CHECK-NEXT: blr
entry:
%not = xor <4 x i32> %B, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector not operation
@@ -214,12 +211,11 @@ define <2 x i64> @ternary_A_not_B_or_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64
; CHECK-LABEL: ternary_A_not_B_or_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxlnor vs0, v3, v3
-; CHECK-NEXT: xxlor vs1, v3, v4
+; CHECK-NEXT: xxlor vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, vs0, v3, 58
; CHECK-NEXT: blr
entry:
%not = xor <2 x i64> %B, <i64 -1, i64 -1> ; Vector not operation
@@ -233,11 +229,11 @@ define <4 x i32> @ternary_A_nand_BC_or_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
; CHECK-LABEL: ternary_A_nand_BC_or_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxlnand vs0, v3, v4
+; CHECK-NEXT: xxland vs0, v3, v4
; CHECK-NEXT: xxlor vs1, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, vs1, vs0, 58
; CHECK-NEXT: blr
entry:
%and = and <4 x i32> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll
index f8a8b6e9a0486..74d3a3bf6196e 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; Test file to verify the emission of Vector selection instructions when ternary operators are used.
+; Test file to verify the emission of Vector Evaluate instructions when ternary operators are used.
; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \
; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s
@@ -15,11 +15,9 @@ define <4 x i32> @ternary_A_and_BC_xor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
; CHECK-LABEL: ternary_A_and_BC_xor_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxland vs0, v3, v4
-; CHECK-NEXT: xxlxor vs1, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 97
; CHECK-NEXT: blr
entry:
%and = and <4 x i32> %B, %C
@@ -33,12 +31,10 @@ define <2 x i64> @ternary_A_and_BC_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
; CHECK-LABEL: ternary_A_and_BC_xor_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxland vs0, v3, v4
-; CHECK-NEXT: xxlxor vs1, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 97
; CHECK-NEXT: blr
entry:
%and = and <2 x i64> %B, %C
@@ -52,10 +48,9 @@ define <4 x i32> @ternary_A_B_xor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
; CHECK-LABEL: ternary_A_B_xor_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxlxor vs0, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs0, v3, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 99
; CHECK-NEXT: blr
entry:
%xor = xor <4 x i32> %B, %C
@@ -68,11 +63,10 @@ define <2 x i64> @ternary_A_B_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
; CHECK-LABEL: ternary_A_B_xor_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxlxor vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs0, v3, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 99
; CHECK-NEXT: blr
entry:
%xor = xor <2 x i64> %B, %C
@@ -85,10 +79,9 @@ define <4 x i32> @ternary_A_C_xor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
; CHECK-LABEL: ternary_A_C_xor_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxlxor vs0, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs0, v4, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 101
; CHECK-NEXT: blr
entry:
%xor = xor <4 x i32> %B, %C
@@ -101,11 +94,10 @@ define <2 x i64> @ternary_A_C_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
; CHECK-LABEL: ternary_A_C_xor_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxlxor vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs0, v4, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 101
; CHECK-NEXT: blr
entry:
%xor = xor <2 x i64> %B, %C
@@ -118,11 +110,9 @@ define <4 x i32> @ternary_A_or_BC_xor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3
; CHECK-LABEL: ternary_A_or_BC_xor_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxlor vs0, v3, v4
-; CHECK-NEXT: xxlxor vs1, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 103
; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %B, %C
@@ -136,12 +126,10 @@ define <2 x i64> @ternary_A_or_BC_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6
; CHECK-LABEL: ternary_A_or_BC_xor_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxlor vs0, v3, v4
-; CHECK-NEXT: xxlxor vs1, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 103
; CHECK-NEXT: blr
entry:
%or = or <2 x i64> %B, %C
@@ -155,11 +143,9 @@ define <4 x i32> @ternary_A_nor_BC_xor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
; CHECK-LABEL: ternary_A_nor_BC_xor_BC_4x32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: xxlnor vs0, v3, v4
-; CHECK-NEXT: xxlxor vs1, v3, v4
; CHECK-NEXT: vslw v2, v2, v5
; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 104
; CHECK-NEXT: blr
entry:
%or = or <4 x i32> %B, %C
@@ -174,12 +160,10 @@ define <2 x i64> @ternary_A_nor_BC_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
; CHECK-LABEL: ternary_A_nor_BC_xor_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxlnor vs0, v3, v4
-; CHECK-NEXT: xxlxor vs1, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
+; CHECK-NEXT: xxeval v2, v2, v3, v4, 104
; CHECK-NEXT: blr
entry:
%or = or <2 x i64> %B, %C
>From 9c8a424bfc0c235798ea2ce142963f011cc74ed3 Mon Sep 17 00:00:00 2001
From: Tony Varghese <tony.varghese at ibm.com>
Date: Fri, 20 Jun 2025 18:59:08 +0000
Subject: [PATCH 2/4] [PowerPC] xxeval instruction for ternary operations
support for v4i32
---
llvm/lib/Target/PowerPC/PPCInstrP10.td | 23 ++++---------------
.../CodeGen/PowerPC/xxeval-vselect-x-and.ll | 20 ++++++++++++----
.../CodeGen/PowerPC/xxeval-vselect-x-b.ll | 15 ++++++++----
.../CodeGen/PowerPC/xxeval-vselect-x-c.ll | 12 ++++++----
.../CodeGen/PowerPC/xxeval-vselect-x-or.ll | 10 ++++----
.../CodeGen/PowerPC/xxeval-vselect-x-xor.ll | 18 +++++++++++----
6 files changed, 56 insertions(+), 42 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td
index d0985f4a3a9bb..947b3e188b5a3 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrP10.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td
@@ -2171,26 +2171,15 @@ class DagCondVNot<dag d, bit negate> {
class XXEvalUnaryPattern<ValueType vt> {
// vnot Operand B
- dag vnotB = !cond(
- !eq(vt, v4i32) : (vnot v4i32:$vB),
- !eq(vt, v2i64) : (v2i64 (bitconvert (vnot (v4i32 (bitconvert v2i64:$vB)))))
- );
+ dag vnotB = (vnot vt:$vB);
// vnot Operand C
- dag vnotC = !cond(
- !eq(vt, v4i32) : (vnot v4i32:$vC),
- !eq(vt, v2i64) : (v2i64 (bitconvert (vnot (v4i32 (bitconvert v2i64:$vC)))))
- );
+ dag vnotC = (vnot vt:$vC);
}
class XXEvalBinaryPattern<ValueType vt, SDPatternOperator op, bit notResult = 0> {
// Defines a wrapper class for binary patterns with optional NOT on result.
// Generate op pattern with optional NOT wrapping for result depending on "notResult".
- dag opPat = !cond(
- !eq(vt, v4i32) : DagCondVNot<(op v4i32:$vB, v4i32:$vC), notResult>.res,
- !eq(vt, v2i64) : (v2i64 (bitconvert DagCondVNot<(op
- (v4i32 (bitconvert v2i64:$vB)),
- (v4i32 (bitconvert v2i64:$vC))), notResult>.res))
- );
+ dag opPat = DagCondVNot<(op vt:$vB, vt:$vC), notResult>.res;
}
multiclass XXEvalVSelectWithXAnd<ValueType vt, bits<8> baseImm> {
@@ -2395,16 +2384,12 @@ let Predicates = [PrefixInstrs, HasP10Vector] in {
// Utilize xxeval instruction for ternary vector expressions.
defm : XXEvalVSelectWithXAnd<v4i32, 22>;
- defm : XXEvalVSelectWithXAnd<v2i64, 22>;
defm : XXEvalVSelectWithXB<v4i32, 49>;
- defm : XXEvalVSelectWithXB<v2i64, 49>;
defm : XXEvalVSelectWithXC<v4i32, 81>;
- defm : XXEvalVSelectWithXC<v2i64, 81>;
-
+
defm : XXEvalVSelectWithXXor<v4i32, 97>;
- defm : XXEvalVSelectWithXXor<v2i64, 97>;
// Anonymous patterns to select prefixed VSX loads and stores.
// Load / Store f128
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
index 19305336f78df..dfb910b8d0c75 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
@@ -31,10 +31,12 @@ define <2 x i64> @ternary_A_xor_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
; CHECK-LABEL: ternary_A_xor_BC_and_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxlxor vs0, v3, v4
+; CHECK-NEXT: xxland vs1, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 22
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%xor = xor <2 x i64> %B, %C
@@ -65,10 +67,12 @@ define <2 x i64> @ternary_A_nor_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
; CHECK-LABEL: ternary_A_nor_BC_and_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxlnor vs0, v3, v4
+; CHECK-NEXT: xxland vs1, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 24
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%or = or <2 x i64> %B, %C
@@ -100,10 +104,12 @@ define <2 x i64> @ternary_A_eqv_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
; CHECK-LABEL: ternary_A_eqv_BC_and_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxleqv vs0, v3, v4
+; CHECK-NEXT: xxland vs1, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 25
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%xor = xor <2 x i64> %B, %C
@@ -134,10 +140,12 @@ define <2 x i64> @ternary_A_not_C_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6
; CHECK-LABEL: ternary_A_not_C_and_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxlnor vs0, v4, v4
+; CHECK-NEXT: xxland vs1, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 26
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%not = xor <2 x i64> %C, <i64 -1, i64 -1> ; Vector not operation
@@ -167,10 +175,12 @@ define <2 x i64> @ternary_A_not_B_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6
; CHECK-LABEL: ternary_A_not_B_and_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxlnor vs0, v3, v3
+; CHECK-NEXT: xxland vs1, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 28
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%not = xor <2 x i64> %B, <i64 -1, i64 -1> ; Vector not operation
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll
index c36fd68ba0ece..9943c35dfb134 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll
@@ -30,10 +30,11 @@ define <2 x i64> @ternary_A_and_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
; CHECK-LABEL: ternary_A_and_BC_B_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxland vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 49
+; CHECK-NEXT: xxsel v2, v3, vs0, v2
; CHECK-NEXT: blr
entry:
%and = and <2 x i64> %B, %C
@@ -62,10 +63,11 @@ define <2 x i64> @ternary_A_nor_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
; CHECK-LABEL: ternary_A_nor_BC_B_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxlnor vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 56
+; CHECK-NEXT: xxsel v2, v3, vs0, v2
; CHECK-NEXT: blr
entry:
%or = or <2 x i64> %B, %C
@@ -95,10 +97,11 @@ define <2 x i64> @ternary_A_eqv_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
; CHECK-LABEL: ternary_A_eqv_BC_B_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxleqv vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 57
+; CHECK-NEXT: xxsel v2, v3, vs0, v2
; CHECK-NEXT: blr
entry:
%xor = xor <2 x i64> %B, %C
@@ -127,10 +130,11 @@ define <2 x i64> @ternary_A_not_C_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C
; CHECK-LABEL: ternary_A_not_C_B_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxlnor vs0, v4, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 58
+; CHECK-NEXT: xxsel v2, v3, vs0, v2
; CHECK-NEXT: blr
entry:
%not = xor <2 x i64> %C, <i64 -1, i64 -1> ; Vector not operation
@@ -159,10 +163,11 @@ define <2 x i64> @ternary_A_nand_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64>
; CHECK-LABEL: ternary_A_nand_BC_B_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxlnand vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 62
+; CHECK-NEXT: xxsel v2, v3, vs0, v2
; CHECK-NEXT: blr
entry:
%and = and <2 x i64> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll
index 54fda6063bfac..f770cf768147a 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll
@@ -30,10 +30,11 @@ define <2 x i64> @ternary_A_and_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
; CHECK-LABEL: ternary_A_and_BC_C_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxland vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 81
+; CHECK-NEXT: xxsel v2, v4, vs0, v2
; CHECK-NEXT: blr
entry:
%and = and <2 x i64> %B, %C
@@ -62,10 +63,11 @@ define <2 x i64> @ternary_A_nor_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
; CHECK-LABEL: ternary_A_nor_BC_C_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxlnor vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 88
+; CHECK-NEXT: xxsel v2, v4, vs0, v2
; CHECK-NEXT: blr
entry:
%or = or <2 x i64> %B, %C
@@ -95,10 +97,11 @@ define <2 x i64> @ternary_A_eqv_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
; CHECK-LABEL: ternary_A_eqv_BC_C_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxleqv vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 89
+; CHECK-NEXT: xxsel v2, v4, vs0, v2
; CHECK-NEXT: blr
entry:
%xor = xor <2 x i64> %B, %C
@@ -128,10 +131,11 @@ define <2 x i64> @ternary_A_nand_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64>
; CHECK-LABEL: ternary_A_nand_BC_C_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxlnand vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 94
+; CHECK-NEXT: xxsel v2, v4, vs0, v2
; CHECK-NEXT: blr
entry:
%and = and <2 x i64> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-or.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-or.ll
index c956785a757ca..bf2b691056781 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-or.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-or.ll
@@ -176,11 +176,12 @@ define <2 x i64> @ternary_A_not_C_or_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64
; CHECK-LABEL: ternary_A_not_C_or_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxlor vs0, v3, v4
+; CHECK-NEXT: xxlnor vs0, v4, v4
+; CHECK-NEXT: xxlor vs1, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, vs0, v4, 58
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%not = xor <2 x i64> %C, <i64 -1, i64 -1> ; Vector not operation
@@ -211,11 +212,12 @@ define <2 x i64> @ternary_A_not_B_or_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64
; CHECK-LABEL: ternary_A_not_B_or_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxlor vs0, v3, v4
+; CHECK-NEXT: xxlnor vs0, v3, v3
+; CHECK-NEXT: xxlor vs1, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, vs0, v3, 58
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%not = xor <2 x i64> %B, <i64 -1, i64 -1> ; Vector not operation
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll
index 74d3a3bf6196e..2725525846dd8 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll
@@ -31,10 +31,12 @@ define <2 x i64> @ternary_A_and_BC_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
; CHECK-LABEL: ternary_A_and_BC_xor_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxland vs0, v3, v4
+; CHECK-NEXT: xxlxor vs1, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 97
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%and = and <2 x i64> %B, %C
@@ -63,10 +65,11 @@ define <2 x i64> @ternary_A_B_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
; CHECK-LABEL: ternary_A_B_xor_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxlxor vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 99
+; CHECK-NEXT: xxsel v2, vs0, v3, v2
; CHECK-NEXT: blr
entry:
%xor = xor <2 x i64> %B, %C
@@ -94,10 +97,11 @@ define <2 x i64> @ternary_A_C_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
; CHECK-LABEL: ternary_A_C_xor_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxlxor vs0, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 101
+; CHECK-NEXT: xxsel v2, vs0, v4, v2
; CHECK-NEXT: blr
entry:
%xor = xor <2 x i64> %B, %C
@@ -126,10 +130,12 @@ define <2 x i64> @ternary_A_or_BC_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6
; CHECK-LABEL: ternary_A_or_BC_xor_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxlor vs0, v3, v4
+; CHECK-NEXT: xxlxor vs1, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 103
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%or = or <2 x i64> %B, %C
@@ -160,10 +166,12 @@ define <2 x i64> @ternary_A_nor_BC_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
; CHECK-LABEL: ternary_A_nor_BC_xor_BC_2x64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v5, v5, v5
+; CHECK-NEXT: xxlnor vs0, v3, v4
+; CHECK-NEXT: xxlxor vs1, v3, v4
; CHECK-NEXT: xxsplti32dx v5, 1, 63
; CHECK-NEXT: vsld v2, v2, v5
; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 104
+; CHECK-NEXT: xxsel v2, vs1, vs0, v2
; CHECK-NEXT: blr
entry:
%or = or <2 x i64> %B, %C
>From e96037b5b5cf300268a89e0f7f44461f40ba8318 Mon Sep 17 00:00:00 2001
From: Tony Varghese <tony.varghese at ibm.com>
Date: Mon, 23 Jun 2025 14:07:33 +0000
Subject: [PATCH 3/4] [draft] support v2i64 xxeval
---
llvm/lib/Target/PowerPC/CMakeLists.txt | 5 +
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 116 +++++++++++++++++++-
llvm/lib/Target/PowerPC/PPCISelLowering.h | 1 +
llvm/lib/Target/PowerPC/PPCInstrP10.td | 7 +-
4 files changed, 125 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt
index 3808a26a0b92a..587cd13c7b28b 100644
--- a/llvm/lib/Target/PowerPC/CMakeLists.txt
+++ b/llvm/lib/Target/PowerPC/CMakeLists.txt
@@ -83,6 +83,11 @@ add_llvm_target(PowerPCCodeGen
PowerPC
)
+set_source_files_properties(
+ PPCISelLowering.cpp
+ PROPERTIES COMPILE_FLAGS "-g -O0"
+)
+
add_subdirectory(AsmParser)
add_subdirectory(Disassembler)
add_subdirectory(MCTargetDesc)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 59c89985c6cff..a13d9b73246a7 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -844,7 +844,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
setOperationAction(ISD::SELECT, VT, Promote);
AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
- setOperationAction(ISD::VSELECT, VT, Legal);
+ setOperationAction(ISD::VSELECT, VT, Legal);
setOperationAction(ISD::SELECT_CC, VT, Promote);
AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
setOperationAction(ISD::STORE, VT, Promote);
@@ -9579,6 +9579,119 @@ static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
return false;
}
+// Lower the vector operands of the VSELECT Node
+// The operands of the VSELECT nodes needs to modifed back if:
+// - the operands of the VSELECT are bitcast (op (bitcast x), (bitcast y))
+// - the outer bitcast is VT and inner bitcast is v4i32
+// - VSELECT Node type is not v4i32 and is of type v2i64
+// Then operands needs to put back to their original types.
+SDValue PPCTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const{
+ LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT \n" );
+ LLVM_DEBUG(DAG.dump(););
+ // Return early if the VT of the Op is v4i32
+ EVT VT = Op.getValueType();
+ if (VT == MVT::v4i32) {
+ LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: VT is v4i32 \n");
+ return SDValue(); // No need to lower, return original Op
+ }
+ LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: VT is not v4i32 \n");
+ // If the VT is v2i64, we need to check the conditions:
+ // - the operands of the VSELECT are bitcast (op (bitcast x), (bitcast y))
+ // - the outer bitcast is VT and inner bitcast is v4i32
+ // - VSELECT Node type is not v4i32 and is of type v2i64
+ SDValue Cond = Op.getOperand(0);
+ SDValue TrueOp = Op.getOperand(1);
+ SDValue FalseOp = Op.getOperand(2);
+
+ auto checkIfValidPattern = [](SDValue V) -> bool {
+ // Check if the operand is a bitcast
+ if (V.getOpcode() != ISD::BITCAST) {
+ return false; // Return false if not a bitcast
+ }
+ // Check if the inner node is a valid ADD, XOR or OR operation
+ SDValue InnerOp = V.getOperand(0);
+ // Check if the inner node is an ADD, XOR or OR operation
+ bool isValidInnerNode =
+ InnerOp.getOpcode() == ISD::AND || InnerOp.getOpcode() == ISD::XOR ||
+ InnerOp.getOpcode() == ISD::OR;
+ if (!isValidInnerNode) {
+ return false; // Return false if the inner node is not valid
+ }
+ // Get the Bit Op node's Operands
+ SDValue InnerBitOpOperand0 = InnerOp.getOperand(0);
+ SDValue InnerBitOpOperand1 = InnerOp.getOperand(1);
+ bool isValidInnerBitcasts =
+ (InnerBitOpOperand0.getOpcode() == ISD::BITCAST &&
+ InnerBitOpOperand0.getValueType() == MVT::v4i32) &&
+ (InnerBitOpOperand1.getOpcode() == ISD::BITCAST &&
+ InnerBitOpOperand1.getValueType() == MVT::v4i32);
+ if (!isValidInnerBitcasts) {
+ return false; // Return false if the inner bitcasts are not valid
+ }
+ // If all checks passed, return true
+ return true;
+ };
+
+ auto getOriginalNode = [&DAG](SDValue V) -> SDValue {
+ SDValue InnerOp = V.getOperand(0);
+ // Get the Bit Op node's Operands
+ SDValue InnerBitOpOperand0 = InnerOp.getOperand(0);
+ SDValue InnerBitOpOperand1 = InnerOp.getOperand(1);
+ // Get the operands of the inner bit operation
+ SDValue X = InnerBitOpOperand0.getOperand(0);
+ SDValue Y = InnerBitOpOperand1.getOperand(0);
+ return DAG.getNode(
+ InnerOp.getOpcode(), SDLoc(V), V.getValueType(),
+ X, Y);
+ };
+
+ if(checkIfValidPattern(TrueOp) && checkIfValidPattern(FalseOp)) {
+ // If the TrueOp and FalseOp are valid patterns, get the original nodes
+ // and return the VSELECT node with the original nodes.
+ LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: Valid pattern \n");
+ } else {
+ // If the TrueOp and FalseOp are not valid patterns, return the original Op
+ // without modification.
+ LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: Invalid pattern \n");
+ return SDValue();
+ }
+
+ // Get the original nodes from the TrueOp and FalseOp
+ SDValue NTrueOp = getOriginalNode(TrueOp);
+ SDValue NFalseOp = getOriginalNode(FalseOp);
+ // LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: NTrueOp: " << NTrueOp.getNode()->print(dbgs()) << "\n");
+ // LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: NFalseOp: " << NFalseOp.getNode()->dump() << "\n");
+
+ // Ensure both NTrueOp and NFalseOp are valid before using them.
+ if (!NTrueOp.getNode() || !NFalseOp.getNode()) {
+ LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: One or both original nodes are invalid, returning original Op\n");
+ return SDValue();
+ }
+ EVT MaskVT = Cond.getValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT LegalMaskVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MaskVT);
+
+ LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: cond mask VT: " << MaskVT);
+ LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: cond Legal VT: " << LegalMaskVT << "\n");
+ if (MaskVT != LegalMaskVT) {
+ LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: cond LEGALIZATION\n");
+ // Bitcast or extend/truncate as needed
+ Cond = DAG.getZExtOrTrunc(Cond, SDLoc(Op), LegalMaskVT);
+ }
+
+ SDValue NewVselectNode = DAG.getNode(
+ ISD::VSELECT, SDLoc(Op), VT, Cond, NTrueOp, NFalseOp);
+ DAG.ReplaceAllUsesWith(Op, NewVselectNode);
+ // if (Op.getNode()->use_empty()) {
+ // DAG.RemoveDeadNode(Op.getNode());
+ // }
+
+ LLVM_DEBUG(NewVselectNode.dump());
+ LLVM_DEBUG(llvm::dbgs() << "New DAG \n");
+ LLVM_DEBUG(DAG.dump());
+ return SDValue();
+}
+
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
@@ -12532,6 +12645,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FSHR: return LowerFunnelShift(Op, DAG);
// Vector-related lowering.
+ case ISD::VSELECT: return LowerVSELECT(Op, DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 2c55b5427297a..7ad91dcdd21e9 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1311,6 +1311,7 @@ namespace llvm {
SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVPERM(SDValue Op, SelectionDAG &DAG, ArrayRef<int> PermMask,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td
index 947b3e188b5a3..eb1d18e3a4e90 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrP10.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td
@@ -2384,12 +2384,13 @@ let Predicates = [PrefixInstrs, HasP10Vector] in {
// Utilize xxeval instruction for ternary vector expressions.
defm : XXEvalVSelectWithXAnd<v4i32, 22>;
-
defm : XXEvalVSelectWithXB<v4i32, 49>;
-
defm : XXEvalVSelectWithXC<v4i32, 81>;
-
defm : XXEvalVSelectWithXXor<v4i32, 97>;
+ defm : XXEvalVSelectWithXAnd<v2i64, 22>;
+ defm : XXEvalVSelectWithXB<v2i64, 49>;
+ defm : XXEvalVSelectWithXC<v2i64, 81>;
+ defm : XXEvalVSelectWithXXor<v2i64, 97>;
// Anonymous patterns to select prefixed VSX loads and stores.
// Load / Store f128
>From de92b57553a4cb9f88229a64e49aa5be455cfd72 Mon Sep 17 00:00:00 2001
From: Tony Varghese <tony.varghese at ibm.com>
Date: Tue, 24 Jun 2025 18:40:20 +0000
Subject: [PATCH 4/4] [PowerPC] Emit xxeval instruction for v2i64 type operands
of ternary
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 279 ++++++++++++------
llvm/lib/Target/PowerPC/PPCISelLowering.h | 4 +
llvm/lib/Target/PowerPC/PPCInstrP10.td | 15 +-
llvm/lib/Target/PowerPC/PPCInstrVSX.td | 6 +
.../CodeGen/PowerPC/xxeval-vselect-x-and.ll | 164 ----------
5 files changed, 209 insertions(+), 259 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index a13d9b73246a7..ec12c665fb719 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -844,7 +844,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
setOperationAction(ISD::SELECT, VT, Promote);
AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
- setOperationAction(ISD::VSELECT, VT, Legal);
+ setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Promote);
AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
setOperationAction(ISD::STORE, VT, Promote);
@@ -1690,6 +1690,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "PPCISD::XXSPLTI_SP_TO_DP";
case PPCISD::XXSPLTI32DX:
return "PPCISD::XXSPLTI32DX";
+ case PPCISD::VSELECT: return "PPCISD::VSELECT";
case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
case PPCISD::XXPERM:
@@ -9579,119 +9580,215 @@ static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
return false;
}
-// Lower the vector operands of the VSELECT Node
-// The operands of the VSELECT nodes needs to modifed back if:
-// - the operands of the VSELECT are bitcast (op (bitcast x), (bitcast y))
-// - the outer bitcast is VT and inner bitcast is v4i32
-// - VSELECT Node type is not v4i32 and is of type v2i64
-// Then operands needs to put back to their original types.
-SDValue PPCTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const{
- LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT \n" );
- LLVM_DEBUG(DAG.dump(););
- // Return early if the VT of the Op is v4i32
+SDValue PPCTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
+ LLVM_DEBUG(llvm::dbgs() << "LowerVSELECT\n"; DAG.dump());
+
EVT VT = Op.getValueType();
if (VT == MVT::v4i32) {
- LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: VT is v4i32 \n");
- return SDValue(); // No need to lower, return original Op
- }
- LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: VT is not v4i32 \n");
- // If the VT is v2i64, we need to check the conditions:
- // - the operands of the VSELECT are bitcast (op (bitcast x), (bitcast y))
- // - the outer bitcast is VT and inner bitcast is v4i32
- // - VSELECT Node type is not v4i32 and is of type v2i64
+ LLVM_DEBUG(llvm::dbgs() << "LowerVSELECT: VT is v4i32, default lowering\n");
+ return SDValue(); // Default lowering
+ }
+
+ if (VT != MVT::v2i64) {
+ LLVM_DEBUG(llvm::dbgs() << "LowerVSELECT: VT is not v2i64, default lowering\n");
+ return SDValue();
+ }
+
SDValue Cond = Op.getOperand(0);
SDValue TrueOp = Op.getOperand(1);
SDValue FalseOp = Op.getOperand(2);
- auto checkIfValidPattern = [](SDValue V) -> bool {
- // Check if the operand is a bitcast
- if (V.getOpcode() != ISD::BITCAST) {
- return false; // Return false if not a bitcast
- }
- // Check if the inner node is a valid ADD, XOR or OR operation
- SDValue InnerOp = V.getOperand(0);
- // Check if the inner node is an ADD, XOR or OR operation
- bool isValidInnerNode =
- InnerOp.getOpcode() == ISD::AND || InnerOp.getOpcode() == ISD::XOR ||
- InnerOp.getOpcode() == ISD::OR;
- if (!isValidInnerNode) {
- return false; // Return false if the inner node is not valid
- }
- // Get the Bit Op node's Operands
- SDValue InnerBitOpOperand0 = InnerOp.getOperand(0);
- SDValue InnerBitOpOperand1 = InnerOp.getOperand(1);
- bool isValidInnerBitcasts =
- (InnerBitOpOperand0.getOpcode() == ISD::BITCAST &&
- InnerBitOpOperand0.getValueType() == MVT::v4i32) &&
- (InnerBitOpOperand1.getOpcode() == ISD::BITCAST &&
- InnerBitOpOperand1.getValueType() == MVT::v4i32);
- if (!isValidInnerBitcasts) {
- return false; // Return false if the inner bitcasts are not valid
- }
- // If all checks passed, return true
+  // Helper to check for the pattern: BITCAST (BinOp (BITCAST x), (BITCAST y)),
+  // where BinOp is AND, XOR or OR promoted to v4i32.
+ auto isPromotedBitcastBinop = [](SDValue V, unsigned &BinOpcode, SDValue &X, SDValue &Y) -> bool {
+ LLVM_DEBUG(llvm::dbgs() << "isPromotedBitCastBinop: \n");
+ LLVM_DEBUG(llvm::dbgs() << "Binop Op: "; V->dump());
+ LLVM_DEBUG(llvm::dbgs() << "\n");
+
+ if (V.getOpcode() != ISD::BITCAST)
+ return false;
+ SDValue BinOp = V.getOperand(0);
+ if (BinOp.getOpcode() != ISD::AND &&
+ BinOp.getOpcode() != ISD::XOR &&
+ BinOp.getOpcode() != ISD::OR)
+ return false;
+ // Both operands must be BITCAST from v4i32
+ SDValue BC0 = BinOp.getOperand(0);
+ SDValue BC1 = BinOp.getOperand(1);
+ if (BC0.getOpcode() != ISD::BITCAST || BC1.getOpcode() != ISD::BITCAST)
+ return false;
+ if (BC0.getValueType() != MVT::v4i32 || BC1.getValueType() != MVT::v4i32)
+ return false;
+ // The inner operands are the original v2i64 values
+ X = BC0.getOperand(0);
+ Y = BC1.getOperand(0);
+ BinOpcode = BinOp.getOpcode();
return true;
};
- auto getOriginalNode = [&DAG](SDValue V) -> SDValue {
- SDValue InnerOp = V.getOperand(0);
- // Get the Bit Op node's Operands
- SDValue InnerBitOpOperand0 = InnerOp.getOperand(0);
- SDValue InnerBitOpOperand1 = InnerOp.getOperand(1);
- // Get the operands of the inner bit operation
- SDValue X = InnerBitOpOperand0.getOperand(0);
- SDValue Y = InnerBitOpOperand1.getOperand(0);
- return DAG.getNode(
- InnerOp.getOpcode(), SDLoc(V), V.getValueType(),
- X, Y);
- };
-
- if(checkIfValidPattern(TrueOp) && checkIfValidPattern(FalseOp)) {
- // If the TrueOp and FalseOp are valid patterns, get the original nodes
- // and return the VSELECT node with the original nodes.
- LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: Valid pattern \n");
- } else {
- // If the TrueOp and FalseOp are not valid patterns, return the original Op
- // without modification.
- LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: Invalid pattern \n");
+ unsigned TrueBinOpcode = 0, FalseBinOpcode = 0;
+ SDValue TrueX, TrueY, FalseX, FalseY;
+ if (!isPromotedBitcastBinop(TrueOp, TrueBinOpcode, TrueX, TrueY) ||
+ !isPromotedBitcastBinop(FalseOp, FalseBinOpcode, FalseX, FalseY)) {
+ LLVM_DEBUG(llvm::dbgs() << "LowerVSELECT: Pattern not matched, default lowering\n");
return SDValue();
}
- // Get the original nodes from the TrueOp and FalseOp
- SDValue NTrueOp = getOriginalNode(TrueOp);
- SDValue NFalseOp = getOriginalNode(FalseOp);
- // LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: NTrueOp: " << NTrueOp.getNode()->print(dbgs()) << "\n");
- // LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: NFalseOp: " << NFalseOp.getNode()->dump() << "\n");
+ LLVM_DEBUG(llvm::dbgs() << "LowerVSELECT: TrueBinOpCode: " << TrueBinOpcode);
+ LLVM_DEBUG(llvm::dbgs() << "\nLowerVSELECT: FalseBinOpCode: " << FalseBinOpcode);
+
+ // For the specific pattern: VSELECT(cond, XOR, AND)
+ if (!(TrueBinOpcode == ISD::XOR && FalseBinOpcode == ISD::AND)) {
+ LLVM_DEBUG(llvm::dbgs() << "LowerVSELECT: Not AND/XOR pattern, default lowering\n");
+ return SDValue();
+ }
- // Ensure both NTrueOp and NFalseOp are valid before using them.
- if (!NTrueOp.getNode() || !NFalseOp.getNode()) {
- LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: One or both original nodes are invalid, returning original Op\n");
+ // The operands to AND and XOR must be the same
+ if (!(TrueX == FalseX && TrueY == FalseY)) {
+ LLVM_DEBUG(llvm::dbgs() << "LowerVSELECT: AND/XOR operands mismatch, default lowering\n");
return SDValue();
}
+
+ // Rebuild the original v2i64 AND and XOR nodes
+ SDLoc DL(Op);
+ SDValue XorV2i64 = DAG.getNode(ISD::XOR, DL, VT, TrueX, TrueY);
+ SDValue AndV2i64 = DAG.getNode(ISD::AND, DL, VT, TrueX, TrueY);
+ LLVM_DEBUG(llvm::dbgs() << "LowerVSELECT: XOR Node : " ; XorV2i64->dump());
+ LLVM_DEBUG(llvm::dbgs() << "\nLowerVSELECT: AND Node : " ; AndV2i64->dump());
+
+ // Legalize the mask type if needed
EVT MaskVT = Cond.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT LegalMaskVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MaskVT);
+ if (MaskVT != LegalMaskVT)
+ Cond = DAG.getZExtOrTrunc(Cond, DL, LegalMaskVT);
- LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: cond mask VT: " << MaskVT);
- LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: cond Legal VT: " << LegalMaskVT << "\n");
- if (MaskVT != LegalMaskVT) {
- LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: cond LEGALIZATION\n");
- // Bitcast or extend/truncate as needed
- Cond = DAG.getZExtOrTrunc(Cond, SDLoc(Op), LegalMaskVT);
- }
+ // Emit the new PPCISD::VSELECT node (so it can match xxeval for v2i64)
+ SDValue NewVSelect = DAG.getNode(PPCISD::VSELECT, DL, VT, Cond, XorV2i64, AndV2i64);
+ DAG.ReplaceAllUsesWith(Op, NewVSelect);
+
+ LLVM_DEBUG(llvm::dbgs() << "LowerVSELECT: Emitted PPCISD::VSELECT for v2i64 AND/XOR pattern\n");
+ LLVM_DEBUG(NewVSelect.dump());
- SDValue NewVselectNode = DAG.getNode(
- ISD::VSELECT, SDLoc(Op), VT, Cond, NTrueOp, NFalseOp);
- DAG.ReplaceAllUsesWith(Op, NewVselectNode);
- // if (Op.getNode()->use_empty()) {
- // DAG.RemoveDeadNode(Op.getNode());
- // }
+ LLVM_DEBUG(llvm::dbgs() << "\nLowerVSELECT NEW DAG\n"; DAG.dump());
+ DAG.RemoveDeadNode(Op.getNode());
+ LLVM_DEBUG(llvm::dbgs() << "\nLowerVSELECT NEW DAG After removal\n"; DAG.dump());
- LLVM_DEBUG(NewVselectNode.dump());
- LLVM_DEBUG(llvm::dbgs() << "New DAG \n");
- LLVM_DEBUG(DAG.dump());
- return SDValue();
+ return NewVSelect;
}
+// Lower the vector operands of the VSELECT Node
+// The operands of the VSELECT node need to be modified back if:
+// - the operands of the VSELECT are bitcast (op (bitcast x), (bitcast y))
+// - the outer bitcast is VT and inner bitcast is v4i32
+// - VSELECT Node type is not v4i32 and is of type v2i64
+// Then the operands need to be put back to their original types.
+// SDValue PPCTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const{
+// LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT \n" );
+// LLVM_DEBUG(DAG.dump(););
+// // Return early if the VT of the Op is v4i32
+// EVT VT = Op.getValueType();
+// if (VT == MVT::v4i32) {
+// LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: VT is v4i32 \n");
+// return SDValue(); // No need to lower, return original Op
+// }
+// LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: VT is not v4i32 \n");
+// // If the VT is v2i64, we need to check the conditions:
+// // - the operands of the VSELECT are bitcast (op (bitcast x), (bitcast y))
+// // - the outer bitcast is VT and inner bitcast is v4i32
+// // - VSELECT Node type is not v4i32 and is of type v2i64
+// SDValue Cond = Op.getOperand(0);
+// SDValue TrueOp = Op.getOperand(1);
+// SDValue FalseOp = Op.getOperand(2);
+
+// auto checkIfValidPattern = [](SDValue V) -> bool {
+// // Check if the operand is a bitcast
+// if (V.getOpcode() != ISD::BITCAST) {
+// return false; // Return false if not a bitcast
+// }
+//     // Check if the inner node is a valid AND, XOR or OR operation
+//     SDValue InnerOp = V.getOperand(0);
+//     // Check if the inner node is an AND, XOR or OR operation
+// bool isValidInnerNode =
+// InnerOp.getOpcode() == ISD::AND || InnerOp.getOpcode() == ISD::XOR ||
+// InnerOp.getOpcode() == ISD::OR;
+// if (!isValidInnerNode) {
+// return false; // Return false if the inner node is not valid
+// }
+// // Get the Bit Op node's Operands
+// SDValue InnerBitOpOperand0 = InnerOp.getOperand(0);
+// SDValue InnerBitOpOperand1 = InnerOp.getOperand(1);
+// bool isValidInnerBitcasts =
+// (InnerBitOpOperand0.getOpcode() == ISD::BITCAST &&
+// InnerBitOpOperand0.getValueType() == MVT::v4i32) &&
+// (InnerBitOpOperand1.getOpcode() == ISD::BITCAST &&
+// InnerBitOpOperand1.getValueType() == MVT::v4i32);
+// if (!isValidInnerBitcasts) {
+// return false; // Return false if the inner bitcasts are not valid
+// }
+// // If all checks passed, return true
+// return true;
+// };
+
+// auto getOriginalNode = [&DAG](SDValue V) -> SDValue {
+// SDValue InnerOp = V.getOperand(0);
+// // Get the Bit Op node's Operands
+// SDValue InnerBitOpOperand0 = InnerOp.getOperand(0);
+// SDValue InnerBitOpOperand1 = InnerOp.getOperand(1);
+// // Get the operands of the inner bit operation
+// SDValue X = InnerBitOpOperand0.getOperand(0);
+// SDValue Y = InnerBitOpOperand1.getOperand(0);
+// return DAG.getNode(
+// InnerOp.getOpcode(), SDLoc(V), V.getValueType(),
+// X, Y);
+// };
+
+// if(checkIfValidPattern(TrueOp) && checkIfValidPattern(FalseOp)) {
+// // If the TrueOp and FalseOp are valid patterns, get the original nodes
+// // and return the VSELECT node with the original nodes.
+// LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: Valid pattern \n");
+// } else {
+// // If the TrueOp and FalseOp are not valid patterns, return the original Op
+// // without modification.
+// LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: Invalid pattern \n");
+// return SDValue();
+// }
+
+// // Get the original nodes from the TrueOp and FalseOp
+// SDValue NTrueOp = getOriginalNode(TrueOp);
+// SDValue NFalseOp = getOriginalNode(FalseOp);
+// // LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: NTrueOp: " << NTrueOp.getNode()->print(dbgs()) << "\n");
+// // LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: NFalseOp: " << NFalseOp.getNode()->dump() << "\n");
+
+// // Ensure both NTrueOp and NFalseOp are valid before using them.
+// if (!NTrueOp.getNode() || !NFalseOp.getNode()) {
+// LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: One or both original nodes are invalid, returning original Op\n");
+// return SDValue();
+// }
+// EVT MaskVT = Cond.getValueType();
+// const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+// EVT LegalMaskVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MaskVT);
+
+// LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: cond mask VT: " << MaskVT);
+// LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: cond Legal VT: " << LegalMaskVT << "\n");
+// if (MaskVT != LegalMaskVT) {
+// LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: cond LEGALIZATION\n");
+// // Bitcast or extend/truncate as needed
+// Cond = DAG.getZExtOrTrunc(Cond, SDLoc(Op), LegalMaskVT);
+// }
+
+// SDValue NewVselectNode = DAG.getNode(
+// ISD::VSELECT, SDLoc(Op), VT, Cond, NTrueOp, NFalseOp);
+// DAG.ReplaceAllUsesWith(Op, NewVselectNode);
+// // if (Op.getNode()->use_empty()) {
+// // DAG.RemoveDeadNode(Op.getNode());
+// // }
+
+// LLVM_DEBUG(NewVselectNode.dump());
+// LLVM_DEBUG(llvm::dbgs() << "New DAG \n");
+// LLVM_DEBUG(DAG.dump());
+// return SDValue();
+// }
+
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 7ad91dcdd21e9..2e4108e2fdf06 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -106,6 +106,10 @@ namespace llvm {
///
XXSPLTI32DX,
+ /// VSELECT - The PPC vector select instruction.
+ ///
+ VSELECT,
+
/// VECINSERT - The PPC vector insert instruction
///
VECINSERT,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td
index eb1d18e3a4e90..a5d1cf3da55cd 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrP10.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td
@@ -2212,6 +2212,9 @@ multiclass XXEvalVSelectWithXB<ValueType vt, bits<8> baseImm>{
def : XXEvalPattern<vt,
(vselect vt:$vA, XXEvalBinaryPattern<vt, and>.opPat, vt:$vB),
baseImm>;
+ // def : XXEvalPattern<vt,
+ // (PPCvecselect vt:$vA, XXEvalBinaryPattern<vt, xor>.opPat, XXEvalBinaryPattern<vt, and>.opPat),
+ // baseImm>;
// Ternary(A, nor(B,C), B) => imm: baseImm + 7 = 56
def : XXEvalPattern<vt,
(vselect vt:$vA, XXEvalBinaryPattern<vt, or, 1>.opPat, vt:$vB),
@@ -2387,10 +2390,14 @@ let Predicates = [PrefixInstrs, HasP10Vector] in {
defm : XXEvalVSelectWithXB<v4i32, 49>;
defm : XXEvalVSelectWithXC<v4i32, 81>;
defm : XXEvalVSelectWithXXor<v4i32, 97>;
- defm : XXEvalVSelectWithXAnd<v2i64, 22>;
- defm : XXEvalVSelectWithXB<v2i64, 49>;
- defm : XXEvalVSelectWithXC<v2i64, 81>;
- defm : XXEvalVSelectWithXXor<v2i64, 97>;
+
+ def : XXEvalPattern<v2i64,
+ (PPCvecselect v2i64:$vA, XXEvalBinaryPattern<v2i64, xor>.opPat, XXEvalBinaryPattern<v2i64, and>.opPat),
+ 22>;
+ // defm : XXEvalVSelectWithXAnd<v2i64, 22>;
+ // defm : XXEvalVSelectWithXB<v2i64, 49>;
+ // defm : XXEvalVSelectWithXC<v2i64, 81>;
+ // defm : XXEvalVSelectWithXXor<v2i64, 97>;
// Anonymous patterns to select prefixed VSX loads and stores.
// Load / Store f128
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 19448210f5db1..5f5f7e7745f7e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -87,6 +87,12 @@ def SDT_PPCxxperm : SDTypeProfile<1, 3, [
SDTCisVT<0, v2f64>, SDTCisVT<1, v2f64>,
SDTCisVT<2, v2f64>, SDTCisVT<3, v4i32>]>;
//--------------------------- Custom PPC nodes -------------------------------//
+def PPCvecselect : SDNode<"PPCISD::VSELECT", SDTypeProfile<1, 3, [
+ SDTCisVT<0, v2i64>, // result type
+ SDTCisVT<1, v2i64>, // condition type
+ SDTCisVT<2, v2i64>, // true value type
+ SDTCisVT<3, v2i64> // false value type
+]>,[]>;
def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x,
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
index dfb910b8d0c75..8e2c0e0493cad 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
@@ -4,27 +4,6 @@
; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \
; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc-ibm-aix-xcoff \
-; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s
-
-; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64-ibm-aix-xcoff \
-; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s
-
-; Function to test ternary(A, xor(B, C), and(B, C)) for <4 x i32>
-define <4 x i32> @ternary_A_xor_BC_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) {
-; CHECK-LABEL: ternary_A_xor_BC_and_BC_4x32:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: vslw v2, v2, v5
-; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 22
-; CHECK-NEXT: blr
-entry:
- %xor = xor <4 x i32> %B, %C
- %and = and <4 x i32> %B, %C
- %res = select <4 x i1> %A, <4 x i32> %xor, <4 x i32> %and
- ret <4 x i32> %res
-}
; Function to test ternary(A, xor(B, C), and(B, C)) for <2 x i64>
define <2 x i64> @ternary_A_xor_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) {
@@ -45,146 +24,3 @@ entry:
ret <2 x i64> %res
}
-; Function to test ternary(A, nor(B, C), and(B, C)) for <4 x i32>
-define <4 x i32> @ternary_A_nor_BC_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) {
-; CHECK-LABEL: ternary_A_nor_BC_and_BC_4x32:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: vslw v2, v2, v5
-; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 24
-; CHECK-NEXT: blr
-entry:
- %or = or <4 x i32> %B, %C
- %nor = xor <4 x i32> %or, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector NOR operation
- %and = and <4 x i32> %B, %C
- %res = select <4 x i1> %A, <4 x i32> %nor, <4 x i32> %and
- ret <4 x i32> %res
-}
-
-; Function to test ternary(A, nor(B, C), and(B, C)) for <2 x i64>
-define <2 x i64> @ternary_A_nor_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) {
-; CHECK-LABEL: ternary_A_nor_BC_and_BC_2x64:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxlnor vs0, v3, v4
-; CHECK-NEXT: xxland vs1, v3, v4
-; CHECK-NEXT: xxsplti32dx v5, 1, 63
-; CHECK-NEXT: vsld v2, v2, v5
-; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
-; CHECK-NEXT: blr
-entry:
- %or = or <2 x i64> %B, %C
- %nor = xor <2 x i64> %or, <i64 -1, i64 -1> ; Vector NOR operation
- %and = and <2 x i64> %B, %C
- %res = select <2 x i1> %A, <2 x i64> %nor, <2 x i64> %and
- ret <2 x i64> %res
-}
-
-; Function to test ternary(A, eqv(B, C), and(B, C)) for <4 x i32>
-define <4 x i32> @ternary_A_eqv_BC_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) {
-; CHECK-LABEL: ternary_A_eqv_BC_and_BC_4x32:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: vslw v2, v2, v5
-; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 25
-; CHECK-NEXT: blr
-entry:
- %xor = xor <4 x i32> %B, %C
- %eqv = xor <4 x i32> %xor, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector eqv operation
- %and = and <4 x i32> %B, %C
- %res = select <4 x i1> %A, <4 x i32> %eqv, <4 x i32> %and
- ret <4 x i32> %res
-}
-
-; Function to test ternary(A, eqv(B, C), and(B, C)) for <2 x i64>
-define <2 x i64> @ternary_A_eqv_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) {
-; CHECK-LABEL: ternary_A_eqv_BC_and_BC_2x64:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxleqv vs0, v3, v4
-; CHECK-NEXT: xxland vs1, v3, v4
-; CHECK-NEXT: xxsplti32dx v5, 1, 63
-; CHECK-NEXT: vsld v2, v2, v5
-; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
-; CHECK-NEXT: blr
-entry:
- %xor = xor <2 x i64> %B, %C
- %eqv = xor <2 x i64> %xor, <i64 -1, i64 -1> ; Vector eqv operation
- %and = and <2 x i64> %B, %C
- %res = select <2 x i1> %A, <2 x i64> %eqv, <2 x i64> %and
- ret <2 x i64> %res
-}
-
-; Function to test ternary(A, not(C), and(B, C)) for <4 x i32>
-define <4 x i32> @ternary_A_not_C_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) {
-; CHECK-LABEL: ternary_A_not_C_and_BC_4x32:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: vslw v2, v2, v5
-; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 26
-; CHECK-NEXT: blr
-entry:
- %not = xor <4 x i32> %C, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector not operation
- %and = and <4 x i32> %B, %C
- %res = select <4 x i1> %A, <4 x i32> %not, <4 x i32> %and
- ret <4 x i32> %res
-}
-
-; Function to test ternary(A, not(C), and(B, C)) for <2 x i64>
-define <2 x i64> @ternary_A_not_C_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) {
-; CHECK-LABEL: ternary_A_not_C_and_BC_2x64:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxlnor vs0, v4, v4
-; CHECK-NEXT: xxland vs1, v3, v4
-; CHECK-NEXT: xxsplti32dx v5, 1, 63
-; CHECK-NEXT: vsld v2, v2, v5
-; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
-; CHECK-NEXT: blr
-entry:
- %not = xor <2 x i64> %C, <i64 -1, i64 -1> ; Vector not operation
- %and = and <2 x i64> %B, %C
- %res = select <2 x i1> %A, <2 x i64> %not, <2 x i64> %and
- ret <2 x i64> %res
-}
-
-; Function to test ternary(A, not(B), and(B, C)) for <4 x i32>
-define <4 x i32> @ternary_A_not_B_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) {
-; CHECK-LABEL: ternary_A_not_B_and_BC_4x32:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxleqv v5, v5, v5
-; CHECK-NEXT: vslw v2, v2, v5
-; CHECK-NEXT: vsraw v2, v2, v5
-; CHECK-NEXT: xxeval v2, v2, v3, v4, 28
-; CHECK-NEXT: blr
-entry:
- %not = xor <4 x i32> %B, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector not operation
- %and = and <4 x i32> %B, %C
- %res = select <4 x i1> %A, <4 x i32> %not, <4 x i32> %and
- ret <4 x i32> %res
-}
-
-; Function to test ternary(A, not(B), and(B, C)) for <2 x i64>
-define <2 x i64> @ternary_A_not_B_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) {
-; CHECK-LABEL: ternary_A_not_B_and_BC_2x64:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xxlxor v5, v5, v5
-; CHECK-NEXT: xxlnor vs0, v3, v3
-; CHECK-NEXT: xxland vs1, v3, v4
-; CHECK-NEXT: xxsplti32dx v5, 1, 63
-; CHECK-NEXT: vsld v2, v2, v5
-; CHECK-NEXT: vsrad v2, v2, v5
-; CHECK-NEXT: xxsel v2, vs1, vs0, v2
-; CHECK-NEXT: blr
-entry:
- %not = xor <2 x i64> %B, <i64 -1, i64 -1> ; Vector not operation
- %and = and <2 x i64> %B, %C
- %res = select <2 x i1> %A, <2 x i64> %not, <2 x i64> %and
- ret <2 x i64> %res
-}
More information about the llvm-commits
mailing list