[llvm] [PowerPC] [Draft] Emit xxeval instruction for Ternary operation with v2i64 operand types (PR #145574)

Tony Varghese via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 24 12:13:33 PDT 2025


https://github.com/tonykuttai created https://github.com/llvm/llvm-project/pull/145574

The `xxeval` instruction can be used to support ternary patterns.

This change supports the following patterns with `v2i64` operands. Since the binary operations `xor` and `and` get promoted to `v4i32`, the original pattern changes from
`ternary(v2i64:A, xor(v2i64:B, v2i64:C), and(v2i64:B, v2i64:C))`  to `ternary(v2i64:A, bitcast(v2i64, xor(v4i32:B, v4i32:C)), bitcast(v2i64, and(v4i32:B, v4i32:C)) )`.
- To lower this pattern to the `xxeval` instruction, custom lowering is requested for the `ISD::VSELECT` node, which is then custom lowered in `PPCISelLowering`.
- If the pattern matches `ternary(v2i64:A, bitcast(v2i64, xor(v4i32:B, v4i32:C)), bitcast(v2i64, and(v4i32:B, v4i32:C)) )`, then a new `PPCISD::VSELECT` node is emitted, as shown below. All other nodes follow the default lowering.
```
SelectionDAG has 15 nodes:
  t0: ch,glue = EntryToken
  t4: v2i64,ch = CopyFromReg t0, Register:v2i64 %1
  t6: v2i64,ch = CopyFromReg t0, Register:v2i64 %2
        t2: v2i64,ch = CopyFromReg t0, Register:v2i64 %0
      t16: v2i64 = sign_extend_inreg t2, ValueType:ch:v2i1
      t8: v2i64 = xor t4, t6
      t9: v2i64 = and t4, t6
    t23: v2i64 = PPCISD::VSELECT t16, t8, t9
  t12: ch,glue = CopyToReg t0, Register:v2i64 $v2, t23
  t13: ch = PPCISD::RET_GLUE t12, Register:v2i64 $v2, t12:1
```
- I have now added pattern matching for `PPCISD::VSELECT` in `llvm/lib/Target/PowerPC/PPCInstrP10.td`.
- Sample test file used, `xxeval-vselect-x-and-v2i64.ll`:
```
; Function to test ternary(A, xor(B, C), and(B, C)) for <2 x i64>
define <2 x i64> @ternary_A_xor_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) {
; CHECK-LABEL: ternary_A_xor_BC_and_BC_2x64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xxlxor v5, v5, v5
; CHECK-NEXT:    xxlxor vs0, v3, v4
; CHECK-NEXT:    xxland vs1, v3, v4
; CHECK-NEXT:    xxsplti32dx v5, 1, 63
; CHECK-NEXT:    vsld v2, v2, v5
; CHECK-NEXT:    vsrad v2, v2, v5
; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
; CHECK-NEXT:    blr
entry:
  %xor = xor <2 x i64> %B, %C
  %and = and <2 x i64> %B, %C
  %res = select <2 x i1> %A, <2 x i64> %xor, <2 x i64> %and
  ret <2 x i64> %res
}

```
`doit.sh` script:
```
$LLVM_BUILD/bin/llc -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr -debug-only=isel -debug-only=legalize-types xxeval-vselect-x-and-v2i64.ll > isel_debug_x_and_2x64.ll 2<&1
$LLVM_BUILD/bin/llc -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr -debug-only=ppc-lowering xxeval-vselect-x-and-v2i64.ll > ppc-lowering_debug_x_and_2x64.ll 2<&1


```
- This currently results in a crash while legalizing vector types.

>From 3ebc44298670bdfb054a4c34fbbc7dab2e2e8ded Mon Sep 17 00:00:00 2001
From: Tony Varghese <tony.varghese at ibm.com>
Date: Thu, 29 May 2025 16:33:21 +0000
Subject: [PATCH 1/4] [PowerPC][XXEVAL] Exploit xxeval instruction for cases of
 the ternary(A,X, and(B,C)), ternary(A,X,B), ternary(A,X,C),
 ternary(A,X,xor(B,C)) forms.

---
 llvm/lib/Target/PowerPC/PPCInstrP10.td        | 200 +++++++++++++++---
 .../CodeGen/PowerPC/xxeval-vselect-x-and.ll   |  42 +---
 .../CodeGen/PowerPC/xxeval-vselect-x-b.ll     |  32 +--
 .../CodeGen/PowerPC/xxeval-vselect-x-c.ll     |  26 +--
 .../CodeGen/PowerPC/xxeval-vselect-x-or.ll    |  28 ++-
 .../CodeGen/PowerPC/xxeval-vselect-x-xor.ll   |  38 +---
 6 files changed, 222 insertions(+), 144 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td
index d295f35fb1dd0..d0985f4a3a9bb 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrP10.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td
@@ -2159,8 +2159,131 @@ let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX] in {
                                (COPY_TO_REGCLASS $VRB, VSRC), 2)))>;
 }
 
-class XXEvalPattern <dag pattern, bits<8> imm> :
-  Pat<(v4i32 pattern), (XXEVAL $vA, $vB, $vC, imm)> {}
+class XXEvalPattern <ValueType vt, dag pattern, bits<8> imm> :
+  Pat<(vt pattern), (XXEVAL $vA, $vB, $vC, imm)> {}
+
+class DagCondVNot<dag d, bit negate> {
+  // Utility to define a vnot around the dag.
+  dag res = !if(!ne(negate, 0),
+               (vnot d),
+               d);
+}
+
+class XXEvalUnaryPattern<ValueType vt> {
+  // vnot Operand B
+  dag vnotB = !cond(
+    !eq(vt, v4i32) : (vnot v4i32:$vB),
+    !eq(vt, v2i64) : (v2i64 (bitconvert (vnot (v4i32 (bitconvert v2i64:$vB)))))
+  );
+  // vnot Operand C
+  dag vnotC = !cond(
+    !eq(vt, v4i32) : (vnot v4i32:$vC),
+    !eq(vt, v2i64) : (v2i64 (bitconvert (vnot (v4i32 (bitconvert v2i64:$vC)))))
+  );
+}
+
+class XXEvalBinaryPattern<ValueType vt, SDPatternOperator op, bit notResult = 0> {
+  // Defines a wrapper class for binary patterns with optional NOT on result.
+  // Generate op pattern with optional NOT wrapping for result depending on "notResult".
+      dag opPat = !cond(
+                !eq(vt, v4i32) : DagCondVNot<(op v4i32:$vB, v4i32:$vC), notResult>.res,
+                !eq(vt, v2i64) : (v2i64 (bitconvert DagCondVNot<(op
+                                      (v4i32 (bitconvert v2i64:$vB)),
+                                      (v4i32 (bitconvert v2i64:$vC))), notResult>.res))
+                );
+}
+
+multiclass XXEvalVSelectWithXAnd<ValueType vt, bits<8> baseImm> {
+  // Multiclass for Ternary(A, X, and(B, C)) style patterns.
+  // Ternary(A, xor(B,C), and(B,C)) => imm: baseImm = 22
+  def : XXEvalPattern<vt, 
+        (vselect vt:$vA, XXEvalBinaryPattern<vt, xor>.opPat, XXEvalBinaryPattern<vt, and>.opPat), 
+        baseImm>;
+  // Ternary(A, nor(B,C), and(B,C)) => imm: baseImm + 2 = 24
+  def : XXEvalPattern<vt, 
+        (vselect vt:$vA, XXEvalBinaryPattern<vt, or, 1>.opPat, XXEvalBinaryPattern<vt, and>.opPat), 
+        !add(baseImm, 2)>;
+  // Ternary(A, eqv(B,C), and(B,C)) => imm: baseImm + 3 = 25
+  def : XXEvalPattern<vt, 
+        (vselect vt:$vA, XXEvalBinaryPattern<vt, xor, 1>.opPat, XXEvalBinaryPattern<vt, and>.opPat), 
+        !add(baseImm, 3)>;
+  // Ternary(A, not(C), and(B,C)) => imm: baseImm + 4 = 26
+  def : XXEvalPattern<vt,
+        (vselect vt:$vA, XXEvalUnaryPattern<vt>.vnotC, XXEvalBinaryPattern<vt, and>.opPat), 
+        !add(baseImm, 4)>;
+  // Ternary(A, not(B), and(B,C)) => imm: baseImm + 6 = 28
+  def : XXEvalPattern<vt,
+        (vselect vt:$vA, XXEvalUnaryPattern<vt>.vnotB, XXEvalBinaryPattern<vt, and>.opPat), 
+        !add(baseImm, 6)>;
+}
+
+multiclass XXEvalVSelectWithXB<ValueType vt, bits<8> baseImm>{
+  // Multiclass for Ternary(A, X, B) style patterns
+  // Ternary(A, and(B,C), B) => imm: baseImm = 49
+  def : XXEvalPattern<vt,
+        (vselect vt:$vA, XXEvalBinaryPattern<vt, and>.opPat, vt:$vB), 
+        baseImm>;
+  // Ternary(A, nor(B,C), B) => imm: baseImm + 7 = 56
+  def : XXEvalPattern<vt,
+        (vselect vt:$vA, XXEvalBinaryPattern<vt, or, 1>.opPat, vt:$vB), 
+        !add(baseImm, 7)>;
+  // Ternary(A, eqv(B,C), B) => imm: baseImm + 8 = 57
+  def : XXEvalPattern<vt,
+        (vselect vt:$vA, XXEvalBinaryPattern<vt, xor, 1>.opPat, vt:$vB), 
+        !add(baseImm, 8)>;
+  // Ternary(A, not(C), B) => imm: baseImm + 9 = 58
+  def : XXEvalPattern<vt,
+        (vselect vt:$vA, XXEvalUnaryPattern<vt>.vnotC, vt:$vB), 
+        !add(baseImm, 9)>;
+  // Ternary(A, nand(B,C), B) => imm: baseImm + 13 = 62
+  def : XXEvalPattern<vt,
+        (vselect vt:$vA, XXEvalBinaryPattern<vt, and, 1>.opPat, vt:$vB), 
+        !add(baseImm, 13)>;
+}
+
+multiclass XXEvalVSelectWithXC<ValueType vt, bits<8> baseImm>{
+  // Multiclass for Ternary(A, X, C) style patterns
+  // Ternary(A, and(B,C), C) => imm: baseImm = 81
+  def : XXEvalPattern<vt,
+        (vselect vt:$vA, XXEvalBinaryPattern<vt, and>.opPat, vt:$vC), 
+        baseImm>;
+  // Ternary(A, nor(B,C), C) => imm: baseImm + 7 = 88
+  def : XXEvalPattern<vt,
+        (vselect vt:$vA, XXEvalBinaryPattern<vt, or, 1>.opPat, vt:$vC), 
+        !add(baseImm, 7)>;
+  // Ternary(A, eqv(B,C), C) => imm: baseImm + 8 = 89
+  def : XXEvalPattern<vt,
+        (vselect vt:$vA, XXEvalBinaryPattern<vt, xor, 1>.opPat, vt:$vC), 
+        !add(baseImm, 8)>;
+  // Ternary(A, nand(B,C), C) => imm: baseImm + 13 = 94
+  def : XXEvalPattern<vt,
+        (vselect vt:$vA, XXEvalBinaryPattern<vt, and, 1>.opPat, vt:$vC), 
+        !add(baseImm, 13)>;
+}
+
+multiclass XXEvalVSelectWithXXor<ValueType vt, bits<8> baseImm>{
+  // Multiclass for Ternary(A, X, xor(B,C)) style patterns
+  // Ternary(A, and(B,C), xor(B,C)) => imm: baseImm = 97
+  def : XXEvalPattern<vt,
+        (vselect vt:$vA, XXEvalBinaryPattern<vt, and>.opPat, XXEvalBinaryPattern<vt, xor>.opPat), 
+        baseImm>;
+  // Ternary(A, B, xor(B,C)) => imm: baseImm + 2 = 99
+  def : XXEvalPattern<vt,
+        (vselect vt:$vA, vt:$vB, XXEvalBinaryPattern<vt, xor>.opPat), 
+        !add(baseImm, 2)>;
+  // Ternary(A, C, xor(B,C)) => imm: baseImm + 4 = 101
+  def : XXEvalPattern<vt,
+        (vselect vt:$vA, vt:$vC, XXEvalBinaryPattern<vt, xor>.opPat), 
+        !add(baseImm, 4)>;
+  // Ternary(A, or(B,C), xor(B,C)) => imm: baseImm + 6 = 103
+  def : XXEvalPattern<vt,
+        (vselect vt:$vA, XXEvalBinaryPattern<vt, or>.opPat, XXEvalBinaryPattern<vt, xor>.opPat), 
+        !add(baseImm, 6)>;
+  // Ternary(A, nor(B,C), xor(B,C)) => imm: baseImm + 7 = 104
+  def : XXEvalPattern<vt,
+        (vselect vt:$vA, XXEvalBinaryPattern<vt, or, 1>.opPat, XXEvalBinaryPattern<vt, xor>.opPat), 
+        !add(baseImm, 7)>; 
+}
 
 let Predicates = [PrefixInstrs, HasP10Vector] in {
   let AddedComplexity = 400 in {
@@ -2192,83 +2315,96 @@ let Predicates = [PrefixInstrs, HasP10Vector] in {
     // Anonymous patterns for XXEVAL
     // AND
     // and(A, B, C)
-    def : XXEvalPattern<(and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 1>;
+    def : XXEvalPattern<v4i32, (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 1>;
     // and(A, xor(B, C))
-    def : XXEvalPattern<(and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 6>;
+    def : XXEvalPattern<v4i32, (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 6>;
     // and(A, or(B, C))
-    def : XXEvalPattern<(and v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 7>;
+    def : XXEvalPattern<v4i32, (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 7>;
     // and(A, nor(B, C))
-    def : XXEvalPattern<(and v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 8>;
+    def : XXEvalPattern<v4i32, (and v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 8>;
     // and(A, eqv(B, C))
-    def : XXEvalPattern<(and v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 9>;
+    def : XXEvalPattern<v4i32, (and v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 9>;
     // and(A, nand(B, C))
-    def : XXEvalPattern<(and v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 14>;
+    def : XXEvalPattern<v4i32, (and v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 14>;
 
     // NAND
     // nand(A, B, C)
-    def : XXEvalPattern<(vnot (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC))),
+    def : XXEvalPattern<v4i32, (vnot (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC))),
                          !sub(255, 1)>;
     // nand(A, xor(B, C))
-    def : XXEvalPattern<(vnot (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))),
+    def : XXEvalPattern<v4i32, (vnot (and v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))),
                          !sub(255, 6)>;
     // nand(A, or(B, C))
-    def : XXEvalPattern<(vnot (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC))),
+    def : XXEvalPattern<v4i32, (vnot (and v4i32:$vA, (or v4i32:$vB, v4i32:$vC))),
                          !sub(255, 7)>;
     // nand(A, nor(B, C))
-    def : XXEvalPattern<(or (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)),
+    def : XXEvalPattern<v4i32, (or (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)),
                          !sub(255, 8)>;
     // nand(A, eqv(B, C))
-    def : XXEvalPattern<(or (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)),
+    def : XXEvalPattern<v4i32, (or (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)),
                          !sub(255, 9)>;
     // nand(A, nand(B, C))
-    def : XXEvalPattern<(or (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)),
+    def : XXEvalPattern<v4i32, (or (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)),
                          !sub(255, 14)>;
 
     // EQV
     // (eqv A, B, C)
-    def : XXEvalPattern<(or (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)),
+    def : XXEvalPattern<v4i32, (or (and v4i32:$vA, (and v4i32:$vB, v4i32:$vC)),
                             (vnot (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC)))),
                          150>;
     // (eqv A, (and B, C))
-    def : XXEvalPattern<(vnot (xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), 225>;
+    def : XXEvalPattern<v4i32, (vnot (xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), 225>;
     // (eqv A, (or B, C))
-    def : XXEvalPattern<(vnot (xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), 135>;
+    def : XXEvalPattern<v4i32, (vnot (xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), 135>;
 
     // NOR
     // (nor A, B, C)
-    def : XXEvalPattern<(vnot (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), 128>;
+    def : XXEvalPattern<v4i32, (vnot (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC))), 128>;
     // (nor A, (and B, C))
-    def : XXEvalPattern<(vnot (or v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), 224>;
+    def : XXEvalPattern<v4i32, (vnot (or v4i32:$vA, (and v4i32:$vB, v4i32:$vC))), 224>;
     // (nor A, (eqv B, C))
-    def : XXEvalPattern<(and (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)), 96>;
+    def : XXEvalPattern<v4i32, (and (vnot v4i32:$vA), (xor v4i32:$vB, v4i32:$vC)), 96>;
     // (nor A, (nand B, C))
-    def : XXEvalPattern<(and (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)), 16>;
+    def : XXEvalPattern<v4i32, (and (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)), 16>;
     // (nor A, (nor B, C))
-    def : XXEvalPattern<(and (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)), 112>;
+    def : XXEvalPattern<v4i32, (and (vnot v4i32:$vA), (or v4i32:$vB, v4i32:$vC)), 112>;
     // (nor A, (xor B, C))
-    def : XXEvalPattern<(vnot (or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))), 144>;
+    def : XXEvalPattern<v4i32, (vnot (or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC))), 144>;
 
     // OR
     // (or A, B, C)
-    def : XXEvalPattern<(or v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 127>;
+    def : XXEvalPattern<v4i32, (or v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 127>;
     // (or A, (and B, C))
-    def : XXEvalPattern<(or v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 31>;
+    def : XXEvalPattern<v4i32, (or v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 31>;
     // (or A, (eqv B, C))
-    def : XXEvalPattern<(or v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 159>;
+    def : XXEvalPattern<v4i32, (or v4i32:$vA, (vnot (xor v4i32:$vB, v4i32:$vC))), 159>;
     // (or A, (nand B, C))
-    def : XXEvalPattern<(or v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 239>;
+    def : XXEvalPattern<v4i32, (or v4i32:$vA, (vnot (and v4i32:$vB, v4i32:$vC))), 239>;
     // (or A, (nor B, C))
-    def : XXEvalPattern<(or v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 143>;
+    def : XXEvalPattern<v4i32, (or v4i32:$vA, (vnot (or v4i32:$vB, v4i32:$vC))), 143>;
     // (or A, (xor B, C))
-    def : XXEvalPattern<(or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 111>;
+    def : XXEvalPattern<v4i32, (or v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 111>;
 
     // XOR
     // (xor A, B, C)
-    def : XXEvalPattern<(xor v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 105>;
+    def : XXEvalPattern<v4i32, (xor v4i32:$vA, (xor v4i32:$vB, v4i32:$vC)), 105>;
     // (xor A, (and B, C))
-    def : XXEvalPattern<(xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 30>;
+    def : XXEvalPattern<v4i32, (xor v4i32:$vA, (and v4i32:$vB, v4i32:$vC)), 30>;
     // (xor A, (or B, C))
-    def : XXEvalPattern<(xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 120>;
+    def : XXEvalPattern<v4i32, (xor v4i32:$vA, (or v4i32:$vB, v4i32:$vC)), 120>;
+
+    // Utilize xxeval instruction for ternary vector expressions.
+    defm : XXEvalVSelectWithXAnd<v4i32, 22>;
+    defm : XXEvalVSelectWithXAnd<v2i64, 22>;
+
+    defm : XXEvalVSelectWithXB<v4i32, 49>;
+    defm : XXEvalVSelectWithXB<v2i64, 49>;
+
+    defm : XXEvalVSelectWithXC<v4i32, 81>;
+    defm : XXEvalVSelectWithXC<v2i64, 81>;
+
+    defm : XXEvalVSelectWithXXor<v4i32, 97>;
+    defm : XXEvalVSelectWithXXor<v2i64, 97>;
 
     // Anonymous patterns to select prefixed VSX loads and stores.
     // Load / Store f128
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
index 2868669c52ce6..19305336f78df 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; Test file to verify the emission of Vector selection instructions when ternary operators are used.
+; Test file to verify the emission of Vector Evaluate instructions when ternary operators are used.
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \
 ; RUN:   -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s
@@ -15,11 +15,9 @@ define <4 x i32> @ternary_A_xor_BC_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
 ; CHECK-LABEL: ternary_A_xor_BC_and_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxlxor vs0, v3, v4
-; CHECK-NEXT:    xxland vs1, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 22
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <4 x i32> %B, %C
@@ -33,12 +31,10 @@ define <2 x i64> @ternary_A_xor_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
 ; CHECK-LABEL: ternary_A_xor_BC_and_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxlxor vs0, v3, v4
-; CHECK-NEXT:    xxland vs1, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 22
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <2 x i64> %B, %C
@@ -52,11 +48,9 @@ define <4 x i32> @ternary_A_nor_BC_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
 ; CHECK-LABEL: ternary_A_nor_BC_and_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxlnor vs0, v3, v4
-; CHECK-NEXT:    xxland vs1, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 24
 ; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %B, %C
@@ -71,12 +65,10 @@ define <2 x i64> @ternary_A_nor_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
 ; CHECK-LABEL: ternary_A_nor_BC_and_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxlnor vs0, v3, v4
-; CHECK-NEXT:    xxland vs1, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 24
 ; CHECK-NEXT:    blr
 entry:
   %or = or <2 x i64> %B, %C
@@ -91,11 +83,9 @@ define <4 x i32> @ternary_A_eqv_BC_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
 ; CHECK-LABEL: ternary_A_eqv_BC_and_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxleqv vs0, v3, v4
-; CHECK-NEXT:    xxland vs1, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 25
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <4 x i32> %B, %C
@@ -110,12 +100,10 @@ define <2 x i64> @ternary_A_eqv_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
 ; CHECK-LABEL: ternary_A_eqv_BC_and_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxleqv vs0, v3, v4
-; CHECK-NEXT:    xxland vs1, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 25
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <2 x i64> %B, %C
@@ -130,11 +118,9 @@ define <4 x i32> @ternary_A_not_C_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3
 ; CHECK-LABEL: ternary_A_not_C_and_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxlnor vs0, v4, v4
-; CHECK-NEXT:    xxland vs1, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 26
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <4 x i32> %C, <i32 -1, i32 -1, i32 -1, i32 -1>  ; Vector not operation
@@ -148,12 +134,10 @@ define <2 x i64> @ternary_A_not_C_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6
 ; CHECK-LABEL: ternary_A_not_C_and_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxlnor vs0, v4, v4
-; CHECK-NEXT:    xxland vs1, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 26
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <2 x i64> %C, <i64 -1, i64 -1>  ; Vector not operation
@@ -167,11 +151,9 @@ define <4 x i32> @ternary_A_not_B_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3
 ; CHECK-LABEL: ternary_A_not_B_and_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxlnor vs0, v3, v3
-; CHECK-NEXT:    xxland vs1, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 28
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <4 x i32> %B, <i32 -1, i32 -1, i32 -1, i32 -1>  ; Vector not operation
@@ -185,12 +167,10 @@ define <2 x i64> @ternary_A_not_B_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6
 ; CHECK-LABEL: ternary_A_not_B_and_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxlnor vs0, v3, v3
-; CHECK-NEXT:    xxland vs1, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 28
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <2 x i64> %B, <i64 -1, i64 -1>  ; Vector not operation
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll
index 37a0edb14b78f..c36fd68ba0ece 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; Test file to verify the emission of Vector selection instructions when ternary operators are used.
+; Test file to verify the emission of Vector Evaluate instructions when ternary operators are used.
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \
 ; RUN:   -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s
@@ -15,10 +15,9 @@ define <4 x i32> @ternary_A_and_BC_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
 ; CHECK-LABEL: ternary_A_and_BC_B_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxland vs0, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, v3, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 49
 ; CHECK-NEXT:    blr
 entry:
   %and = and <4 x i32> %B, %C
@@ -31,11 +30,10 @@ define <2 x i64> @ternary_A_and_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
 ; CHECK-LABEL: ternary_A_and_BC_B_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxland vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, v3, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 49
 ; CHECK-NEXT:    blr
 entry:
   %and = and <2 x i64> %B, %C
@@ -48,10 +46,9 @@ define <4 x i32> @ternary_A_nor_BC_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
 ; CHECK-LABEL: ternary_A_nor_BC_B_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxlnor vs0, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, v3, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 56
 ; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %B, %C
@@ -65,11 +62,10 @@ define <2 x i64> @ternary_A_nor_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
 ; CHECK-LABEL: ternary_A_nor_BC_B_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxlnor vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, v3, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 56
 ; CHECK-NEXT:    blr
 entry:
   %or = or <2 x i64> %B, %C
@@ -83,10 +79,9 @@ define <4 x i32> @ternary_A_eqv_BC_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
 ; CHECK-LABEL: ternary_A_eqv_BC_B_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxleqv vs0, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, v3, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 57
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <4 x i32> %B, %C
@@ -100,11 +95,10 @@ define <2 x i64> @ternary_A_eqv_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
 ; CHECK-LABEL: ternary_A_eqv_BC_B_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxleqv vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, v3, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 57
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <2 x i64> %B, %C
@@ -118,10 +112,9 @@ define <4 x i32> @ternary_A_not_C_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C
 ; CHECK-LABEL: ternary_A_not_C_B_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxlnor vs0, v4, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, v3, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 58
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <4 x i32> %C, <i32 -1, i32 -1, i32 -1, i32 -1>  ; Vector not operation
@@ -134,11 +127,10 @@ define <2 x i64> @ternary_A_not_C_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C
 ; CHECK-LABEL: ternary_A_not_C_B_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxlnor vs0, v4, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, v3, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 58
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <2 x i64> %C, <i64 -1, i64 -1>  ; Vector not operation
@@ -151,10 +143,9 @@ define <4 x i32> @ternary_A_nand_BC_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32>
 ; CHECK-LABEL: ternary_A_nand_BC_B_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxlnand vs0, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, v3, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 62
 ; CHECK-NEXT:    blr
 entry:
   %and = and <4 x i32> %B, %C
@@ -168,11 +159,10 @@ define <2 x i64> @ternary_A_nand_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64>
 ; CHECK-LABEL: ternary_A_nand_BC_B_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxlnand vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, v3, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 62
 ; CHECK-NEXT:    blr
 entry:
   %and = and <2 x i64> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll
index 411aa27a61861..54fda6063bfac 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; Test file to verify the emission of Vector selection instructions when ternary operators are used.
+; Test file to verify the emission of Vector Evaluate instructions when ternary operators are used.
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \
 ; RUN:   -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s
@@ -15,10 +15,9 @@ define <4 x i32> @ternary_A_and_BC_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
 ; CHECK-LABEL: ternary_A_and_BC_C_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxland vs0, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, v4, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 81
 ; CHECK-NEXT:    blr
 entry:
   %and = and <4 x i32> %B, %C
@@ -31,11 +30,10 @@ define <2 x i64> @ternary_A_and_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
 ; CHECK-LABEL: ternary_A_and_BC_C_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxland vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, v4, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 81
 ; CHECK-NEXT:    blr
 entry:
   %and = and <2 x i64> %B, %C
@@ -48,10 +46,9 @@ define <4 x i32> @ternary_A_nor_BC_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
 ; CHECK-LABEL: ternary_A_nor_BC_C_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxlnor vs0, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, v4, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 88
 ; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %B, %C
@@ -65,11 +62,10 @@ define <2 x i64> @ternary_A_nor_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
 ; CHECK-LABEL: ternary_A_nor_BC_C_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxlnor vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, v4, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 88
 ; CHECK-NEXT:    blr
 entry:
   %or = or <2 x i64> %B, %C
@@ -83,10 +79,9 @@ define <4 x i32> @ternary_A_eqv_BC_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
 ; CHECK-LABEL: ternary_A_eqv_BC_C_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxleqv vs0, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, v4, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 89
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <4 x i32> %B, %C
@@ -100,11 +95,10 @@ define <2 x i64> @ternary_A_eqv_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
 ; CHECK-LABEL: ternary_A_eqv_BC_C_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxleqv vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, v4, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 89
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <2 x i64> %B, %C
@@ -118,10 +112,9 @@ define <4 x i32> @ternary_A_nand_BC_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32>
 ; CHECK-LABEL: ternary_A_nand_BC_C_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxlnand vs0, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, v4, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 94
 ; CHECK-NEXT:    blr
 entry:
   %and = and <4 x i32> %B, %C
@@ -135,11 +128,10 @@ define <2 x i64> @ternary_A_nand_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64>
 ; CHECK-LABEL: ternary_A_nand_BC_C_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxlnand vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, v4, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 94
 ; CHECK-NEXT:    blr
 entry:
   %and = and <2 x i64> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-or.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-or.ll
index 1ad7e95e3682e..c956785a757ca 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-or.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-or.ll
@@ -120,11 +120,11 @@ define <4 x i32> @ternary_A_eqv_BC_or_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3
 ; CHECK-LABEL: ternary_A_eqv_BC_or_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxleqv vs0, v3, v4
+; CHECK-NEXT:    xxlxor vs0, v3, v4
 ; CHECK-NEXT:    xxlor vs1, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, vs1, vs0, 58
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <4 x i32> %B, %C
@@ -159,11 +159,10 @@ define <4 x i32> @ternary_A_not_C_or_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32
 ; CHECK-LABEL: ternary_A_not_C_or_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxlnor vs0, v4, v4
-; CHECK-NEXT:    xxlor vs1, v3, v4
+; CHECK-NEXT:    xxlor vs0, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, vs0, v4, 58
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <4 x i32> %C, <i32 -1, i32 -1, i32 -1, i32 -1>  ; Vector not operation
@@ -177,12 +176,11 @@ define <2 x i64> @ternary_A_not_C_or_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64
 ; CHECK-LABEL: ternary_A_not_C_or_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxlnor vs0, v4, v4
-; CHECK-NEXT:    xxlor vs1, v3, v4
+; CHECK-NEXT:    xxlor vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, vs0, v4, 58
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <2 x i64> %C, <i64 -1, i64 -1>  ; Vector not operation
@@ -196,11 +194,10 @@ define <4 x i32> @ternary_A_not_B_or_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32
 ; CHECK-LABEL: ternary_A_not_B_or_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxlnor vs0, v3, v3
-; CHECK-NEXT:    xxlor vs1, v3, v4
+; CHECK-NEXT:    xxlor vs0, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, vs0, v3, 58
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <4 x i32> %B, <i32 -1, i32 -1, i32 -1, i32 -1>  ; Vector not operation
@@ -214,12 +211,11 @@ define <2 x i64> @ternary_A_not_B_or_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64
 ; CHECK-LABEL: ternary_A_not_B_or_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxlnor vs0, v3, v3
-; CHECK-NEXT:    xxlor vs1, v3, v4
+; CHECK-NEXT:    xxlor vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, vs0, v3, 58
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <2 x i64> %B, <i64 -1, i64 -1>  ; Vector not operation
@@ -233,11 +229,11 @@ define <4 x i32> @ternary_A_nand_BC_or_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
 ; CHECK-LABEL: ternary_A_nand_BC_or_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxlnand vs0, v3, v4
+; CHECK-NEXT:    xxland vs0, v3, v4
 ; CHECK-NEXT:    xxlor vs1, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, vs1, vs0, 58
 ; CHECK-NEXT:    blr
 entry:
   %and = and <4 x i32> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll
index f8a8b6e9a0486..74d3a3bf6196e 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; Test file to verify the emission of Vector selection instructions when ternary operators are used.
+; Test file to verify the emission of Vector Evaluate instructions when ternary operators are used.
 
 ; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \
 ; RUN:   -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s
@@ -15,11 +15,9 @@ define <4 x i32> @ternary_A_and_BC_xor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
 ; CHECK-LABEL: ternary_A_and_BC_xor_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxland vs0, v3, v4
-; CHECK-NEXT:    xxlxor vs1, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 97
 ; CHECK-NEXT:    blr
 entry:
   %and = and <4 x i32> %B, %C
@@ -33,12 +31,10 @@ define <2 x i64> @ternary_A_and_BC_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
 ; CHECK-LABEL: ternary_A_and_BC_xor_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxland vs0, v3, v4
-; CHECK-NEXT:    xxlxor vs1, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 97
 ; CHECK-NEXT:    blr
 entry:
   %and = and <2 x i64> %B, %C
@@ -52,10 +48,9 @@ define <4 x i32> @ternary_A_B_xor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
 ; CHECK-LABEL: ternary_A_B_xor_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxlxor vs0, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs0, v3, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 99
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <4 x i32> %B, %C
@@ -68,11 +63,10 @@ define <2 x i64> @ternary_A_B_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
 ; CHECK-LABEL: ternary_A_B_xor_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxlxor vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs0, v3, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 99
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <2 x i64> %B, %C
@@ -85,10 +79,9 @@ define <4 x i32> @ternary_A_C_xor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %
 ; CHECK-LABEL: ternary_A_C_xor_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxlxor vs0, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs0, v4, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 101
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <4 x i32> %B, %C
@@ -101,11 +94,10 @@ define <2 x i64> @ternary_A_C_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
 ; CHECK-LABEL: ternary_A_C_xor_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxlxor vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs0, v4, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 101
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <2 x i64> %B, %C
@@ -118,11 +110,9 @@ define <4 x i32> @ternary_A_or_BC_xor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3
 ; CHECK-LABEL: ternary_A_or_BC_xor_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxlor vs0, v3, v4
-; CHECK-NEXT:    xxlxor vs1, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 103
 ; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %B, %C
@@ -136,12 +126,10 @@ define <2 x i64> @ternary_A_or_BC_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6
 ; CHECK-LABEL: ternary_A_or_BC_xor_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxlor vs0, v3, v4
-; CHECK-NEXT:    xxlxor vs1, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 103
 ; CHECK-NEXT:    blr
 entry:
   %or = or <2 x i64> %B, %C
@@ -155,11 +143,9 @@ define <4 x i32> @ternary_A_nor_BC_xor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i
 ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_4x32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    xxlnor vs0, v3, v4
-; CHECK-NEXT:    xxlxor vs1, v3, v4
 ; CHECK-NEXT:    vslw v2, v2, v5
 ; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 104
 ; CHECK-NEXT:    blr
 entry:
   %or = or <4 x i32> %B, %C
@@ -174,12 +160,10 @@ define <2 x i64> @ternary_A_nor_BC_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
 ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxlnor vs0, v3, v4
-; CHECK-NEXT:    xxlxor vs1, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
+; CHECK-NEXT:    xxeval v2, v2, v3, v4, 104
 ; CHECK-NEXT:    blr
 entry:
   %or = or <2 x i64> %B, %C

>From 9c8a424bfc0c235798ea2ce142963f011cc74ed3 Mon Sep 17 00:00:00 2001
From: Tony Varghese <tony.varghese at ibm.com>
Date: Fri, 20 Jun 2025 18:59:08 +0000
Subject: [PATCH 2/4] [PowerPC]xxeval instruction for ternary operations
 support for v4i32

---
 llvm/lib/Target/PowerPC/PPCInstrP10.td        | 23 ++++---------------
 .../CodeGen/PowerPC/xxeval-vselect-x-and.ll   | 20 ++++++++++++----
 .../CodeGen/PowerPC/xxeval-vselect-x-b.ll     | 15 ++++++++----
 .../CodeGen/PowerPC/xxeval-vselect-x-c.ll     | 12 ++++++----
 .../CodeGen/PowerPC/xxeval-vselect-x-or.ll    | 10 ++++----
 .../CodeGen/PowerPC/xxeval-vselect-x-xor.ll   | 18 +++++++++++----
 6 files changed, 56 insertions(+), 42 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td
index d0985f4a3a9bb..947b3e188b5a3 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrP10.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td
@@ -2171,26 +2171,15 @@ class DagCondVNot<dag d, bit negate> {
 
 class XXEvalUnaryPattern<ValueType vt> {
   // vnot Operand B
-  dag vnotB = !cond(
-    !eq(vt, v4i32) : (vnot v4i32:$vB),
-    !eq(vt, v2i64) : (v2i64 (bitconvert (vnot (v4i32 (bitconvert v2i64:$vB)))))
-  );
+  dag vnotB = (vnot vt:$vB);
   // vnot Operand C
-  dag vnotC = !cond(
-    !eq(vt, v4i32) : (vnot v4i32:$vC),
-    !eq(vt, v2i64) : (v2i64 (bitconvert (vnot (v4i32 (bitconvert v2i64:$vC)))))
-  );
+  dag vnotC = (vnot vt:$vC);
 }
 
 class XXEvalBinaryPattern<ValueType vt, SDPatternOperator op, bit notResult = 0> {
   // Defines a wrapper class for binary patterns with optional NOT on result.
   // Generate op pattern with optional NOT wrapping for result depending on "notResult".
-      dag opPat = !cond(
-                !eq(vt, v4i32) : DagCondVNot<(op v4i32:$vB, v4i32:$vC), notResult>.res,
-                !eq(vt, v2i64) : (v2i64 (bitconvert DagCondVNot<(op
-                                      (v4i32 (bitconvert v2i64:$vB)),
-                                      (v4i32 (bitconvert v2i64:$vC))), notResult>.res))
-                );
+  dag opPat = DagCondVNot<(op vt:$vB, vt:$vC), notResult>.res;
 }
 
 multiclass XXEvalVSelectWithXAnd<ValueType vt, bits<8> baseImm> {
@@ -2395,16 +2384,12 @@ let Predicates = [PrefixInstrs, HasP10Vector] in {
 
     // Utilize xxeval instruction for ternary vector expressions.
     defm : XXEvalVSelectWithXAnd<v4i32, 22>;
-    defm : XXEvalVSelectWithXAnd<v2i64, 22>;
 
     defm : XXEvalVSelectWithXB<v4i32, 49>;
-    defm : XXEvalVSelectWithXB<v2i64, 49>;
 
     defm : XXEvalVSelectWithXC<v4i32, 81>;
-    defm : XXEvalVSelectWithXC<v2i64, 81>;
-
+    
     defm : XXEvalVSelectWithXXor<v4i32, 97>;
-    defm : XXEvalVSelectWithXXor<v2i64, 97>;
 
     // Anonymous patterns to select prefixed VSX loads and stores.
     // Load / Store f128
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
index 19305336f78df..dfb910b8d0c75 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
@@ -31,10 +31,12 @@ define <2 x i64> @ternary_A_xor_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
 ; CHECK-LABEL: ternary_A_xor_BC_and_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxlxor vs0, v3, v4
+; CHECK-NEXT:    xxland vs1, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 22
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <2 x i64> %B, %C
@@ -65,10 +67,12 @@ define <2 x i64> @ternary_A_nor_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
 ; CHECK-LABEL: ternary_A_nor_BC_and_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxlnor vs0, v3, v4
+; CHECK-NEXT:    xxland vs1, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 24
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <2 x i64> %B, %C
@@ -100,10 +104,12 @@ define <2 x i64> @ternary_A_eqv_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
 ; CHECK-LABEL: ternary_A_eqv_BC_and_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxleqv vs0, v3, v4
+; CHECK-NEXT:    xxland vs1, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 25
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <2 x i64> %B, %C
@@ -134,10 +140,12 @@ define <2 x i64> @ternary_A_not_C_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6
 ; CHECK-LABEL: ternary_A_not_C_and_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxlnor vs0, v4, v4
+; CHECK-NEXT:    xxland vs1, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 26
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <2 x i64> %C, <i64 -1, i64 -1>  ; Vector not operation
@@ -167,10 +175,12 @@ define <2 x i64> @ternary_A_not_B_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6
 ; CHECK-LABEL: ternary_A_not_B_and_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxlnor vs0, v3, v3
+; CHECK-NEXT:    xxland vs1, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 28
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <2 x i64> %B, <i64 -1, i64 -1>  ; Vector not operation
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll
index c36fd68ba0ece..9943c35dfb134 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll
@@ -30,10 +30,11 @@ define <2 x i64> @ternary_A_and_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
 ; CHECK-LABEL: ternary_A_and_BC_B_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxland vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 49
+; CHECK-NEXT:    xxsel v2, v3, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %and = and <2 x i64> %B, %C
@@ -62,10 +63,11 @@ define <2 x i64> @ternary_A_nor_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
 ; CHECK-LABEL: ternary_A_nor_BC_B_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxlnor vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 56
+; CHECK-NEXT:    xxsel v2, v3, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <2 x i64> %B, %C
@@ -95,10 +97,11 @@ define <2 x i64> @ternary_A_eqv_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
 ; CHECK-LABEL: ternary_A_eqv_BC_B_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxleqv vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 57
+; CHECK-NEXT:    xxsel v2, v3, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <2 x i64> %B, %C
@@ -127,10 +130,11 @@ define <2 x i64> @ternary_A_not_C_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C
 ; CHECK-LABEL: ternary_A_not_C_B_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxlnor vs0, v4, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 58
+; CHECK-NEXT:    xxsel v2, v3, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <2 x i64> %C, <i64 -1, i64 -1>  ; Vector not operation
@@ -159,10 +163,11 @@ define <2 x i64> @ternary_A_nand_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64>
 ; CHECK-LABEL: ternary_A_nand_BC_B_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxlnand vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 62
+; CHECK-NEXT:    xxsel v2, v3, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %and = and <2 x i64> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll
index 54fda6063bfac..f770cf768147a 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll
@@ -30,10 +30,11 @@ define <2 x i64> @ternary_A_and_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
 ; CHECK-LABEL: ternary_A_and_BC_C_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxland vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 81
+; CHECK-NEXT:    xxsel v2, v4, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %and = and <2 x i64> %B, %C
@@ -62,10 +63,11 @@ define <2 x i64> @ternary_A_nor_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
 ; CHECK-LABEL: ternary_A_nor_BC_C_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxlnor vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 88
+; CHECK-NEXT:    xxsel v2, v4, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <2 x i64> %B, %C
@@ -95,10 +97,11 @@ define <2 x i64> @ternary_A_eqv_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
 ; CHECK-LABEL: ternary_A_eqv_BC_C_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxleqv vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 89
+; CHECK-NEXT:    xxsel v2, v4, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <2 x i64> %B, %C
@@ -128,10 +131,11 @@ define <2 x i64> @ternary_A_nand_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64>
 ; CHECK-LABEL: ternary_A_nand_BC_C_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxlnand vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 94
+; CHECK-NEXT:    xxsel v2, v4, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %and = and <2 x i64> %B, %C
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-or.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-or.ll
index c956785a757ca..bf2b691056781 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-or.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-or.ll
@@ -176,11 +176,12 @@ define <2 x i64> @ternary_A_not_C_or_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64
 ; CHECK-LABEL: ternary_A_not_C_or_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxlor vs0, v3, v4
+; CHECK-NEXT:    xxlnor vs0, v4, v4
+; CHECK-NEXT:    xxlor vs1, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, vs0, v4, 58
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <2 x i64> %C, <i64 -1, i64 -1>  ; Vector not operation
@@ -211,11 +212,12 @@ define <2 x i64> @ternary_A_not_B_or_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64
 ; CHECK-LABEL: ternary_A_not_B_or_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxlor vs0, v3, v4
+; CHECK-NEXT:    xxlnor vs0, v3, v3
+; CHECK-NEXT:    xxlor vs1, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, vs0, v3, 58
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %not = xor <2 x i64> %B, <i64 -1, i64 -1>  ; Vector not operation
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll
index 74d3a3bf6196e..2725525846dd8 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll
@@ -31,10 +31,12 @@ define <2 x i64> @ternary_A_and_BC_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
 ; CHECK-LABEL: ternary_A_and_BC_xor_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxland vs0, v3, v4
+; CHECK-NEXT:    xxlxor vs1, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 97
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %and = and <2 x i64> %B, %C
@@ -63,10 +65,11 @@ define <2 x i64> @ternary_A_B_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
 ; CHECK-LABEL: ternary_A_B_xor_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxlxor vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 99
+; CHECK-NEXT:    xxsel v2, vs0, v3, v2
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <2 x i64> %B, %C
@@ -94,10 +97,11 @@ define <2 x i64> @ternary_A_C_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %
 ; CHECK-LABEL: ternary_A_C_xor_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxlxor vs0, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 101
+; CHECK-NEXT:    xxsel v2, vs0, v4, v2
 ; CHECK-NEXT:    blr
 entry:
   %xor = xor <2 x i64> %B, %C
@@ -126,10 +130,12 @@ define <2 x i64> @ternary_A_or_BC_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6
 ; CHECK-LABEL: ternary_A_or_BC_xor_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxlor vs0, v3, v4
+; CHECK-NEXT:    xxlxor vs1, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 103
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <2 x i64> %B, %C
@@ -160,10 +166,12 @@ define <2 x i64> @ternary_A_nor_BC_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i
 ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_2x64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxlxor v5, v5, v5
+; CHECK-NEXT:    xxlnor vs0, v3, v4
+; CHECK-NEXT:    xxlxor vs1, v3, v4
 ; CHECK-NEXT:    xxsplti32dx v5, 1, 63
 ; CHECK-NEXT:    vsld v2, v2, v5
 ; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 104
+; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
 ; CHECK-NEXT:    blr
 entry:
   %or = or <2 x i64> %B, %C

>From e96037b5b5cf300268a89e0f7f44461f40ba8318 Mon Sep 17 00:00:00 2001
From: Tony Varghese <tony.varghese at ibm.com>
Date: Mon, 23 Jun 2025 14:07:33 +0000
Subject: [PATCH 3/4] [draft] support v2i64 xxeval

---
 llvm/lib/Target/PowerPC/CMakeLists.txt      |   5 +
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 116 +++++++++++++++++++-
 llvm/lib/Target/PowerPC/PPCISelLowering.h   |   1 +
 llvm/lib/Target/PowerPC/PPCInstrP10.td      |   7 +-
 4 files changed, 125 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt
index 3808a26a0b92a..587cd13c7b28b 100644
--- a/llvm/lib/Target/PowerPC/CMakeLists.txt
+++ b/llvm/lib/Target/PowerPC/CMakeLists.txt
@@ -83,6 +83,11 @@ add_llvm_target(PowerPCCodeGen
   PowerPC
   )
 
+set_source_files_properties(
+    PPCISelLowering.cpp
+    PROPERTIES COMPILE_FLAGS "-g -O0"
+)
+
 add_subdirectory(AsmParser)
 add_subdirectory(Disassembler)
 add_subdirectory(MCTargetDesc)
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 59c89985c6cff..a13d9b73246a7 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -844,7 +844,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
       AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
       setOperationAction(ISD::SELECT, VT, Promote);
       AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
-      setOperationAction(ISD::VSELECT, VT, Legal);
+      setOperationAction(ISD::VSELECT, VT, Legal);      
       setOperationAction(ISD::SELECT_CC, VT, Promote);
       AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
       setOperationAction(ISD::STORE, VT, Promote);
@@ -9579,6 +9579,119 @@ static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
   return false;
 }
 
+// Lower the vector operands of the VSELECT node.
+// The operands of the VSELECT node need to be converted back if:
+// - the operands of the VSELECT are bitcast (op (bitcast x), (bitcast y))
+// - the outer bitcast is to VT and the inner bitcasts are to v4i32
+// - the VSELECT node type is v2i64 rather than v4i32
+// Then the operands need to be put back to their original types.
+SDValue PPCTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const{
+  LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT \n" );
+  LLVM_DEBUG(DAG.dump(););
+  // Return early if the VT of the Op is v4i32
+  EVT VT = Op.getValueType();
+  if (VT == MVT::v4i32) {
+    LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: VT is v4i32 \n");
+    return SDValue(); // v4i32 needs no custom lowering; return null
+  }
+  LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: VT is not v4i32 \n");
+  // If the VT is v2i64, we need to check the conditions:
+  // - the operands of the VSELECT are bitcast (op (bitcast x), (bitcast y)) 
+  // - the outer bitcast is VT and inner bitcast is v4i32
+  // - VSELECT Node type is not v4i32 and is of type v2i64
+  SDValue Cond = Op.getOperand(0);
+  SDValue TrueOp = Op.getOperand(1);
+  SDValue FalseOp = Op.getOperand(2);
+
+  auto checkIfValidPattern = [](SDValue V) -> bool {
+    // Check if the operand is a bitcast
+    if (V.getOpcode() != ISD::BITCAST) {
+      return false; // Return false if not a bitcast
+    }
+    // Check if the inner node is a valid AND, XOR or OR operation
+    SDValue InnerOp = V.getOperand(0);
+    // Check if the inner node is an AND, XOR or OR operation
+    bool isValidInnerNode =
+        InnerOp.getOpcode() == ISD::AND || InnerOp.getOpcode() == ISD::XOR ||
+        InnerOp.getOpcode() == ISD::OR;
+    if (!isValidInnerNode) {
+      return false; // Return false if the inner node is not valid
+    }
+    // Get the Bit Op node's Operands
+    SDValue InnerBitOpOperand0 = InnerOp.getOperand(0);
+    SDValue InnerBitOpOperand1 = InnerOp.getOperand(1);
+    bool isValidInnerBitcasts =
+        (InnerBitOpOperand0.getOpcode() == ISD::BITCAST &&
+         InnerBitOpOperand0.getValueType() == MVT::v4i32) &&
+        (InnerBitOpOperand1.getOpcode() == ISD::BITCAST &&
+         InnerBitOpOperand1.getValueType() == MVT::v4i32);
+    if (!isValidInnerBitcasts) {
+      return false; // Return false if the inner bitcasts are not valid
+    }
+    // If all checks passed, return true
+    return true;
+  };
+
+  auto getOriginalNode = [&DAG](SDValue V) -> SDValue {   
+      SDValue InnerOp = V.getOperand(0);
+      // Get the Bit Op node's Operands
+      SDValue InnerBitOpOperand0 = InnerOp.getOperand(0);
+      SDValue InnerBitOpOperand1 = InnerOp.getOperand(1);
+      // Get the operands of the inner bit operation
+      SDValue X = InnerBitOpOperand0.getOperand(0);
+      SDValue Y = InnerBitOpOperand1.getOperand(0);
+      return DAG.getNode(
+          InnerOp.getOpcode(), SDLoc(V), V.getValueType(),
+          X, Y);
+  };
+
+  if(checkIfValidPattern(TrueOp) && checkIfValidPattern(FalseOp)) {
+    // If the TrueOp and FalseOp are valid patterns, get the original nodes
+    // and return the VSELECT node with the original nodes.
+    LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: Valid pattern \n");
+  } else {
+    // If TrueOp and FalseOp are not both valid patterns, return a null SDValue
+    // so that the node follows the default lowering.
+    LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: Invalid pattern \n");
+    return SDValue();
+  }
+
+  // Get the original nodes from the TrueOp and FalseOp
+  SDValue NTrueOp = getOriginalNode(TrueOp);
+  SDValue NFalseOp = getOriginalNode(FalseOp);
+  // LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: NTrueOp: " << NTrueOp.getNode()->print(dbgs()) << "\n");
+  // LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: NFalseOp: " << NFalseOp.getNode()->dump() << "\n");
+
+  // Ensure both NTrueOp and NFalseOp are valid before using them.
+  if (!NTrueOp.getNode() || !NFalseOp.getNode()) {
+    LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: One or both original nodes are invalid, returning original Op\n");
+    return SDValue();
+  }
+  EVT MaskVT = Cond.getValueType();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  EVT LegalMaskVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MaskVT);
+
+  LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: cond mask VT: " << MaskVT);
+  LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: cond Legal VT: " << LegalMaskVT << "\n");
+  if (MaskVT != LegalMaskVT) {
+    LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: cond LEGALIZATION\n");
+    // Zero-extend or truncate the condition to the legal mask type
+    Cond = DAG.getZExtOrTrunc(Cond, SDLoc(Op), LegalMaskVT);
+  }
+
+  SDValue NewVselectNode = DAG.getNode(
+      ISD::VSELECT, SDLoc(Op), VT, Cond, NTrueOp, NFalseOp);
+  DAG.ReplaceAllUsesWith(Op, NewVselectNode);
+  // if (Op.getNode()->use_empty()) {
+  //   DAG.RemoveDeadNode(Op.getNode());
+  // }
+
+  LLVM_DEBUG(NewVselectNode.dump());
+  LLVM_DEBUG(llvm::dbgs() << "New DAG \n");
+  LLVM_DEBUG(DAG.dump());
+  return  SDValue();
+}
+
 // If this is a case we can't handle, return null and let the default
 // expansion code take care of it.  If we CAN select this case, and if it
 // selects to a single instruction, return Op.  Otherwise, if we can codegen
@@ -12532,6 +12645,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::FSHR:               return LowerFunnelShift(Op, DAG);
 
   // Vector-related lowering.
+  case ISD::VSELECT:            return LowerVSELECT(Op, DAG);
   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 2c55b5427297a..7ad91dcdd21e9 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1311,6 +1311,7 @@ namespace llvm {
     SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerVPERM(SDValue Op, SelectionDAG &DAG, ArrayRef<int> PermMask,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td
index 947b3e188b5a3..eb1d18e3a4e90 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrP10.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td
@@ -2384,12 +2384,13 @@ let Predicates = [PrefixInstrs, HasP10Vector] in {
 
     // Utilize xxeval instruction for ternary vector expressions.
     defm : XXEvalVSelectWithXAnd<v4i32, 22>;
-
     defm : XXEvalVSelectWithXB<v4i32, 49>;
-
     defm : XXEvalVSelectWithXC<v4i32, 81>;
-    
     defm : XXEvalVSelectWithXXor<v4i32, 97>;
+    defm : XXEvalVSelectWithXAnd<v2i64, 22>;
+    defm : XXEvalVSelectWithXB<v2i64, 49>;
+    defm : XXEvalVSelectWithXC<v2i64, 81>;
+    defm : XXEvalVSelectWithXXor<v2i64, 97>;
 
     // Anonymous patterns to select prefixed VSX loads and stores.
     // Load / Store f128

>From de92b57553a4cb9f88229a64e49aa5be455cfd72 Mon Sep 17 00:00:00 2001
From: Tony Varghese <tony.varghese at ibm.com>
Date: Tue, 24 Jun 2025 18:40:20 +0000
Subject: [PATCH 4/4] [PowerPC] Emit xxeval instruction for v2i64 type operands
 of ternary

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   | 279 ++++++++++++------
 llvm/lib/Target/PowerPC/PPCISelLowering.h     |   4 +
 llvm/lib/Target/PowerPC/PPCInstrP10.td        |  15 +-
 llvm/lib/Target/PowerPC/PPCInstrVSX.td        |   6 +
 .../CodeGen/PowerPC/xxeval-vselect-x-and.ll   | 164 ----------
 5 files changed, 209 insertions(+), 259 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index a13d9b73246a7..ec12c665fb719 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -844,7 +844,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
       AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
       setOperationAction(ISD::SELECT, VT, Promote);
       AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
-      setOperationAction(ISD::VSELECT, VT, Legal);      
+      setOperationAction(ISD::VSELECT, VT, Custom);      
       setOperationAction(ISD::SELECT_CC, VT, Promote);
       AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
       setOperationAction(ISD::STORE, VT, Promote);
@@ -1690,6 +1690,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
     return "PPCISD::XXSPLTI_SP_TO_DP";
   case PPCISD::XXSPLTI32DX:
     return "PPCISD::XXSPLTI32DX";
+  case PPCISD::VSELECT:        return "PPCISD::VSELECT";
   case PPCISD::VECINSERT:       return "PPCISD::VECINSERT";
   case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
   case PPCISD::XXPERM:
@@ -9579,119 +9580,215 @@ static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
   return false;
 }
 
-// Lower the vector operands of the VSELECT Node 
-// The operands of the VSELECT nodes needs to modifed back if:
-// - the operands of the VSELECT are bitcast (op (bitcast x), (bitcast y)) 
-// - the outer bitcast is VT and inner bitcast is v4i32
-// - VSELECT Node type is not v4i32 and is of type v2i64
-// Then operands needs to put back to their original types.
-SDValue PPCTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const{
-  LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT \n" );
-  LLVM_DEBUG(DAG.dump(););
-  // Return early if the VT of the Op is v4i32
+SDValue PPCTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
+  LLVM_DEBUG(llvm::dbgs() << "LowerVSELECT\n"; DAG.dump());
+
   EVT VT = Op.getValueType();
   if (VT == MVT::v4i32) {
-    LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: VT is v4i32 \n");
-    return SDValue(); // No need to lower, return original Op
-  }
-  LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: VT is not v4i32 \n");
-  // If the VT is v2i64, we need to check the conditions:
-  // - the operands of the VSELECT are bitcast (op (bitcast x), (bitcast y)) 
-  // - the outer bitcast is VT and inner bitcast is v4i32
-  // - VSELECT Node type is not v4i32 and is of type v2i64
+    LLVM_DEBUG(llvm::dbgs() << "LowerVSELECT: VT is v4i32, default lowering\n");
+    return SDValue(); // Default lowering
+  }
+
+  if (VT != MVT::v2i64) {
+    LLVM_DEBUG(llvm::dbgs() << "LowerVSELECT: VT is not v2i64, default lowering\n");
+    return SDValue();
+  }
+
   SDValue Cond = Op.getOperand(0);
   SDValue TrueOp = Op.getOperand(1);
   SDValue FalseOp = Op.getOperand(2);
 
-  auto checkIfValidPattern = [](SDValue V) -> bool {
-    // Check if the operand is a bitcast
-    if (V.getOpcode() != ISD::BITCAST) {
-      return false; // Return false if not a bitcast
-    }
-    // Check if the inner node is a valid ADD, XOR or OR operation
-    SDValue InnerOp = V.getOperand(0);
-    // Check if the inner node is an ADD, XOR or OR operation
-    bool isValidInnerNode =
-        InnerOp.getOpcode() == ISD::AND || InnerOp.getOpcode() == ISD::XOR ||
-        InnerOp.getOpcode() == ISD::OR;
-    if (!isValidInnerNode) {
-      return false; // Return false if the inner node is not valid
-    }
-    // Get the Bit Op node's Operands
-    SDValue InnerBitOpOperand0 = InnerOp.getOperand(0);
-    SDValue InnerBitOpOperand1 = InnerOp.getOperand(1);
-    bool isValidInnerBitcasts =
-        (InnerBitOpOperand0.getOpcode() == ISD::BITCAST &&
-         InnerBitOpOperand0.getValueType() == MVT::v4i32) &&
-        (InnerBitOpOperand1.getOpcode() == ISD::BITCAST &&
-         InnerBitOpOperand1.getValueType() == MVT::v4i32);
-    if (!isValidInnerBitcasts) {
-      return false; // Return false if the inner bitcasts are not valid
-    }
-    // If all checks passed, return true
+  // Helper to check for the pattern: BITCAST (AND/XOR/OR (BITCAST x), (BITCAST y))
+  auto isPromotedBitcastBinop = [](SDValue V, unsigned &BinOpcode, SDValue &X, SDValue &Y) -> bool {
+    LLVM_DEBUG(llvm::dbgs() << "isPromotedBitCastBinop: \n");
+    LLVM_DEBUG(llvm::dbgs() << "Binop Op: "; V->dump());
+    LLVM_DEBUG(llvm::dbgs() << "\n");
+    
+    if (V.getOpcode() != ISD::BITCAST)
+      return false;
+    SDValue BinOp = V.getOperand(0);
+    if (BinOp.getOpcode() != ISD::AND &&
+        BinOp.getOpcode() != ISD::XOR &&
+        BinOp.getOpcode() != ISD::OR)
+      return false;
+    // Both operands must be BITCAST nodes whose result type is v4i32
+    SDValue BC0 = BinOp.getOperand(0);
+    SDValue BC1 = BinOp.getOperand(1);
+    if (BC0.getOpcode() != ISD::BITCAST || BC1.getOpcode() != ISD::BITCAST)
+      return false;
+    if (BC0.getValueType() != MVT::v4i32 || BC1.getValueType() != MVT::v4i32)
+      return false;
+    // The bitcast sources are taken as the original v2i64 values (their type is not checked here)
+    X = BC0.getOperand(0);
+    Y = BC1.getOperand(0);
+    BinOpcode = BinOp.getOpcode();
     return true;
   };
 
-  auto getOriginalNode = [&DAG](SDValue V) -> SDValue {   
-      SDValue InnerOp = V.getOperand(0);
-      // Get the Bit Op node's Operands
-      SDValue InnerBitOpOperand0 = InnerOp.getOperand(0);
-      SDValue InnerBitOpOperand1 = InnerOp.getOperand(1);
-      // Get the operands of the inner bit operation
-      SDValue X = InnerBitOpOperand0.getOperand(0);
-      SDValue Y = InnerBitOpOperand1.getOperand(0);
-      return DAG.getNode(
-          InnerOp.getOpcode(), SDLoc(V), V.getValueType(),
-          X, Y);
-  };
-
-  if(checkIfValidPattern(TrueOp) && checkIfValidPattern(FalseOp)) {
-    // If the TrueOp and FalseOp are valid patterns, get the original nodes
-    // and return the VSELECT node with the original nodes.
-    LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: Valid pattern \n");
-  } else {
-    // If the TrueOp and FalseOp are not valid patterns, return the original Op
-    // without modification.
-    LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: Invalid pattern \n");
+  unsigned TrueBinOpcode = 0, FalseBinOpcode = 0;
+  SDValue TrueX, TrueY, FalseX, FalseY;
+  if (!isPromotedBitcastBinop(TrueOp, TrueBinOpcode, TrueX, TrueY) ||
+      !isPromotedBitcastBinop(FalseOp, FalseBinOpcode, FalseX, FalseY)) {
+    LLVM_DEBUG(llvm::dbgs() << "LowerVSELECT: Pattern not matched, default lowering\n");
     return SDValue();
   }
 
-  // Get the original nodes from the TrueOp and FalseOp
-  SDValue NTrueOp = getOriginalNode(TrueOp);
-  SDValue NFalseOp = getOriginalNode(FalseOp);
-  // LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: NTrueOp: " << NTrueOp.getNode()->print(dbgs()) << "\n");
-  // LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: NFalseOp: " << NFalseOp.getNode()->dump() << "\n");
+  LLVM_DEBUG(llvm::dbgs() << "LowerVSELECT: TrueBinOpCode: " << TrueBinOpcode);
+  LLVM_DEBUG(llvm::dbgs() << "\nLowerVSELECT: FalseBinOpCode: " << FalseBinOpcode);
+
+  // For the specific pattern: VSELECT(cond, XOR, AND)
+  if (!(TrueBinOpcode == ISD::XOR && FalseBinOpcode == ISD::AND)) {
+    LLVM_DEBUG(llvm::dbgs() << "LowerVSELECT: Not AND/XOR pattern, default lowering\n");
+    return SDValue();
+  }
 
-  // Ensure both NTrueOp and NFalseOp are valid before using them.
-  if (!NTrueOp.getNode() || !NFalseOp.getNode()) {
-    LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: One or both original nodes are invalid, returning original Op\n");
+  // The operands to AND and XOR must be the same
+  if (!(TrueX == FalseX && TrueY == FalseY)) {
+    LLVM_DEBUG(llvm::dbgs() << "LowerVSELECT: AND/XOR operands mismatch, default lowering\n");
     return SDValue();
   }
+
+  // Rebuild the original v2i64 AND and XOR nodes
+  SDLoc DL(Op);
+  SDValue XorV2i64 = DAG.getNode(ISD::XOR, DL, VT, TrueX, TrueY);
+  SDValue AndV2i64 = DAG.getNode(ISD::AND, DL, VT, TrueX, TrueY);
+  LLVM_DEBUG(llvm::dbgs() << "LowerVSELECT: XOR Node : " ; XorV2i64->dump());
+  LLVM_DEBUG(llvm::dbgs() << "\nLowerVSELECT: AND Node : " ; AndV2i64->dump());
+
+  // Legalize the mask type if needed
   EVT MaskVT = Cond.getValueType();
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT LegalMaskVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MaskVT);
+  if (MaskVT != LegalMaskVT)
+    Cond = DAG.getZExtOrTrunc(Cond, DL, LegalMaskVT);
 
-  LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: cond mask VT: " << MaskVT);
-  LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: cond Legal VT: " << LegalMaskVT << "\n");
-  if (MaskVT != LegalMaskVT) {
-    LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: cond LEGALIZATION\n");
-    // Bitcast or extend/truncate as needed
-    Cond = DAG.getZExtOrTrunc(Cond, SDLoc(Op), LegalMaskVT);
-  }
+  // Emit the new PPCISD::VSELECT node (so it can match xxeval for v2i64)
+  SDValue NewVSelect = DAG.getNode(PPCISD::VSELECT, DL, VT, Cond, XorV2i64, AndV2i64);
+  DAG.ReplaceAllUsesWith(Op, NewVSelect);
+
+  LLVM_DEBUG(llvm::dbgs() << "LowerVSELECT: Emitted PPCISD::VSELECT for v2i64 AND/XOR pattern\n");
+  LLVM_DEBUG(NewVSelect.dump());
 
-  SDValue NewVselectNode = DAG.getNode(
-      ISD::VSELECT, SDLoc(Op), VT, Cond, NTrueOp, NFalseOp);
-  DAG.ReplaceAllUsesWith(Op, NewVselectNode);
-  // if (Op.getNode()->use_empty()) {
-  //   DAG.RemoveDeadNode(Op.getNode());
-  // }
+  LLVM_DEBUG(llvm::dbgs() << "\nLowerVSELECT NEW DAG\n"; DAG.dump());
+  DAG.RemoveDeadNode(Op.getNode());
+  LLVM_DEBUG(llvm::dbgs() << "\nLowerVSELECT NEW DAG After removal\n"; DAG.dump());
 
-  LLVM_DEBUG(NewVselectNode.dump());
-  LLVM_DEBUG(llvm::dbgs() << "New DAG \n");
-  LLVM_DEBUG(DAG.dump());
-  return  SDValue();
+  return NewVSelect;
 }
 
+// Lower the vector operands of the VSELECT Node 
+// The operands of the VSELECT node need to be modified back if:
+// - the operands of the VSELECT are bitcast (op (bitcast x), (bitcast y)) 
+// - the outer bitcast is VT and inner bitcast is v4i32
+// - VSELECT Node type is not v4i32 and is of type v2i64
+// Then the operands need to be put back to their original types.
+// SDValue PPCTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const{
+//   LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT \n" );
+//   LLVM_DEBUG(DAG.dump(););
+//   // Return early if the VT of the Op is v4i32
+//   EVT VT = Op.getValueType();
+//   if (VT == MVT::v4i32) {
+//     LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: VT is v4i32 \n");
+//     return SDValue(); // No need to lower, return original Op
+//   }
+//   LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: VT is not v4i32 \n");
+//   // If the VT is v2i64, we need to check the conditions:
+//   // - the operands of the VSELECT are bitcast (op (bitcast x), (bitcast y)) 
+//   // - the outer bitcast is VT and inner bitcast is v4i32
+//   // - VSELECT Node type is not v4i32 and is of type v2i64
+//   SDValue Cond = Op.getOperand(0);
+//   SDValue TrueOp = Op.getOperand(1);
+//   SDValue FalseOp = Op.getOperand(2);
+
+//   auto checkIfValidPattern = [](SDValue V) -> bool {
+//     // Check if the operand is a bitcast
+//     if (V.getOpcode() != ISD::BITCAST) {
+//       return false; // Return false if not a bitcast
+//     }
+//     // Check if the inner node is a valid AND, XOR or OR operation
+//     SDValue InnerOp = V.getOperand(0);
+//     // Check if the inner node is an AND, XOR or OR operation
+//     bool isValidInnerNode =
+//         InnerOp.getOpcode() == ISD::AND || InnerOp.getOpcode() == ISD::XOR ||
+//         InnerOp.getOpcode() == ISD::OR;
+//     if (!isValidInnerNode) {
+//       return false; // Return false if the inner node is not valid
+//     }
+//     // Get the Bit Op node's Operands
+//     SDValue InnerBitOpOperand0 = InnerOp.getOperand(0);
+//     SDValue InnerBitOpOperand1 = InnerOp.getOperand(1);
+//     bool isValidInnerBitcasts =
+//         (InnerBitOpOperand0.getOpcode() == ISD::BITCAST &&
+//          InnerBitOpOperand0.getValueType() == MVT::v4i32) &&
+//         (InnerBitOpOperand1.getOpcode() == ISD::BITCAST &&
+//          InnerBitOpOperand1.getValueType() == MVT::v4i32);
+//     if (!isValidInnerBitcasts) {
+//       return false; // Return false if the inner bitcasts are not valid
+//     }
+//     // If all checks passed, return true
+//     return true;
+//   };
+
+//   auto getOriginalNode = [&DAG](SDValue V) -> SDValue {   
+//       SDValue InnerOp = V.getOperand(0);
+//       // Get the Bit Op node's Operands
+//       SDValue InnerBitOpOperand0 = InnerOp.getOperand(0);
+//       SDValue InnerBitOpOperand1 = InnerOp.getOperand(1);
+//       // Get the operands of the inner bit operation
+//       SDValue X = InnerBitOpOperand0.getOperand(0);
+//       SDValue Y = InnerBitOpOperand1.getOperand(0);
+//       return DAG.getNode(
+//           InnerOp.getOpcode(), SDLoc(V), V.getValueType(),
+//           X, Y);
+//   };
+
+//   if(checkIfValidPattern(TrueOp) && checkIfValidPattern(FalseOp)) {
+//     // If the TrueOp and FalseOp are valid patterns, get the original nodes
+//     // and return the VSELECT node with the original nodes.
+//     LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: Valid pattern \n");
+//   } else {
+//     // If the TrueOp and FalseOp are not valid patterns, return the original Op
+//     // without modification.
+//     LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: Invalid pattern \n");
+//     return SDValue();
+//   }
+
+//   // Get the original nodes from the TrueOp and FalseOp
+//   SDValue NTrueOp = getOriginalNode(TrueOp);
+//   SDValue NFalseOp = getOriginalNode(FalseOp);
+//   // LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: NTrueOp: " << NTrueOp.getNode()->print(dbgs()) << "\n");
+//   // LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: NFalseOp: " << NFalseOp.getNode()->dump() << "\n");
+
+//   // Ensure both NTrueOp and NFalseOp are valid before using them.
+//   if (!NTrueOp.getNode() || !NFalseOp.getNode()) {
+//     LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: One or both original nodes are invalid, returning original Op\n");
+//     return SDValue();
+//   }
+//   EVT MaskVT = Cond.getValueType();
+//   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+//   EVT LegalMaskVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MaskVT);
+
+//   LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: cond mask VT: " << MaskVT);
+//   LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: cond Legal VT: " << LegalMaskVT << "\n");
+//   if (MaskVT != LegalMaskVT) {
+//     LLVM_DEBUG(llvm::dbgs() << "LowerVECTOR_SELECT: cond LEGALIZATION\n");
+//     // Bitcast or extend/truncate as needed
+//     Cond = DAG.getZExtOrTrunc(Cond, SDLoc(Op), LegalMaskVT);
+//   }
+
+//   SDValue NewVselectNode = DAG.getNode(
+//       ISD::VSELECT, SDLoc(Op), VT, Cond, NTrueOp, NFalseOp);
+//   DAG.ReplaceAllUsesWith(Op, NewVselectNode);
+//   // if (Op.getNode()->use_empty()) {
+//   //   DAG.RemoveDeadNode(Op.getNode());
+//   // }
+
+//   LLVM_DEBUG(NewVselectNode.dump());
+//   LLVM_DEBUG(llvm::dbgs() << "New DAG \n");
+//   LLVM_DEBUG(DAG.dump());
+//   return  SDValue();
+// }
+
 // If this is a case we can't handle, return null and let the default
 // expansion code take care of it.  If we CAN select this case, and if it
 // selects to a single instruction, return Op.  Otherwise, if we can codegen
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 7ad91dcdd21e9..2e4108e2fdf06 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -106,6 +106,10 @@ namespace llvm {
     ///
     XXSPLTI32DX,
 
+    /// VSELECT - The PPC vector select instruction.
+    ///
+    VSELECT,
+
     /// VECINSERT - The PPC vector insert instruction
     ///
     VECINSERT,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td
index eb1d18e3a4e90..a5d1cf3da55cd 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrP10.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td
@@ -2212,6 +2212,9 @@ multiclass XXEvalVSelectWithXB<ValueType vt, bits<8> baseImm>{
   def : XXEvalPattern<vt,
         (vselect vt:$vA, XXEvalBinaryPattern<vt, and>.opPat, vt:$vB), 
         baseImm>;
+  // def : XXEvalPattern<vt, 
+  //       (PPCvecselect vt:$vA, XXEvalBinaryPattern<vt, xor>.opPat, XXEvalBinaryPattern<vt, and>.opPat), 
+  //       baseImm>;
   // Ternary(A, nor(B,C), B) => imm: baseImm + 7 = 56
   def : XXEvalPattern<vt,
         (vselect vt:$vA, XXEvalBinaryPattern<vt, or, 1>.opPat, vt:$vB), 
@@ -2387,10 +2390,14 @@ let Predicates = [PrefixInstrs, HasP10Vector] in {
     defm : XXEvalVSelectWithXB<v4i32, 49>;
     defm : XXEvalVSelectWithXC<v4i32, 81>;
     defm : XXEvalVSelectWithXXor<v4i32, 97>;
-    defm : XXEvalVSelectWithXAnd<v2i64, 22>;
-    defm : XXEvalVSelectWithXB<v2i64, 49>;
-    defm : XXEvalVSelectWithXC<v2i64, 81>;
-    defm : XXEvalVSelectWithXXor<v2i64, 97>;
+    
+    def : XXEvalPattern<v2i64, 
+        (PPCvecselect v2i64:$vA, XXEvalBinaryPattern<v2i64, xor>.opPat, XXEvalBinaryPattern<v2i64, and>.opPat), 
+        22>;
+    // defm : XXEvalVSelectWithXAnd<v2i64, 22>;
+    // defm : XXEvalVSelectWithXB<v2i64, 49>;
+    // defm : XXEvalVSelectWithXC<v2i64, 81>;
+    // defm : XXEvalVSelectWithXXor<v2i64, 97>;
 
     // Anonymous patterns to select prefixed VSX loads and stores.
     // Load / Store f128
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 19448210f5db1..5f5f7e7745f7e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -87,6 +87,12 @@ def SDT_PPCxxperm : SDTypeProfile<1, 3, [
   SDTCisVT<0, v2f64>, SDTCisVT<1, v2f64>,
   SDTCisVT<2, v2f64>, SDTCisVT<3, v4i32>]>;
 //--------------------------- Custom PPC nodes -------------------------------//
+def PPCvecselect : SDNode<"PPCISD::VSELECT", SDTypeProfile<1, 3, [
+  SDTCisVT<0, v2i64>, // result type
+  SDTCisVT<1, v2i64>, // condition type
+  SDTCisVT<2, v2i64>, // true value type
+  SDTCisVT<3, v2i64>  // false value type
+]>,[]>;
 def PPClxvd2x  : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
                         [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x,
diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
index dfb910b8d0c75..8e2c0e0493cad 100644
--- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
+++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll
@@ -4,27 +4,6 @@
 ; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \
 ; RUN:   -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s
 
-; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc-ibm-aix-xcoff \
-; RUN:   -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s
-
-; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64-ibm-aix-xcoff \
-; RUN:   -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s
-
-; Function to test ternary(A, xor(B, C), and(B, C)) for <4 x i32>
-define <4 x i32> @ternary_A_xor_BC_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) {
-; CHECK-LABEL: ternary_A_xor_BC_and_BC_4x32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    vslw v2, v2, v5
-; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 22
-; CHECK-NEXT:    blr
-entry:
-  %xor = xor <4 x i32> %B, %C
-  %and = and <4 x i32> %B, %C
-  %res = select <4 x i1> %A, <4 x i32> %xor, <4 x i32> %and
-  ret <4 x i32> %res
-}
 
 ; Function to test ternary(A, xor(B, C), and(B, C)) for <2 x i64>
 define <2 x i64> @ternary_A_xor_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) {
@@ -45,146 +24,3 @@ entry:
   ret <2 x i64> %res
 }
 
-; Function to test ternary(A, nor(B, C), and(B, C)) for <4 x i32>
-define <4 x i32> @ternary_A_nor_BC_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) {
-; CHECK-LABEL: ternary_A_nor_BC_and_BC_4x32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    vslw v2, v2, v5
-; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 24
-; CHECK-NEXT:    blr
-entry:
-  %or = or <4 x i32> %B, %C
-  %nor = xor <4 x i32> %or, <i32 -1, i32 -1, i32 -1, i32 -1>  ; Vector NOR operation
-  %and = and <4 x i32> %B, %C
-  %res = select <4 x i1> %A, <4 x i32> %nor, <4 x i32> %and
-  ret <4 x i32> %res
-}
-
-; Function to test ternary(A, nor(B, C), and(B, C)) for <2 x i64>
-define <2 x i64> @ternary_A_nor_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) {
-; CHECK-LABEL: ternary_A_nor_BC_and_BC_2x64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxlnor vs0, v3, v4
-; CHECK-NEXT:    xxland vs1, v3, v4
-; CHECK-NEXT:    xxsplti32dx v5, 1, 63
-; CHECK-NEXT:    vsld v2, v2, v5
-; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
-; CHECK-NEXT:    blr
-entry:
-  %or = or <2 x i64> %B, %C
-  %nor = xor <2 x i64> %or, <i64 -1, i64 -1>  ; Vector NOR operation
-  %and = and <2 x i64> %B, %C
-  %res = select <2 x i1> %A, <2 x i64> %nor, <2 x i64> %and
-  ret <2 x i64> %res
-}
-
-; Function to test ternary(A, eqv(B, C), and(B, C)) for <4 x i32>
-define <4 x i32> @ternary_A_eqv_BC_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) {
-; CHECK-LABEL: ternary_A_eqv_BC_and_BC_4x32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    vslw v2, v2, v5
-; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 25
-; CHECK-NEXT:    blr
-entry:
-  %xor = xor <4 x i32> %B, %C
-  %eqv = xor <4 x i32> %xor, <i32 -1, i32 -1, i32 -1, i32 -1>  ; Vector eqv operation
-  %and = and <4 x i32> %B, %C
-  %res = select <4 x i1> %A, <4 x i32> %eqv, <4 x i32> %and
-  ret <4 x i32> %res
-}
-
-; Function to test ternary(A, eqv(B, C), and(B, C)) for <2 x i64>
-define <2 x i64> @ternary_A_eqv_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) {
-; CHECK-LABEL: ternary_A_eqv_BC_and_BC_2x64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxleqv vs0, v3, v4
-; CHECK-NEXT:    xxland vs1, v3, v4
-; CHECK-NEXT:    xxsplti32dx v5, 1, 63
-; CHECK-NEXT:    vsld v2, v2, v5
-; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
-; CHECK-NEXT:    blr
-entry:
-  %xor = xor <2 x i64> %B, %C
-  %eqv = xor <2 x i64> %xor, <i64 -1, i64 -1>  ; Vector eqv operation
-  %and = and <2 x i64> %B, %C
-  %res = select <2 x i1> %A, <2 x i64> %eqv, <2 x i64> %and
-  ret <2 x i64> %res
-}
-
-; Function to test ternary(A, not(C), and(B, C)) for <4 x i32>
-define <4 x i32> @ternary_A_not_C_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) {
-; CHECK-LABEL: ternary_A_not_C_and_BC_4x32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    vslw v2, v2, v5
-; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 26
-; CHECK-NEXT:    blr
-entry:
-  %not = xor <4 x i32> %C, <i32 -1, i32 -1, i32 -1, i32 -1>  ; Vector not operation
-  %and = and <4 x i32> %B, %C
-  %res = select <4 x i1> %A, <4 x i32> %not, <4 x i32> %and
-  ret <4 x i32> %res
-}
-
-; Function to test ternary(A, not(C), and(B, C)) for <2 x i64>
-define <2 x i64> @ternary_A_not_C_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) {
-; CHECK-LABEL: ternary_A_not_C_and_BC_2x64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxlnor vs0, v4, v4
-; CHECK-NEXT:    xxland vs1, v3, v4
-; CHECK-NEXT:    xxsplti32dx v5, 1, 63
-; CHECK-NEXT:    vsld v2, v2, v5
-; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
-; CHECK-NEXT:    blr
-entry:
-  %not = xor <2 x i64> %C, <i64 -1, i64 -1>  ; Vector not operation
-  %and = and <2 x i64> %B, %C
-  %res = select <2 x i1> %A, <2 x i64> %not, <2 x i64> %and
-  ret <2 x i64> %res
-}
-
-; Function to test ternary(A, not(B), and(B, C)) for <4 x i32>
-define <4 x i32> @ternary_A_not_B_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C) {
-; CHECK-LABEL: ternary_A_not_B_and_BC_4x32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxleqv v5, v5, v5
-; CHECK-NEXT:    vslw v2, v2, v5
-; CHECK-NEXT:    vsraw v2, v2, v5
-; CHECK-NEXT:    xxeval v2, v2, v3, v4, 28
-; CHECK-NEXT:    blr
-entry:
-  %not = xor <4 x i32> %B, <i32 -1, i32 -1, i32 -1, i32 -1>  ; Vector not operation
-  %and = and <4 x i32> %B, %C
-  %res = select <4 x i1> %A, <4 x i32> %not, <4 x i32> %and
-  ret <4 x i32> %res
-}
-
-; Function to test ternary(A, not(B), and(B, C)) for <2 x i64>
-define <2 x i64> @ternary_A_not_B_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C) {
-; CHECK-LABEL: ternary_A_not_B_and_BC_2x64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxlxor v5, v5, v5
-; CHECK-NEXT:    xxlnor vs0, v3, v3
-; CHECK-NEXT:    xxland vs1, v3, v4
-; CHECK-NEXT:    xxsplti32dx v5, 1, 63
-; CHECK-NEXT:    vsld v2, v2, v5
-; CHECK-NEXT:    vsrad v2, v2, v5
-; CHECK-NEXT:    xxsel v2, vs1, vs0, v2
-; CHECK-NEXT:    blr
-entry:
-  %not = xor <2 x i64> %B, <i64 -1, i64 -1>  ; Vector not operation
-  %and = and <2 x i64> %B, %C
-  %res = select <2 x i1> %A, <2 x i64> %not, <2 x i64> %and
-  ret <2 x i64> %res
-}



More information about the llvm-commits mailing list