[llvm] [PPC] Fix suspicious AltiVec VAVG patterns (PR #176891)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 21 07:56:10 PST 2026


https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/176891

>From 88d437e05e3dd3478bacdcf66b14e927b1929165 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Tue, 20 Jan 2026 10:23:08 +0000
Subject: [PATCH 1/2] [PPC] Fix suspicious AltiVec VAVG patterns

The existing ((X+Y+1)>>1) patterns didn't correctly handle overflow, unlike the VAVG instructions.

Remove the old patterns and correctly mark the altivec VAVGS/VAVGU patterns as matching the ISD::AVGCEIL opcodes - we can leave the generic DAG folds to handle everything else

I've updated the vavg.ll tests to correctly match ISD::AVGCEILS/U patterns and added the old tests as negative "overflow" patterns that shouldn't fold to VAVG instructions.

Fixes #174718
---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp |   2 +
 llvm/lib/Target/PowerPC/PPCInstrAltivec.td  |  24 +-
 llvm/test/CodeGen/PowerPC/vavg.ll           | 275 ++++++++++++--------
 3 files changed, 183 insertions(+), 118 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 641b7804097f8..1d41e59d6d4f7 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -804,6 +804,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
 
   if (Subtarget.hasAltivec()) {
     for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
+      setOperationAction(ISD::AVGCEILS, VT, Legal);
+      setOperationAction(ISD::AVGCEILU, VT, Legal);
       setOperationAction(ISD::SADDSAT, VT, Legal);
       setOperationAction(ISD::SSUBSAT, VT, Legal);
       setOperationAction(ISD::UADDSAT, VT, Legal);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index fe1eea2b33615..5ee97326715c5 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1168,18 +1168,18 @@ def : Pat<(v1i128 (vselect v1i128:$vA, v1i128:$vB, v1i128:$vC)),
           (VSEL $vC, $vB, $vA)>;
 
 // Vector Integer Average Instructions
-def : Pat<(v4i32 (sra (sub v4i32:$vA, (vnot v4i32:$vB)),
-          (v4i32 (immEQOneV)))), (v4i32 (VAVGSW $vA, $vB))>;
-def : Pat<(v8i16 (sra (sub v8i16:$vA, (v8i16 (bitconvert(vnot v4i32:$vB)))),
-          (v8i16 (immEQOneV)))), (v8i16 (VAVGSH $vA, $vB))>;
-def : Pat<(v16i8 (sra (sub v16i8:$vA, (v16i8 (bitconvert(vnot v4i32:$vB)))),
-          (v16i8 (immEQOneV)))), (v16i8 (VAVGSB $vA, $vB))>;
-def : Pat<(v4i32 (srl (sub v4i32:$vA, (vnot v4i32:$vB)),
-          (v4i32 (immEQOneV)))), (v4i32 (VAVGUW $vA, $vB))>;
-def : Pat<(v8i16 (srl (sub v8i16:$vA, (v8i16 (bitconvert(vnot v4i32:$vB)))),
-          (v8i16 (immEQOneV)))), (v8i16 (VAVGUH $vA, $vB))>;
-def : Pat<(v16i8 (srl (sub v16i8:$vA, (v16i8 (bitconvert(vnot v4i32:$vB)))),
-          (v16i8 (immEQOneV)))), (v16i8 (VAVGUB $vA, $vB))>;
+def : Pat<(v4i32 (avgceils v4i32:$src1, v4i32:$src2)),
+          (v4i32 (VAVGSW $src1, $src2))>;
+def : Pat<(v8i16 (avgceils v8i16:$src1, v8i16:$src2)),
+          (v8i16 (VAVGSH $src1, $src2))>;
+def : Pat<(v16i8 (avgceils v16i8:$src1, v16i8:$src2)),
+          (v16i8 (VAVGSB $src1, $src2))>;
+def : Pat<(v4i32 (avgceilu v4i32:$src1, v4i32:$src2)),
+          (v4i32 (VAVGUW $src1, $src2))>;
+def : Pat<(v8i16 (avgceilu v8i16:$src1, v8i16:$src2)),
+          (v8i16 (VAVGUH $src1, $src2))>;
+def : Pat<(v16i8 (avgceilu v16i8:$src1, v16i8:$src2)),
+          (v16i8 (VAVGUB $src1, $src2))>;
 
 def : Pat<(v16i8 (shl v16i8:$vA, (v16i8 (immEQOneV)))),
           (v16i8 (VADDUBM $vA, $vA))>;
diff --git a/llvm/test/CodeGen/PowerPC/vavg.ll b/llvm/test/CodeGen/PowerPC/vavg.ll
index 31a876116523b..bc6fa43dfbf7a 100644
--- a/llvm/test/CodeGen/PowerPC/vavg.ll
+++ b/llvm/test/CodeGen/PowerPC/vavg.ll
@@ -2,136 +2,154 @@
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck -check-prefix=CHECK-P9 %s
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck -check-prefix=CHECK-P8 %s
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck -check-prefix=CHECK-P7 %s
+
 define <8 x i16> @test_v8i16(<8 x i16> %m, <8 x i16> %n) {
 ; CHECK-P9-LABEL: test_v8i16:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    vavguh 2, 3, 2
+; CHECK-P9-NEXT:    vavguh 2, 2, 3
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-P8-LABEL: test_v8i16:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    vavguh 2, 3, 2
+; CHECK-P8-NEXT:    vavguh 2, 2, 3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P7-LABEL: test_v8i16:
 ; CHECK-P7:       # %bb.0: # %entry
-; CHECK-P7-NEXT:    vavguh 2, 3, 2
+; CHECK-P7-NEXT:    vavguh 2, 2, 3
 ; CHECK-P7-NEXT:    blr
 entry:
-  %add = add <8 x i16> %m, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
-  %add1 = add <8 x i16> %add, %n
-  %shr = lshr <8 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
-  ret <8 x i16> %shr
+  %xm = zext <8 x i16> %m to <8 x i17>
+  %xn = zext <8 x i16> %n to <8 x i17>
+  %add = add <8 x i17> %xm, <i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1>
+  %add1 = add <8 x i17> %add, %xn
+  %shr = lshr <8 x i17> %add1, <i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1>
+  %tr = trunc <8 x i17> %shr to <8 x i16>
+  ret <8 x i16> %tr
 }
 
 define <8 x i16> @test_v8i16_sign(<8 x i16> %m, <8 x i16> %n) {
 ; CHECK-P9-LABEL: test_v8i16_sign:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    vavgsh 2, 3, 2
+; CHECK-P9-NEXT:    vavgsh 2, 2, 3
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-P8-LABEL: test_v8i16_sign:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    vavgsh 2, 3, 2
+; CHECK-P8-NEXT:    vavgsh 2, 2, 3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P7-LABEL: test_v8i16_sign:
 ; CHECK-P7:       # %bb.0: # %entry
-; CHECK-P7-NEXT:    vavgsh 2, 3, 2
+; CHECK-P7-NEXT:    vavgsh 2, 2, 3
 ; CHECK-P7-NEXT:    blr
 entry:
-  %add = add <8 x i16> %m, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
-  %add1 = add <8 x i16> %add, %n
-  %shr = ashr <8 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
-  ret <8 x i16> %shr
+  %xm = sext <8 x i16> %m to <8 x i17>
+  %xn = sext <8 x i16> %n to <8 x i17>
+  %add = add <8 x i17> %xm, <i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1>
+  %add1 = add <8 x i17> %add, %xn
+  %shr = ashr <8 x i17> %add1, <i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1>
+  %tr = trunc <8 x i17> %shr to <8 x i16>
+  ret <8 x i16> %tr
 }
 
 define <4 x i32> @test_v4i32(<4 x i32> %m, <4 x i32> %n) {
 ; CHECK-P9-LABEL: test_v4i32:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    vavguw 2, 3, 2
+; CHECK-P9-NEXT:    vavguw 2, 2, 3
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-P8-LABEL: test_v4i32:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    vavguw 2, 3, 2
+; CHECK-P8-NEXT:    vavguw 2, 2, 3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P7-LABEL: test_v4i32:
 ; CHECK-P7:       # %bb.0: # %entry
-; CHECK-P7-NEXT:    vavguw 2, 3, 2
+; CHECK-P7-NEXT:    vavguw 2, 2, 3
 ; CHECK-P7-NEXT:    blr
 entry:
-  %add = add <4 x i32> %m, <i32 1, i32 1, i32 1, i32 1>
-  %add1 = add <4 x i32> %add, %n
-  %shr = lshr <4 x i32> %add1, <i32 1, i32 1, i32 1, i32 1>
-  ret <4 x i32> %shr
+  %xm = zext <4 x i32> %m to <4 x i33>
+  %xn = zext <4 x i32> %n to <4 x i33>
+  %add = add <4 x i33> %xm, <i33 1, i33 1, i33 1, i33 1>
+  %add1 = add <4 x i33> %add, %xn
+  %shr = lshr <4 x i33> %add1, <i33 1, i33 1, i33 1, i33 1>
+  %tr = trunc <4 x i33> %shr to <4 x i32>
+  ret <4 x i32> %tr
 }
 
 define <4 x i32> @test_v4i32_sign(<4 x i32> %m, <4 x i32> %n) {
 ; CHECK-P9-LABEL: test_v4i32_sign:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    vavgsw 2, 3, 2
+; CHECK-P9-NEXT:    vavgsw 2, 2, 3
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-P8-LABEL: test_v4i32_sign:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    vavgsw 2, 3, 2
+; CHECK-P8-NEXT:    vavgsw 2, 2, 3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P7-LABEL: test_v4i32_sign:
 ; CHECK-P7:       # %bb.0: # %entry
-; CHECK-P7-NEXT:    vavgsw 2, 3, 2
+; CHECK-P7-NEXT:    vavgsw 2, 2, 3
 ; CHECK-P7-NEXT:    blr
 entry:
-  %add = add <4 x i32> %m, <i32 1, i32 1, i32 1, i32 1>
-  %add1 = add <4 x i32> %add, %n
-  %shr = ashr <4 x i32> %add1, <i32 1, i32 1, i32 1, i32 1>
-  ret <4 x i32> %shr
+  %xm = sext <4 x i32> %m to <4 x i33>
+  %xn = sext <4 x i32> %n to <4 x i33>
+  %add = add <4 x i33> %xm, <i33 1, i33 1, i33 1, i33 1>
+  %add1 = add <4 x i33> %add, %xn
+  %shr = ashr <4 x i33> %add1, <i33 1, i33 1, i33 1, i33 1>
+  %tr = trunc <4 x i33> %shr to <4 x i32>
+  ret <4 x i32> %tr
 }
 
 define <16 x i8> @test_v16i8(<16 x i8> %m, <16 x i8> %n) {
 ; CHECK-P9-LABEL: test_v16i8:
 ; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    vavgub 2, 3, 2
+; CHECK-P9-NEXT:    vavgub 2, 2, 3
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-P8-LABEL: test_v16i8:
 ; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    vavgub 2, 3, 2
+; CHECK-P8-NEXT:    vavgub 2, 2, 3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P7-LABEL: test_v16i8:
 ; CHECK-P7:       # %bb.0: # %entry
-; CHECK-P7-NEXT:    vavgub 2, 3, 2
+; CHECK-P7-NEXT:    vavgub 2, 2, 3
 ; CHECK-P7-NEXT:    blr
 entry:
-  %add = add <16 x i8> %m, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-  %add1 = add <16 x i8> %add, %n
-  %shr = lshr <16 x i8> %add1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-  ret <16 x i8> %shr
+  %xm = zext <16 x i8> %m to <16 x i9>
+  %xn = zext <16 x i8> %n to <16 x i9>
+  %add = add <16 x i9> %xm, <i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1>
+  %add1 = add <16 x i9> %add, %xn
+  %shr = lshr <16 x i9> %add1, <i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1>
+  %tr = trunc <16 x i9> %shr to <16 x i8>
+  ret <16 x i8> %tr
 }
 
 define <16 x i8> @test_v16i8_sign(<16 x i8> %m, <16 x i8> %n) {
 ; CHECK-P9-LABEL: test_v16i8_sign:
-; CHECK-P9:       # %bb.0: # %entry
-; CHECK-P9-NEXT:    vavgsb 2, 3, 2
+; CHECK-P9:       # %bb.0:
+; CHECK-P9-NEXT:    vavgsb 2, 2, 3
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-P8-LABEL: test_v16i8_sign:
-; CHECK-P8:       # %bb.0: # %entry
-; CHECK-P8-NEXT:    vavgsb 2, 3, 2
+; CHECK-P8:       # %bb.0:
+; CHECK-P8-NEXT:    vavgsb 2, 2, 3
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P7-LABEL: test_v16i8_sign:
-; CHECK-P7:       # %bb.0: # %entry
-; CHECK-P7-NEXT:    vavgsb 2, 3, 2
+; CHECK-P7:       # %bb.0:
+; CHECK-P7-NEXT:    vavgsb 2, 2, 3
 ; CHECK-P7-NEXT:    blr
-entry:
-  %add = add <16 x i8> %m, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-  %add1 = add <16 x i8> %add, %n
-  %shr = ashr <16 x i8> %add1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-  ret <16 x i8> %shr
+  %xm = sext <16 x i8> %m to <16 x i9>
+  %xn = sext <16 x i8> %n to <16 x i9>
+  %add = add <16 x i9> %xm, <i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1>
+  %add1 = add <16 x i9> %add, %xn
+  %shr = ashr <16 x i9> %add1, <i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1>
+  %tr = trunc <16 x i9> %shr to <16 x i8>
+  ret <16 x i8> %tr
 }
 
 define <8 x i16> @test_v8i16_sign_negative(<8 x i16> %m, <8 x i16> %n) {
@@ -241,32 +259,113 @@ entry:
   ret <4 x i32> %shr
 }
 
+define <8 x i16> @test_v8i16_overflow(<8 x i16> %m, <8 x i16> %n) {
+; CHECK-P9-LABEL: test_v8i16_overflow:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xxlnor 34, 34, 34
+; CHECK-P9-NEXT:    vspltish 4, 1
+; CHECK-P9-NEXT:    vsubuhm 2, 3, 2
+; CHECK-P9-NEXT:    vsrh 2, 2, 4
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: test_v8i16_overflow:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xxlnor 34, 34, 34
+; CHECK-P8-NEXT:    vspltish 4, 1
+; CHECK-P8-NEXT:    vsubuhm 2, 3, 2
+; CHECK-P8-NEXT:    vsrh 2, 2, 4
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P7-LABEL: test_v8i16_overflow:
+; CHECK-P7:       # %bb.0: # %entry
+; CHECK-P7-NEXT:    xxlnor 34, 34, 34
+; CHECK-P7-NEXT:    vspltish 4, 1
+; CHECK-P7-NEXT:    vsubuhm 2, 3, 2
+; CHECK-P7-NEXT:    vsrh 2, 2, 4
+; CHECK-P7-NEXT:    blr
+entry:
+  %add = add <8 x i16> %m, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %add1 = add <8 x i16> %add, %n
+  %shr = lshr <8 x i16> %add1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %shr
+}
+
+define <4 x i32> @test_v4i32_overflow(<4 x i32> %m, <4 x i32> %n) {
+; CHECK-P9-LABEL: test_v4i32_overflow:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xxlnor 34, 34, 34
+; CHECK-P9-NEXT:    vspltisw 4, 1
+; CHECK-P9-NEXT:    vsubuwm 2, 3, 2
+; CHECK-P9-NEXT:    vsrw 2, 2, 4
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: test_v4i32_overflow:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xxlnor 34, 34, 34
+; CHECK-P8-NEXT:    vspltisw 4, 1
+; CHECK-P8-NEXT:    vsubuwm 2, 3, 2
+; CHECK-P8-NEXT:    vsrw 2, 2, 4
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P7-LABEL: test_v4i32_overflow:
+; CHECK-P7:       # %bb.0: # %entry
+; CHECK-P7-NEXT:    xxlnor 34, 34, 34
+; CHECK-P7-NEXT:    vspltisw 4, 1
+; CHECK-P7-NEXT:    vsubuwm 2, 3, 2
+; CHECK-P7-NEXT:    vsrw 2, 2, 4
+; CHECK-P7-NEXT:    blr
+entry:
+  %add = add <4 x i32> %m, <i32 1, i32 1, i32 1, i32 1>
+  %add1 = add <4 x i32> %add, %n
+  %shr = lshr <4 x i32> %add1, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %shr
+}
+
+define <16 x i8> @test_v16i8_overflow(<16 x i8> %m, <16 x i8> %n) {
+; CHECK-P9-LABEL: test_v16i8_overflow:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    xxlnor 34, 34, 34
+; CHECK-P9-NEXT:    xxspltib 36, 1
+; CHECK-P9-NEXT:    vsububm 2, 3, 2
+; CHECK-P9-NEXT:    vsrb 2, 2, 4
+; CHECK-P9-NEXT:    blr
+;
+; CHECK-P8-LABEL: test_v16i8_overflow:
+; CHECK-P8:       # %bb.0: # %entry
+; CHECK-P8-NEXT:    xxlnor 34, 34, 34
+; CHECK-P8-NEXT:    vspltisb 4, 1
+; CHECK-P8-NEXT:    vsububm 2, 3, 2
+; CHECK-P8-NEXT:    vsrb 2, 2, 4
+; CHECK-P8-NEXT:    blr
+;
+; CHECK-P7-LABEL: test_v16i8_overflow:
+; CHECK-P7:       # %bb.0: # %entry
+; CHECK-P7-NEXT:    xxlnor 34, 34, 34
+; CHECK-P7-NEXT:    vspltisb 4, 1
+; CHECK-P7-NEXT:    vsububm 2, 3, 2
+; CHECK-P7-NEXT:    vsrb 2, 2, 4
+; CHECK-P7-NEXT:    blr
+entry:
+  %add = add <16 x i8> %m, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %add1 = add <16 x i8> %add, %n
+  %shr = lshr <16 x i8> %add1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <16 x i8> %shr
+}
+
 define <16 x i8> @test_avgceilu_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-P9-LABEL: test_avgceilu_v16i8:
 ; CHECK-P9:       # %bb.0:
-; CHECK-P9-NEXT:    xxlor 36, 35, 34
-; CHECK-P9-NEXT:    xxlxor 34, 35, 34
-; CHECK-P9-NEXT:    xxspltib 35, 1
-; CHECK-P9-NEXT:    vsrb 2, 2, 3
-; CHECK-P9-NEXT:    vsububm 2, 4, 2
+; CHECK-P9-NEXT:    vavgub 2, 3, 2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-P8-LABEL: test_avgceilu_v16i8:
 ; CHECK-P8:       # %bb.0:
-; CHECK-P8-NEXT:    vspltisb 4, 1
-; CHECK-P8-NEXT:    xxlor 37, 35, 34
-; CHECK-P8-NEXT:    xxlxor 34, 35, 34
-; CHECK-P8-NEXT:    vsrb 2, 2, 4
-; CHECK-P8-NEXT:    vsububm 2, 5, 2
+; CHECK-P8-NEXT:    vavgub 2, 3, 2
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P7-LABEL: test_avgceilu_v16i8:
 ; CHECK-P7:       # %bb.0:
-; CHECK-P7-NEXT:    vspltisb 4, 1
-; CHECK-P7-NEXT:    xxlor 37, 35, 34
-; CHECK-P7-NEXT:    xxlxor 34, 35, 34
-; CHECK-P7-NEXT:    vsrb 2, 2, 4
-; CHECK-P7-NEXT:    vsububm 2, 5, 2
+; CHECK-P7-NEXT:    vavgub 2, 3, 2
 ; CHECK-P7-NEXT:    blr
   %a_shr_1 = lshr <16 x i8> %a, splat (i8 1)
   %b_shr_1 = lshr <16 x i8> %b, splat (i8 1)
@@ -280,29 +379,17 @@ define <16 x i8> @test_avgceilu_v16i8(<16 x i8> %a, <16 x i8> %b) {
 define <8 x i16> @test_avgceilu_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-P9-LABEL: test_avgceilu_v8i16:
 ; CHECK-P9:       # %bb.0:
-; CHECK-P9-NEXT:    xxlor 36, 35, 34
-; CHECK-P9-NEXT:    xxlxor 34, 35, 34
-; CHECK-P9-NEXT:    vspltish 3, 1
-; CHECK-P9-NEXT:    vsrh 2, 2, 3
-; CHECK-P9-NEXT:    vsubuhm 2, 4, 2
+; CHECK-P9-NEXT:    vavguh 2, 3, 2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-P8-LABEL: test_avgceilu_v8i16:
 ; CHECK-P8:       # %bb.0:
-; CHECK-P8-NEXT:    vspltish 4, 1
-; CHECK-P8-NEXT:    xxlor 37, 35, 34
-; CHECK-P8-NEXT:    xxlxor 34, 35, 34
-; CHECK-P8-NEXT:    vsrh 2, 2, 4
-; CHECK-P8-NEXT:    vsubuhm 2, 5, 2
+; CHECK-P8-NEXT:    vavguh 2, 3, 2
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P7-LABEL: test_avgceilu_v8i16:
 ; CHECK-P7:       # %bb.0:
-; CHECK-P7-NEXT:    vspltish 4, 1
-; CHECK-P7-NEXT:    xxlor 37, 35, 34
-; CHECK-P7-NEXT:    xxlxor 34, 35, 34
-; CHECK-P7-NEXT:    vsrh 2, 2, 4
-; CHECK-P7-NEXT:    vsubuhm 2, 5, 2
+; CHECK-P7-NEXT:    vavguh 2, 3, 2
 ; CHECK-P7-NEXT:    blr
   %a_shr_1 = lshr <8 x i16> %a, splat (i16 1)
   %b_shr_1 = lshr <8 x i16> %b, splat (i16 1)
@@ -316,29 +403,17 @@ define <8 x i16> @test_avgceilu_v8i16(<8 x i16> %a, <8 x i16> %b) {
 define <16 x i8> @test_avgceils_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-P9-LABEL: test_avgceils_v16i8:
 ; CHECK-P9:       # %bb.0:
-; CHECK-P9-NEXT:    xxlor 36, 35, 34
-; CHECK-P9-NEXT:    xxlxor 34, 35, 34
-; CHECK-P9-NEXT:    xxspltib 35, 1
-; CHECK-P9-NEXT:    vsrab 2, 2, 3
-; CHECK-P9-NEXT:    vsububm 2, 4, 2
+; CHECK-P9-NEXT:    vavgsb 2, 3, 2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-P8-LABEL: test_avgceils_v16i8:
 ; CHECK-P8:       # %bb.0:
-; CHECK-P8-NEXT:    vspltisb 4, 1
-; CHECK-P8-NEXT:    xxlor 37, 35, 34
-; CHECK-P8-NEXT:    xxlxor 34, 35, 34
-; CHECK-P8-NEXT:    vsrab 2, 2, 4
-; CHECK-P8-NEXT:    vsububm 2, 5, 2
+; CHECK-P8-NEXT:    vavgsb 2, 3, 2
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P7-LABEL: test_avgceils_v16i8:
 ; CHECK-P7:       # %bb.0:
-; CHECK-P7-NEXT:    vspltisb 4, 1
-; CHECK-P7-NEXT:    xxlor 37, 35, 34
-; CHECK-P7-NEXT:    xxlxor 34, 35, 34
-; CHECK-P7-NEXT:    vsrab 2, 2, 4
-; CHECK-P7-NEXT:    vsububm 2, 5, 2
+; CHECK-P7-NEXT:    vavgsb 2, 3, 2
 ; CHECK-P7-NEXT:    blr
   %a_shr_1 = ashr <16 x i8> %a, splat (i8 1)
   %b_shr_1 = ashr <16 x i8> %b, splat (i8 1)
@@ -352,29 +427,17 @@ define <16 x i8> @test_avgceils_v16i8(<16 x i8> %a, <16 x i8> %b) {
 define <8 x i16> @test_avgceils_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-P9-LABEL: test_avgceils_v8i16:
 ; CHECK-P9:       # %bb.0:
-; CHECK-P9-NEXT:    xxlor 36, 35, 34
-; CHECK-P9-NEXT:    xxlxor 34, 35, 34
-; CHECK-P9-NEXT:    vspltish 3, 1
-; CHECK-P9-NEXT:    vsrah 2, 2, 3
-; CHECK-P9-NEXT:    vsubuhm 2, 4, 2
+; CHECK-P9-NEXT:    vavgsh 2, 3, 2
 ; CHECK-P9-NEXT:    blr
 ;
 ; CHECK-P8-LABEL: test_avgceils_v8i16:
 ; CHECK-P8:       # %bb.0:
-; CHECK-P8-NEXT:    vspltish 4, 1
-; CHECK-P8-NEXT:    xxlor 37, 35, 34
-; CHECK-P8-NEXT:    xxlxor 34, 35, 34
-; CHECK-P8-NEXT:    vsrah 2, 2, 4
-; CHECK-P8-NEXT:    vsubuhm 2, 5, 2
+; CHECK-P8-NEXT:    vavgsh 2, 3, 2
 ; CHECK-P8-NEXT:    blr
 ;
 ; CHECK-P7-LABEL: test_avgceils_v8i16:
 ; CHECK-P7:       # %bb.0:
-; CHECK-P7-NEXT:    vspltish 4, 1
-; CHECK-P7-NEXT:    xxlor 37, 35, 34
-; CHECK-P7-NEXT:    xxlxor 34, 35, 34
-; CHECK-P7-NEXT:    vsrah 2, 2, 4
-; CHECK-P7-NEXT:    vsubuhm 2, 5, 2
+; CHECK-P7-NEXT:    vavgsh 2, 3, 2
 ; CHECK-P7-NEXT:    blr
   %a_shr_1 = ashr <8 x i16> %a, splat (i16 1)
   %b_shr_1 = ashr <8 x i16> %b, splat (i16 1)

>From e4a0366a7a9b956385aa67c2da0f08eccd9827f2 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 21 Jan 2026 15:55:53 +0000
Subject: [PATCH 2/2] Bump width

---
 llvm/test/CodeGen/PowerPC/vavg.ll | 72 +++++++++++++++----------------
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/llvm/test/CodeGen/PowerPC/vavg.ll b/llvm/test/CodeGen/PowerPC/vavg.ll
index bc6fa43dfbf7a..e473cbcd3f281 100644
--- a/llvm/test/CodeGen/PowerPC/vavg.ll
+++ b/llvm/test/CodeGen/PowerPC/vavg.ll
@@ -19,12 +19,12 @@ define <8 x i16> @test_v8i16(<8 x i16> %m, <8 x i16> %n) {
 ; CHECK-P7-NEXT:    vavguh 2, 2, 3
 ; CHECK-P7-NEXT:    blr
 entry:
-  %xm = zext <8 x i16> %m to <8 x i17>
-  %xn = zext <8 x i16> %n to <8 x i17>
-  %add = add <8 x i17> %xm, <i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1>
-  %add1 = add <8 x i17> %add, %xn
-  %shr = lshr <8 x i17> %add1, <i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1>
-  %tr = trunc <8 x i17> %shr to <8 x i16>
+  %xm = zext <8 x i16> %m to <8 x i18>
+  %xn = zext <8 x i16> %n to <8 x i18>
+  %add = add <8 x i18> %xm, <i18 1, i18 1, i18 1, i18 1, i18 1, i18 1, i18 1, i18 1>
+  %add1 = add <8 x i18> %add, %xn
+  %shr = lshr <8 x i18> %add1, <i18 1, i18 1, i18 1, i18 1, i18 1, i18 1, i18 1, i18 1>
+  %tr = trunc <8 x i18> %shr to <8 x i16>
   ret <8 x i16> %tr
 }
 
@@ -44,12 +44,12 @@ define <8 x i16> @test_v8i16_sign(<8 x i16> %m, <8 x i16> %n) {
 ; CHECK-P7-NEXT:    vavgsh 2, 2, 3
 ; CHECK-P7-NEXT:    blr
 entry:
-  %xm = sext <8 x i16> %m to <8 x i17>
-  %xn = sext <8 x i16> %n to <8 x i17>
-  %add = add <8 x i17> %xm, <i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1>
-  %add1 = add <8 x i17> %add, %xn
-  %shr = ashr <8 x i17> %add1, <i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1, i17 1>
-  %tr = trunc <8 x i17> %shr to <8 x i16>
+  %xm = sext <8 x i16> %m to <8 x i18>
+  %xn = sext <8 x i16> %n to <8 x i18>
+  %add = add <8 x i18> %xm, <i18 1, i18 1, i18 1, i18 1, i18 1, i18 1, i18 1, i18 1>
+  %add1 = add <8 x i18> %add, %xn
+  %shr = ashr <8 x i18> %add1, <i18 1, i18 1, i18 1, i18 1, i18 1, i18 1, i18 1, i18 1>
+  %tr = trunc <8 x i18> %shr to <8 x i16>
   ret <8 x i16> %tr
 }
 
@@ -69,12 +69,12 @@ define <4 x i32> @test_v4i32(<4 x i32> %m, <4 x i32> %n) {
 ; CHECK-P7-NEXT:    vavguw 2, 2, 3
 ; CHECK-P7-NEXT:    blr
 entry:
-  %xm = zext <4 x i32> %m to <4 x i33>
-  %xn = zext <4 x i32> %n to <4 x i33>
-  %add = add <4 x i33> %xm, <i33 1, i33 1, i33 1, i33 1>
-  %add1 = add <4 x i33> %add, %xn
-  %shr = lshr <4 x i33> %add1, <i33 1, i33 1, i33 1, i33 1>
-  %tr = trunc <4 x i33> %shr to <4 x i32>
+  %xm = zext <4 x i32> %m to <4 x i34>
+  %xn = zext <4 x i32> %n to <4 x i34>
+  %add = add <4 x i34> %xm, <i34 1, i34 1, i34 1, i34 1>
+  %add1 = add <4 x i34> %add, %xn
+  %shr = lshr <4 x i34> %add1, <i34 1, i34 1, i34 1, i34 1>
+  %tr = trunc <4 x i34> %shr to <4 x i32>
   ret <4 x i32> %tr
 }
 
@@ -94,12 +94,12 @@ define <4 x i32> @test_v4i32_sign(<4 x i32> %m, <4 x i32> %n) {
 ; CHECK-P7-NEXT:    vavgsw 2, 2, 3
 ; CHECK-P7-NEXT:    blr
 entry:
-  %xm = sext <4 x i32> %m to <4 x i33>
-  %xn = sext <4 x i32> %n to <4 x i33>
-  %add = add <4 x i33> %xm, <i33 1, i33 1, i33 1, i33 1>
-  %add1 = add <4 x i33> %add, %xn
-  %shr = ashr <4 x i33> %add1, <i33 1, i33 1, i33 1, i33 1>
-  %tr = trunc <4 x i33> %shr to <4 x i32>
+  %xm = sext <4 x i32> %m to <4 x i34>
+  %xn = sext <4 x i32> %n to <4 x i34>
+  %add = add <4 x i34> %xm, <i34 1, i34 1, i34 1, i34 1>
+  %add1 = add <4 x i34> %add, %xn
+  %shr = ashr <4 x i34> %add1, <i34 1, i34 1, i34 1, i34 1>
+  %tr = trunc <4 x i34> %shr to <4 x i32>
   ret <4 x i32> %tr
 }
 
@@ -119,12 +119,12 @@ define <16 x i8> @test_v16i8(<16 x i8> %m, <16 x i8> %n) {
 ; CHECK-P7-NEXT:    vavgub 2, 2, 3
 ; CHECK-P7-NEXT:    blr
 entry:
-  %xm = zext <16 x i8> %m to <16 x i9>
-  %xn = zext <16 x i8> %n to <16 x i9>
-  %add = add <16 x i9> %xm, <i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1>
-  %add1 = add <16 x i9> %add, %xn
-  %shr = lshr <16 x i9> %add1, <i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1>
-  %tr = trunc <16 x i9> %shr to <16 x i8>
+  %xm = zext <16 x i8> %m to <16 x i10>
+  %xn = zext <16 x i8> %n to <16 x i10>
+  %add = add <16 x i10> %xm, <i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1>
+  %add1 = add <16 x i10> %add, %xn
+  %shr = lshr <16 x i10> %add1, <i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1>
+  %tr = trunc <16 x i10> %shr to <16 x i8>
   ret <16 x i8> %tr
 }
 
@@ -143,12 +143,12 @@ define <16 x i8> @test_v16i8_sign(<16 x i8> %m, <16 x i8> %n) {
 ; CHECK-P7:       # %bb.0:
 ; CHECK-P7-NEXT:    vavgsb 2, 2, 3
 ; CHECK-P7-NEXT:    blr
-  %xm = sext <16 x i8> %m to <16 x i9>
-  %xn = sext <16 x i8> %n to <16 x i9>
-  %add = add <16 x i9> %xm, <i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1>
-  %add1 = add <16 x i9> %add, %xn
-  %shr = ashr <16 x i9> %add1, <i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1, i9 1>
-  %tr = trunc <16 x i9> %shr to <16 x i8>
+  %xm = sext <16 x i8> %m to <16 x i10>
+  %xn = sext <16 x i8> %n to <16 x i10>
+  %add = add <16 x i10> %xm, <i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1>
+  %add1 = add <16 x i10> %add, %xn
+  %shr = ashr <16 x i10> %add1, <i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1, i10 1>
+  %tr = trunc <16 x i10> %shr to <16 x i8>
   ret <16 x i8> %tr
 }
 



More information about the llvm-commits mailing list