[llvm] [PowerPC] Add intrinsic support for xvrlw (PR #167349)
Lei Huang via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 10 09:28:50 PST 2025
https://github.com/lei137 created https://github.com/llvm/llvm-project/pull/167349
None
>From 049e350148879dc75269138eda4974cc820c77fe Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Fri, 7 Nov 2025 15:54:46 +0000
Subject: [PATCH 1/2] add intrinsic support for xvrlw
---
llvm/include/llvm/IR/IntrinsicsPowerPC.td | 7 +++++++
llvm/test/CodeGen/PowerPC/vec_rotate_lw.ll | 22 ++++++++++++++++++++++
2 files changed, 29 insertions(+)
create mode 100644 llvm/test/CodeGen/PowerPC/vec_rotate_lw.ll
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 636e88898a55e..3907e864bed1e 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -387,6 +387,12 @@ class PowerPC_VSX_Sca_DDD_Intrinsic<string GCCIntSuffix>
[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem]>;
+/// PowerPC_VSX_WWW_Intrinsic - A PowerPC intrinsic that takes two v4i32
+/// vectors and returns one. These intrinsics have no side effects.
+class PowerPC_VSX_WWW_Intrinsic<string GCCIntSuffix>
+ : PowerPC_VSX_Intrinsic<GCCIntSuffix,
+ [llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
//===----------------------------------------------------------------------===//
// PowerPC Altivec Intrinsic Definitions.
@@ -1214,6 +1220,7 @@ def int_ppc_altivec_vsraw : PowerPC_Vec_WWW_Intrinsic<"vsraw">;
def int_ppc_altivec_vrlb : PowerPC_Vec_BBB_Intrinsic<"vrlb">;
def int_ppc_altivec_vrlh : PowerPC_Vec_HHH_Intrinsic<"vrlh">;
def int_ppc_altivec_vrlw : PowerPC_Vec_WWW_Intrinsic<"vrlw">;
+def int_ppc_vsx_xvrlw : PowerPC_VSX_WWW_Intrinsic<"xvrlw">;
def int_ppc_altivec_vrld : PowerPC_Vec_DDD_Intrinsic<"vrld">;
let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
diff --git a/llvm/test/CodeGen/PowerPC/vec_rotate_lw.ll b/llvm/test/CodeGen/PowerPC/vec_rotate_lw.ll
new file mode 100644
index 0000000000000..03b1456f0c036
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vec_rotate_lw.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s
+
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s
+
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
+; RUN: -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s
+
+define <4 x i32> @testVRLWMI(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: testVRLWMI:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvrlw v2, v2, v3
+; CHECK-NEXT: blr
+entry:
+ %0 = tail call <4 x i32> @llvm.ppc.vsx.xvrlw(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %0
+}
>From 6ea53d565911332098f59a70af49cf52380e5178 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Fri, 7 Nov 2025 16:05:13 +0000
Subject: [PATCH 2/2] update rotl v4i32 to use xvrlw for cpu=future
---
llvm/lib/Target/PowerPC/PPCInstrAltivec.td | 1 +
llvm/lib/Target/PowerPC/PPCInstrFuture.td | 10 +-
llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll | 108 ++++++++++++++++++
llvm/test/CodeGen/PowerPC/vector-rotates.ll | 41 +++++++
4 files changed, 158 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index 23d6d8853800f..fe1eea2b33615 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -889,6 +889,7 @@ def : Pat<(v16i8 (rotl v16i8:$vA, v16i8:$vB)),
(v16i8 (VRLB v16i8:$vA, v16i8:$vB))>;
def : Pat<(v8i16 (rotl v8i16:$vA, v8i16:$vB)),
(v8i16 (VRLH v8i16:$vA, v8i16:$vB))>;
+let Predicates = [IsNotISAFuture] in
def : Pat<(v4i32 (rotl v4i32:$vA, v4i32:$vB)),
(v4i32 (VRLW v4i32:$vA, v4i32:$vB))>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
index dfbbba0116f25..e417ffe6d3677 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
@@ -420,8 +420,10 @@ let Predicates = [HasVSX, IsISAFuture] in {
: VXForm_VRTAB5<323, (outs vrrc:$VRT), (ins vrrc:$VRA, vrrc:$VRB),
"vucmprlh $VRT, $VRA, $VRB", []>;
- def XVRLW: XX3Form_XTAB6<60, 184, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
- "xvrlw $XT, $XA, $XB", []>;
+ def XVRLW : XX3Form_XTAB6<60, 184, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvrlw $XT, $XA, $XB",
+ [(set v4i32:$XT, (int_ppc_vsx_xvrlw v4i32:$XA,
+ v4i32:$XB))]>;
// AES Acceleration Instructions
def XXAESENCP : XX3Form_XTABp5_M2<194, (outs vsrprc:$XTp),
@@ -550,6 +552,10 @@ def : Pat<(int_ppc_vsx_stxvprl v256i1:$XTp, addr:$RA, i64:$RB), (STXVPRL $XTp,
$RA, $RB)>;
def : Pat<(int_ppc_vsx_stxvprll v256i1:$XTp, addr:$RA, i64:$RB), (STXVPRLL $XTp,
$RA, $RB)>;
+let Predicates = [HasVSX, IsISAFuture] in {
+ def : Pat<(v4i32 (rotl v4i32:$vA, v4i32:$vB)), (v4i32 (XVRLW v4i32:$vA,
+ v4i32:$vB))>;
+}
//---------------------------- Instruction aliases ---------------------------//
// Predicate combinations available:
diff --git a/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll b/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll
index 12078adbbc2f3..383dcdb06c331 100644
--- a/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll
+++ b/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll
@@ -2,6 +2,7 @@
; RUN: llc < %s -mtriple=ppc32-- | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_32
; RUN: llc < %s -mtriple=ppc32-- -mcpu=ppc64 | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_64
; RUN: llc < %s -mtriple=powerpc64le-- | FileCheck %s --check-prefixes=CHECK,CHECK64
+; RUN: llc < %s -mcpu=future -mtriple=powerpc64le-- | FileCheck %s --check-prefix=FUTURE
declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
@@ -24,6 +25,13 @@ define i8 @rotl_i8_const_shift(i8 %x) {
; CHECK-NEXT: rlwimi 4, 3, 3, 0, 28
; CHECK-NEXT: mr 3, 4
; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: rotl_i8_const_shift:
+; FUTURE: # %bb.0:
+; FUTURE-NEXT: rotlwi 4, 3, 27
+; FUTURE-NEXT: rlwimi 4, 3, 3, 0, 28
+; FUTURE-NEXT: mr 3, 4
+; FUTURE-NEXT: blr
%f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3)
ret i8 %f
}
@@ -43,6 +51,11 @@ define i64 @rotl_i64_const_shift(i64 %x) {
; CHECK64: # %bb.0:
; CHECK64-NEXT: rotldi 3, 3, 3
; CHECK64-NEXT: blr
+;
+; FUTURE-LABEL: rotl_i64_const_shift:
+; FUTURE: # %bb.0:
+; FUTURE-NEXT: rotldi 3, 3, 3
+; FUTURE-NEXT: blr
%f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3)
ret i64 %f
}
@@ -60,6 +73,17 @@ define i16 @rotl_i16(i16 %x, i16 %z) {
; CHECK-NEXT: srw 4, 5, 4
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: rotl_i16:
+; FUTURE: # %bb.0:
+; FUTURE-NEXT: clrlwi 6, 4, 28
+; FUTURE-NEXT: neg 4, 4
+; FUTURE-NEXT: clrlwi 5, 3, 16
+; FUTURE-NEXT: clrlwi 4, 4, 28
+; FUTURE-NEXT: slw 3, 3, 6
+; FUTURE-NEXT: srw 4, 5, 4
+; FUTURE-NEXT: or 3, 3, 4
+; FUTURE-NEXT: blr
%f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z)
ret i16 %f
}
@@ -69,6 +93,11 @@ define i32 @rotl_i32(i32 %x, i32 %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: rotlw 3, 3, 4
; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: rotl_i32:
+; FUTURE: # %bb.0:
+; FUTURE-NEXT: rotlw 3, 3, 4
+; FUTURE-NEXT: blr
%f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z)
ret i32 %f
}
@@ -100,6 +129,11 @@ define i64 @rotl_i64(i64 %x, i64 %z) {
; CHECK64: # %bb.0:
; CHECK64-NEXT: rotld 3, 3, 4
; CHECK64-NEXT: blr
+;
+; FUTURE-LABEL: rotl_i64:
+; FUTURE: # %bb.0:
+; FUTURE-NEXT: rotld 3, 3, 4
+; FUTURE-NEXT: blr
%f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z)
ret i64 %f
}
@@ -124,6 +158,11 @@ define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) {
; CHECK64: # %bb.0:
; CHECK64-NEXT: vrlw 2, 2, 3
; CHECK64-NEXT: blr
+;
+; FUTURE-LABEL: rotl_v4i32:
+; FUTURE: # %bb.0:
+; FUTURE-NEXT: xvrlw 34, 34, 35
+; FUTURE-NEXT: blr
%f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
ret <4 x i32> %f
}
@@ -150,6 +189,12 @@ define <4 x i32> @rotl_v4i32_const_shift(<4 x i32> %x) {
; CHECK64-NEXT: vspltisw 3, 3
; CHECK64-NEXT: vrlw 2, 2, 3
; CHECK64-NEXT: blr
+;
+; FUTURE-LABEL: rotl_v4i32_const_shift:
+; FUTURE: # %bb.0:
+; FUTURE-NEXT: vspltisw 3, 3
+; FUTURE-NEXT: xvrlw 34, 34, 35
+; FUTURE-NEXT: blr
%f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
ret <4 x i32> %f
}
@@ -163,6 +208,13 @@ define i8 @rotr_i8_const_shift(i8 %x) {
; CHECK-NEXT: rlwimi 4, 3, 5, 0, 26
; CHECK-NEXT: mr 3, 4
; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: rotr_i8_const_shift:
+; FUTURE: # %bb.0:
+; FUTURE-NEXT: rotlwi 4, 3, 29
+; FUTURE-NEXT: rlwimi 4, 3, 5, 0, 26
+; FUTURE-NEXT: mr 3, 4
+; FUTURE-NEXT: blr
%f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
ret i8 %f
}
@@ -172,6 +224,11 @@ define i32 @rotr_i32_const_shift(i32 %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: rotlwi 3, 3, 29
; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: rotr_i32_const_shift:
+; FUTURE: # %bb.0:
+; FUTURE-NEXT: rotlwi 3, 3, 29
+; FUTURE-NEXT: blr
%f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3)
ret i32 %f
}
@@ -189,6 +246,17 @@ define i16 @rotr_i16(i16 %x, i16 %z) {
; CHECK-NEXT: slw 3, 3, 4
; CHECK-NEXT: or 3, 5, 3
; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: rotr_i16:
+; FUTURE: # %bb.0:
+; FUTURE-NEXT: clrlwi 6, 4, 28
+; FUTURE-NEXT: neg 4, 4
+; FUTURE-NEXT: clrlwi 5, 3, 16
+; FUTURE-NEXT: clrlwi 4, 4, 28
+; FUTURE-NEXT: srw 5, 5, 6
+; FUTURE-NEXT: slw 3, 3, 4
+; FUTURE-NEXT: or 3, 5, 3
+; FUTURE-NEXT: blr
%f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z)
ret i16 %f
}
@@ -199,6 +267,12 @@ define i32 @rotr_i32(i32 %x, i32 %z) {
; CHECK-NEXT: neg 4, 4
; CHECK-NEXT: rotlw 3, 3, 4
; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: rotr_i32:
+; FUTURE: # %bb.0:
+; FUTURE-NEXT: neg 4, 4
+; FUTURE-NEXT: rotlw 3, 3, 4
+; FUTURE-NEXT: blr
%f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %z)
ret i32 %f
}
@@ -231,6 +305,12 @@ define i64 @rotr_i64(i64 %x, i64 %z) {
; CHECK64-NEXT: neg 4, 4
; CHECK64-NEXT: rotld 3, 3, 4
; CHECK64-NEXT: blr
+;
+; FUTURE-LABEL: rotr_i64:
+; FUTURE: # %bb.0:
+; FUTURE-NEXT: neg 4, 4
+; FUTURE-NEXT: rotld 3, 3, 4
+; FUTURE-NEXT: blr
%f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
ret i64 %f
}
@@ -263,6 +343,12 @@ define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) {
; CHECK64-NEXT: vsubuwm 3, 4, 3
; CHECK64-NEXT: vrlw 2, 2, 3
; CHECK64-NEXT: blr
+;
+; FUTURE-LABEL: rotr_v4i32:
+; FUTURE: # %bb.0:
+; FUTURE-NEXT: vnegw 3, 3
+; FUTURE-NEXT: xvrlw 34, 34, 35
+; FUTURE-NEXT: blr
%f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
ret <4 x i32> %f
}
@@ -293,6 +379,12 @@ define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) {
; CHECK64-NEXT: vsubuwm 3, 4, 3
; CHECK64-NEXT: vrlw 2, 2, 3
; CHECK64-NEXT: blr
+;
+; FUTURE-LABEL: rotr_v4i32_const_shift:
+; FUTURE: # %bb.0:
+; FUTURE-NEXT: xxspltiw 0, 29
+; FUTURE-NEXT: xvrlw 34, 34, 0
+; FUTURE-NEXT: blr
%f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
ret <4 x i32> %f
}
@@ -301,6 +393,10 @@ define i32 @rotl_i32_shift_by_bitwidth(i32 %x) {
; CHECK-LABEL: rotl_i32_shift_by_bitwidth:
; CHECK: # %bb.0:
; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: rotl_i32_shift_by_bitwidth:
+; FUTURE: # %bb.0:
+; FUTURE-NEXT: blr
%f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32)
ret i32 %f
}
@@ -309,6 +405,10 @@ define i32 @rotr_i32_shift_by_bitwidth(i32 %x) {
; CHECK-LABEL: rotr_i32_shift_by_bitwidth:
; CHECK: # %bb.0:
; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: rotr_i32_shift_by_bitwidth:
+; FUTURE: # %bb.0:
+; FUTURE-NEXT: blr
%f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 32)
ret i32 %f
}
@@ -317,6 +417,10 @@ define <4 x i32> @rotl_v4i32_shift_by_bitwidth(<4 x i32> %x) {
; CHECK-LABEL: rotl_v4i32_shift_by_bitwidth:
; CHECK: # %bb.0:
; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: rotl_v4i32_shift_by_bitwidth:
+; FUTURE: # %bb.0:
+; FUTURE-NEXT: blr
%f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
ret <4 x i32> %f
}
@@ -325,6 +429,10 @@ define <4 x i32> @rotr_v4i32_shift_by_bitwidth(<4 x i32> %x) {
; CHECK-LABEL: rotr_v4i32_shift_by_bitwidth:
; CHECK: # %bb.0:
; CHECK-NEXT: blr
+;
+; FUTURE-LABEL: rotr_v4i32_shift_by_bitwidth:
+; FUTURE: # %bb.0:
+; FUTURE-NEXT: blr
%f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
ret <4 x i32> %f
}
diff --git a/llvm/test/CodeGen/PowerPC/vector-rotates.ll b/llvm/test/CodeGen/PowerPC/vector-rotates.ll
index 2de8804ba8e24..38e273634da2a 100644
--- a/llvm/test/CodeGen/PowerPC/vector-rotates.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-rotates.ll
@@ -5,6 +5,9 @@
; RUN: llc -O3 -mtriple=powerpc64-unknown-unknown -ppc-asm-full-reg-names \
; RUN: -verify-machineinstrs -mcpu=pwr7 < %s | \
; RUN: FileCheck --check-prefix=CHECK-P7 %s
+; RUN: llc -O3 -mtriple=powerpc64-unknown-unknown -ppc-asm-full-reg-names \
+; RUN: -verify-machineinstrs -mcpu=future < %s | \
+; RUN: FileCheck --check-prefix=CHECK-FUTURE %s
define <16 x i8> @rotl_v16i8(<16 x i8> %a) {
; CHECK-P8-LABEL: rotl_v16i8:
@@ -23,6 +26,14 @@ define <16 x i8> @rotl_v16i8(<16 x i8> %a) {
; CHECK-P7-NEXT: lxvw4x vs35, 0, r3
; CHECK-P7-NEXT: vrlb v2, v2, v3
; CHECK-P7-NEXT: blr
+;
+; CHECK-FUTURE-LABEL: rotl_v16i8:
+; CHECK-FUTURE: # %bb.0: # %entry
+; CHECK-FUTURE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-FUTURE-NEXT: addi r3, r3, .LCPI0_0@toc@l
+; CHECK-FUTURE-NEXT: lxv vs35, 0(r3)
+; CHECK-FUTURE-NEXT: vrlb v2, v2, v3
+; CHECK-FUTURE-NEXT: blr
entry:
%b = shl <16 x i8> %a, <i8 1, i8 1, i8 2, i8 2, i8 3, i8 3, i8 4, i8 4, i8 5, i8 5, i8 6, i8 6, i8 7, i8 7, i8 8, i8 8>
%c = lshr <16 x i8> %a, <i8 7, i8 7, i8 6, i8 6, i8 5, i8 5, i8 4, i8 4, i8 3, i8 3, i8 2, i8 2, i8 1, i8 1, i8 0, i8 0>
@@ -47,6 +58,14 @@ define <8 x i16> @rotl_v8i16(<8 x i16> %a) {
; CHECK-P7-NEXT: lxvw4x vs35, 0, r3
; CHECK-P7-NEXT: vrlh v2, v2, v3
; CHECK-P7-NEXT: blr
+;
+; CHECK-FUTURE-LABEL: rotl_v8i16:
+; CHECK-FUTURE: # %bb.0: # %entry
+; CHECK-FUTURE-NEXT: addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-FUTURE-NEXT: addi r3, r3, .LCPI1_0@toc@l
+; CHECK-FUTURE-NEXT: lxv vs35, 0(r3)
+; CHECK-FUTURE-NEXT: vrlh v2, v2, v3
+; CHECK-FUTURE-NEXT: blr
entry:
%b = shl <8 x i16> %a, <i16 1, i16 2, i16 3, i16 5, i16 7, i16 11, i16 13, i16 16>
%c = lshr <8 x i16> %a, <i16 15, i16 14, i16 13, i16 11, i16 9, i16 5, i16 3, i16 0>
@@ -71,6 +90,14 @@ define <4 x i32> @rotl_v4i32_0(<4 x i32> %a) {
; CHECK-P7-NEXT: lxvw4x vs35, 0, r3
; CHECK-P7-NEXT: vrlw v2, v2, v3
; CHECK-P7-NEXT: blr
+;
+; CHECK-FUTURE-LABEL: rotl_v4i32_0:
+; CHECK-FUTURE: # %bb.0: # %entry
+; CHECK-FUTURE-NEXT: addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-FUTURE-NEXT: addi r3, r3, .LCPI2_0@toc@l
+; CHECK-FUTURE-NEXT: lxv vs0, 0(r3)
+; CHECK-FUTURE-NEXT: xvrlw vs34, vs34, vs0
+; CHECK-FUTURE-NEXT: blr
entry:
%b = shl <4 x i32> %a, <i32 29, i32 19, i32 17, i32 11>
%c = lshr <4 x i32> %a, <i32 3, i32 13, i32 15, i32 21>
@@ -94,6 +121,12 @@ define <4 x i32> @rotl_v4i32_1(<4 x i32> %a) {
; CHECK-P7-NEXT: vsubuwm v3, v4, v3
; CHECK-P7-NEXT: vrlw v2, v2, v3
; CHECK-P7-NEXT: blr
+;
+; CHECK-FUTURE-LABEL: rotl_v4i32_1:
+; CHECK-FUTURE: # %bb.0: # %entry
+; CHECK-FUTURE-NEXT: xxspltiw vs0, 23
+; CHECK-FUTURE-NEXT: xvrlw vs34, vs34, vs0
+; CHECK-FUTURE-NEXT: blr
entry:
%b = shl <4 x i32> %a, <i32 23, i32 23, i32 23, i32 23>
%c = lshr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
@@ -124,6 +157,14 @@ define <2 x i64> @rotl_v2i64(<2 x i64> %a) {
; CHECK-P7-NEXT: addi r3, r1, -16
; CHECK-P7-NEXT: lxvd2x vs34, 0, r3
; CHECK-P7-NEXT: blr
+;
+; CHECK-FUTURE-LABEL: rotl_v2i64:
+; CHECK-FUTURE: # %bb.0: # %entry
+; CHECK-FUTURE-NEXT: addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-FUTURE-NEXT: addi r3, r3, .LCPI4_0@toc@l
+; CHECK-FUTURE-NEXT: lxv vs35, 0(r3)
+; CHECK-FUTURE-NEXT: vrld v2, v2, v3
+; CHECK-FUTURE-NEXT: blr
entry:
%b = shl <2 x i64> %a, <i64 41, i64 53>
%c = lshr <2 x i64> %a, <i64 23, i64 11>
More information about the llvm-commits
mailing list