[llvm] [LoongArch][NFC] Pre-commit tests for custom rotr (PR #161115)
Zhaoxin Yang via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 28 19:37:58 PDT 2025
https://github.com/ylzsx created https://github.com/llvm/llvm-project/pull/161115
(No pull request description was provided.)
>From 5c3f9c608982073ac669c6d73ccfdb74e6ca1c2c Mon Sep 17 00:00:00 2001
From: yangzhaoxin <yangzhaoxin at loongson.cn>
Date: Mon, 29 Sep 2025 10:33:25 +0800
Subject: [PATCH] [LoongArch][NFC] Pre-commit tests for custom rotr
---
llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll | 283 ++++++++++++++++++
llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll | 283 ++++++++++++++++++
2 files changed, 566 insertions(+)
create mode 100644 llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll
create mode 100644 llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll
diff --git a/llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll b/llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll
new file mode 100644
index 0000000000000..f9f024dda973c
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll
@@ -0,0 +1,283 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+
+define void @rotl_v32i8(ptr %dst, ptr %src, i8 signext %a0) nounwind { ; rotl idiom on <32 x i8>: (x << a) | (x >> (8 - a)), with a = %a0 splatted to every lane
+; CHECK-LABEL: rotl_v32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvreplgr2vr.b $xr1, $a2
+; CHECK-NEXT: xvrepli.b $xr2, 8
+; CHECK-NEXT: xvsub.b $xr2, $xr2, $xr1
+; CHECK-NEXT: xvsll.b $xr1, $xr0, $xr1
+; CHECK-NEXT: xvsrl.b $xr0, $xr0, $xr2
+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %src
+ %v1.ele = insertelement <32 x i8> poison, i8 %a0, i8 0
+ %v1 = shufflevector <32 x i8> %v1.ele, <32 x i8> poison, <32 x i32> zeroinitializer ; all-zero mask broadcasts lane 0
+ %v1.sub = sub <32 x i8> splat (i8 8), %v1 ; 8 - a: the complementary shift amount
+ %b = shl <32 x i8> %v0, %v1
+ %c = lshr <32 x i8> %v0, %v1.sub
+ %d = or <32 x i8> %b, %c
+ store <32 x i8> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v32i8(ptr %dst, ptr %src, i8 signext %a0) nounwind { ; rotr idiom on <32 x i8>: (x >> a) | (x << (8 - a)), with a = %a0 splatted to every lane
+; CHECK-LABEL: rotr_v32i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvreplgr2vr.b $xr1, $a2
+; CHECK-NEXT: xvrepli.b $xr2, 8
+; CHECK-NEXT: xvsub.b $xr2, $xr2, $xr1
+; CHECK-NEXT: xvsrl.b $xr1, $xr0, $xr1
+; CHECK-NEXT: xvsll.b $xr0, $xr0, $xr2
+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %src
+ %v1.ele = insertelement <32 x i8> poison, i8 %a0, i8 0
+ %v1 = shufflevector <32 x i8> %v1.ele, <32 x i8> poison, <32 x i32> zeroinitializer ; all-zero mask broadcasts lane 0
+ %v1.sub = sub <32 x i8> splat (i8 8), %v1 ; 8 - a: the complementary shift amount
+ %b = lshr <32 x i8> %v0, %v1
+ %c = shl <32 x i8> %v0, %v1.sub
+ %d = or <32 x i8> %b, %c
+ store <32 x i8> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v32i8_imm(ptr %dst, ptr %src) nounwind { ; rotr idiom on <32 x i8> with a constant amount of 2: (x >> 2) | (x << 6)
+; CHECK-LABEL: rotr_v32i8_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvsrli.b $xr1, $xr0, 2
+; CHECK-NEXT: xvslli.b $xr0, $xr0, 6
+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %src
+ %b = lshr <32 x i8> %v0, splat (i8 2)
+ %c = shl <32 x i8> %v0, splat (i8 6) ; 6 = 8 - 2
+ %d = or <32 x i8> %b, %c
+ store <32 x i8> %d, ptr %dst
+ ret void
+}
+
+define void @rotl_v16i16(ptr %dst, ptr %src, i16 signext %a0) nounwind { ; rotl idiom on <16 x i16>: (x << a) | (x >> (16 - a)), with a = %a0 splatted to every lane
+; CHECK-LABEL: rotl_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvreplgr2vr.h $xr1, $a2
+; CHECK-NEXT: xvrepli.h $xr2, 16
+; CHECK-NEXT: xvsub.h $xr2, $xr2, $xr1
+; CHECK-NEXT: xvsll.h $xr1, $xr0, $xr1
+; CHECK-NEXT: xvsrl.h $xr0, $xr0, $xr2
+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %src
+ %v1.ele = insertelement <16 x i16> poison, i16 %a0, i16 0
+ %v1 = shufflevector <16 x i16> %v1.ele, <16 x i16> poison, <16 x i32> zeroinitializer ; all-zero mask broadcasts lane 0
+ %v1.sub = sub <16 x i16> splat (i16 16), %v1 ; 16 - a: the complementary shift amount
+ %b = shl <16 x i16> %v0, %v1
+ %c = lshr <16 x i16> %v0, %v1.sub
+ %d = or <16 x i16> %b, %c
+ store <16 x i16> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v16i16(ptr %dst, ptr %src, i16 signext %a0) nounwind { ; rotr idiom on <16 x i16>: (x >> a) | (x << (16 - a)), with a = %a0 splatted to every lane
+; CHECK-LABEL: rotr_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvreplgr2vr.h $xr1, $a2
+; CHECK-NEXT: xvrepli.h $xr2, 16
+; CHECK-NEXT: xvsub.h $xr2, $xr2, $xr1
+; CHECK-NEXT: xvsrl.h $xr1, $xr0, $xr1
+; CHECK-NEXT: xvsll.h $xr0, $xr0, $xr2
+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %src
+ %v1.ele = insertelement <16 x i16> poison, i16 %a0, i16 0
+ %v1 = shufflevector <16 x i16> %v1.ele, <16 x i16> poison, <16 x i32> zeroinitializer ; all-zero mask broadcasts lane 0
+ %v1.sub = sub <16 x i16> splat (i16 16), %v1 ; 16 - a: the complementary shift amount
+ %b = lshr <16 x i16> %v0, %v1
+ %c = shl <16 x i16> %v0, %v1.sub
+ %d = or <16 x i16> %b, %c
+ store <16 x i16> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v16i16_imm(ptr %dst, ptr %src) nounwind { ; rotr idiom on <16 x i16> with a constant amount of 2: (x >> 2) | (x << 14)
+; CHECK-LABEL: rotr_v16i16_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvsrli.h $xr1, $xr0, 2
+; CHECK-NEXT: xvslli.h $xr0, $xr0, 14
+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %src
+ %b = lshr <16 x i16> %v0, splat (i16 2)
+ %c = shl <16 x i16> %v0, splat (i16 14) ; 14 = 16 - 2
+ %d = or <16 x i16> %b, %c
+ store <16 x i16> %d, ptr %dst
+ ret void
+}
+
+define void @rotl_v8i32(ptr %dst, ptr %src, i32 signext %a0) nounwind { ; rotl idiom on <8 x i32>: (x << a) | (x >> (32 - a)), with a = %a0 splatted to every lane
+; CHECK-LABEL: rotl_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2
+; CHECK-NEXT: xvrepli.w $xr2, 32
+; CHECK-NEXT: xvsub.w $xr2, $xr2, $xr1
+; CHECK-NEXT: xvsll.w $xr1, $xr0, $xr1
+; CHECK-NEXT: xvsrl.w $xr0, $xr0, $xr2
+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i32>, ptr %src
+ %v1.ele = insertelement <8 x i32> poison, i32 %a0, i32 0
+ %v1 = shufflevector <8 x i32> %v1.ele, <8 x i32> poison, <8 x i32> zeroinitializer ; all-zero mask broadcasts lane 0
+ %v1.sub = sub <8 x i32> splat (i32 32), %v1 ; 32 - a: the complementary shift amount
+ %b = shl <8 x i32> %v0, %v1
+ %c = lshr <8 x i32> %v0, %v1.sub
+ %d = or <8 x i32> %b, %c
+ store <8 x i32> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v8i32(ptr %dst, ptr %src, i32 signext %a0) nounwind { ; rotr idiom on <8 x i32>: (x >> a) | (x << (32 - a)), with a = %a0 splatted to every lane
+; CHECK-LABEL: rotr_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2
+; CHECK-NEXT: xvrepli.w $xr2, 32
+; CHECK-NEXT: xvsub.w $xr2, $xr2, $xr1
+; CHECK-NEXT: xvsrl.w $xr1, $xr0, $xr1
+; CHECK-NEXT: xvsll.w $xr0, $xr0, $xr2
+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i32>, ptr %src
+ %v1.ele = insertelement <8 x i32> poison, i32 %a0, i32 0
+ %v1 = shufflevector <8 x i32> %v1.ele, <8 x i32> poison, <8 x i32> zeroinitializer ; all-zero mask broadcasts lane 0
+ %v1.sub = sub <8 x i32> splat (i32 32), %v1 ; 32 - a: the complementary shift amount
+ %b = lshr <8 x i32> %v0, %v1
+ %c = shl <8 x i32> %v0, %v1.sub
+ %d = or <8 x i32> %b, %c
+ store <8 x i32> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v8i32_imm(ptr %dst, ptr %src) nounwind { ; rotr idiom on <8 x i32> with a constant amount of 2: (x >> 2) | (x << 30)
+; CHECK-LABEL: rotr_v8i32_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvsrli.w $xr1, $xr0, 2
+; CHECK-NEXT: xvslli.w $xr0, $xr0, 30
+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i32>, ptr %src
+ %b = lshr <8 x i32> %v0, splat (i32 2)
+ %c = shl <8 x i32> %v0, splat (i32 30) ; 30 = 32 - 2
+ %d = or <8 x i32> %b, %c
+ store <8 x i32> %d, ptr %dst
+ ret void
+}
+
+define void @rotl_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind { ; rotl idiom on <4 x i64>: (x << a) | (x >> (64 - a)), with a = %a0 splatted to every lane; the two targets have separate expectations because the 32-bit one assembles the i64 amount from $a2 and $a3
+; LA32-LABEL: rotl_v4i64:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
+; LA32-NEXT: xvreplve0.d $xr1, $xr1
+; LA32-NEXT: xvrepli.d $xr2, 64
+; LA32-NEXT: xvsub.d $xr2, $xr2, $xr1
+; LA32-NEXT: xvsll.d $xr1, $xr0, $xr1
+; LA32-NEXT: xvsrl.d $xr0, $xr0, $xr2
+; LA32-NEXT: xvor.v $xr0, $xr1, $xr0
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: rotl_v4i64:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvreplgr2vr.d $xr1, $a2
+; LA64-NEXT: xvrepli.d $xr2, 64
+; LA64-NEXT: xvsub.d $xr2, $xr2, $xr1
+; LA64-NEXT: xvsll.d $xr1, $xr0, $xr1
+; LA64-NEXT: xvsrl.d $xr0, $xr0, $xr2
+; LA64-NEXT: xvor.v $xr0, $xr1, $xr0
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
+ %v0 = load <4 x i64>, ptr %src
+ %v1.ele = insertelement <4 x i64> poison, i64 %a0, i64 0
+ %v1 = shufflevector <4 x i64> %v1.ele, <4 x i64> poison, <4 x i32> zeroinitializer ; all-zero mask broadcasts lane 0
+ %v1.sub = sub <4 x i64> splat (i64 64), %v1 ; 64 - a: the complementary shift amount
+ %b = shl <4 x i64> %v0, %v1
+ %c = lshr <4 x i64> %v0, %v1.sub
+ %d = or <4 x i64> %b, %c
+ store <4 x i64> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind { ; rotr idiom on <4 x i64>: (x >> a) | (x << (64 - a)), with a = %a0 splatted to every lane; the two targets have separate expectations because the 32-bit one assembles the i64 amount from $a2 and $a3
+; LA32-LABEL: rotr_v4i64:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
+; LA32-NEXT: xvreplve0.d $xr1, $xr1
+; LA32-NEXT: xvrepli.d $xr2, 64
+; LA32-NEXT: xvsub.d $xr2, $xr2, $xr1
+; LA32-NEXT: xvsrl.d $xr1, $xr0, $xr1
+; LA32-NEXT: xvsll.d $xr0, $xr0, $xr2
+; LA32-NEXT: xvor.v $xr0, $xr1, $xr0
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: rotr_v4i64:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvreplgr2vr.d $xr1, $a2
+; LA64-NEXT: xvrepli.d $xr2, 64
+; LA64-NEXT: xvsub.d $xr2, $xr2, $xr1
+; LA64-NEXT: xvsrl.d $xr1, $xr0, $xr1
+; LA64-NEXT: xvsll.d $xr0, $xr0, $xr2
+; LA64-NEXT: xvor.v $xr0, $xr1, $xr0
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
+ %v0 = load <4 x i64>, ptr %src
+ %v1.ele = insertelement <4 x i64> poison, i64 %a0, i64 0
+ %v1 = shufflevector <4 x i64> %v1.ele, <4 x i64> poison, <4 x i32> zeroinitializer ; all-zero mask broadcasts lane 0
+ %v1.sub = sub <4 x i64> splat (i64 64), %v1 ; 64 - a: the complementary shift amount
+ %b = lshr <4 x i64> %v0, %v1
+ %c = shl <4 x i64> %v0, %v1.sub
+ %d = or <4 x i64> %b, %c
+ store <4 x i64> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v4i64_imm(ptr %dst, ptr %src) nounwind { ; rotr idiom on <4 x i64> with a constant amount of 2: (x >> 2) | (x << 62)
+; CHECK-LABEL: rotr_v4i64_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvsrli.d $xr1, $xr0, 2
+; CHECK-NEXT: xvslli.d $xr0, $xr0, 62
+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i64>, ptr %src
+ %b = lshr <4 x i64> %v0, splat (i64 2)
+ %c = shl <4 x i64> %v0, splat (i64 62) ; 62 = 64 - 2
+ %d = or <4 x i64> %b, %c
+ store <4 x i64> %d, ptr %dst
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll b/llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll
new file mode 100644
index 0000000000000..79e74f35abafb
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll
@@ -0,0 +1,283 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+
+define void @rotl_v16i8(ptr %dst, ptr %src, i8 signext %a0) nounwind { ; rotl idiom on <16 x i8>: (x << a) | (x >> (8 - a)), with a = %a0 splatted to every lane
+; CHECK-LABEL: rotl_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vreplgr2vr.b $vr1, $a2
+; CHECK-NEXT: vrepli.b $vr2, 8
+; CHECK-NEXT: vsub.b $vr2, $vr2, $vr1
+; CHECK-NEXT: vsll.b $vr1, $vr0, $vr1
+; CHECK-NEXT: vsrl.b $vr0, $vr0, $vr2
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i8>, ptr %src
+ %v1.ele = insertelement <16 x i8> poison, i8 %a0, i8 0
+ %v1 = shufflevector <16 x i8> %v1.ele, <16 x i8> poison, <16 x i32> zeroinitializer ; all-zero mask broadcasts lane 0
+ %v1.sub = sub <16 x i8> splat (i8 8), %v1 ; 8 - a: the complementary shift amount
+ %b = shl <16 x i8> %v0, %v1
+ %c = lshr <16 x i8> %v0, %v1.sub
+ %d = or <16 x i8> %b, %c
+ store <16 x i8> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v16i8(ptr %dst, ptr %src, i8 signext %a0) nounwind { ; rotr idiom on <16 x i8>: (x >> a) | (x << (8 - a)), with a = %a0 splatted to every lane
+; CHECK-LABEL: rotr_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vreplgr2vr.b $vr1, $a2
+; CHECK-NEXT: vrepli.b $vr2, 8
+; CHECK-NEXT: vsub.b $vr2, $vr2, $vr1
+; CHECK-NEXT: vsrl.b $vr1, $vr0, $vr1
+; CHECK-NEXT: vsll.b $vr0, $vr0, $vr2
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i8>, ptr %src
+ %v1.ele = insertelement <16 x i8> poison, i8 %a0, i8 0
+ %v1 = shufflevector <16 x i8> %v1.ele, <16 x i8> poison, <16 x i32> zeroinitializer ; all-zero mask broadcasts lane 0
+ %v1.sub = sub <16 x i8> splat (i8 8), %v1 ; 8 - a: the complementary shift amount
+ %b = lshr <16 x i8> %v0, %v1
+ %c = shl <16 x i8> %v0, %v1.sub
+ %d = or <16 x i8> %b, %c
+ store <16 x i8> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v16i8_imm(ptr %dst, ptr %src) nounwind { ; rotr idiom on <16 x i8> with a constant amount of 2: (x >> 2) | (x << 6)
+; CHECK-LABEL: rotr_v16i8_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrli.b $vr1, $vr0, 2
+; CHECK-NEXT: vslli.b $vr0, $vr0, 6
+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i8>, ptr %src
+ %b = lshr <16 x i8> %v0, splat (i8 2)
+ %c = shl <16 x i8> %v0, splat (i8 6) ; 6 = 8 - 2
+ %d = or <16 x i8> %b, %c
+ store <16 x i8> %d, ptr %dst
+ ret void
+}
+
+define void @rotl_v8i16(ptr %dst, ptr %src, i16 signext %a0) nounwind { ; rotl idiom on <8 x i16>: (x << a) | (x >> (16 - a)), with a = %a0 splatted to every lane
+; CHECK-LABEL: rotl_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vreplgr2vr.h $vr1, $a2
+; CHECK-NEXT: vrepli.h $vr2, 16
+; CHECK-NEXT: vsub.h $vr2, $vr2, $vr1
+; CHECK-NEXT: vsll.h $vr1, $vr0, $vr1
+; CHECK-NEXT: vsrl.h $vr0, $vr0, $vr2
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i16>, ptr %src
+ %v1.ele = insertelement <8 x i16> poison, i16 %a0, i16 0
+ %v1 = shufflevector <8 x i16> %v1.ele, <8 x i16> poison, <8 x i32> zeroinitializer ; all-zero mask broadcasts lane 0
+ %v1.sub = sub <8 x i16> splat (i16 16), %v1 ; 16 - a: the complementary shift amount
+ %b = shl <8 x i16> %v0, %v1
+ %c = lshr <8 x i16> %v0, %v1.sub
+ %d = or <8 x i16> %b, %c
+ store <8 x i16> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v8i16(ptr %dst, ptr %src, i16 signext %a0) nounwind { ; rotr idiom on <8 x i16>: (x >> a) | (x << (16 - a)), with a = %a0 splatted to every lane
+; CHECK-LABEL: rotr_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vreplgr2vr.h $vr1, $a2
+; CHECK-NEXT: vrepli.h $vr2, 16
+; CHECK-NEXT: vsub.h $vr2, $vr2, $vr1
+; CHECK-NEXT: vsrl.h $vr1, $vr0, $vr1
+; CHECK-NEXT: vsll.h $vr0, $vr0, $vr2
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i16>, ptr %src
+ %v1.ele = insertelement <8 x i16> poison, i16 %a0, i16 0
+ %v1 = shufflevector <8 x i16> %v1.ele, <8 x i16> poison, <8 x i32> zeroinitializer ; all-zero mask broadcasts lane 0
+ %v1.sub = sub <8 x i16> splat (i16 16), %v1 ; 16 - a: the complementary shift amount
+ %b = lshr <8 x i16> %v0, %v1
+ %c = shl <8 x i16> %v0, %v1.sub
+ %d = or <8 x i16> %b, %c
+ store <8 x i16> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v8i16_imm(ptr %dst, ptr %src) nounwind { ; rotr idiom on <8 x i16> with a constant amount of 2: (x >> 2) | (x << 14)
+; CHECK-LABEL: rotr_v8i16_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrli.h $vr1, $vr0, 2
+; CHECK-NEXT: vslli.h $vr0, $vr0, 14
+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i16>, ptr %src
+ %b = lshr <8 x i16> %v0, splat (i16 2)
+ %c = shl <8 x i16> %v0, splat (i16 14) ; 14 = 16 - 2
+ %d = or <8 x i16> %b, %c
+ store <8 x i16> %d, ptr %dst
+ ret void
+}
+
+define void @rotl_v4i32(ptr %dst, ptr %src, i32 signext %a0) nounwind { ; rotl idiom on <4 x i32>: (x << a) | (x >> (32 - a)), with a = %a0 splatted to every lane
+; CHECK-LABEL: rotl_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vreplgr2vr.w $vr1, $a2
+; CHECK-NEXT: vrepli.w $vr2, 32
+; CHECK-NEXT: vsub.w $vr2, $vr2, $vr1
+; CHECK-NEXT: vsll.w $vr1, $vr0, $vr1
+; CHECK-NEXT: vsrl.w $vr0, $vr0, $vr2
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i32>, ptr %src
+ %v1.ele = insertelement <4 x i32> poison, i32 %a0, i32 0
+ %v1 = shufflevector <4 x i32> %v1.ele, <4 x i32> poison, <4 x i32> zeroinitializer ; all-zero mask broadcasts lane 0
+ %v1.sub = sub <4 x i32> splat (i32 32), %v1 ; 32 - a: the complementary shift amount
+ %b = shl <4 x i32> %v0, %v1
+ %c = lshr <4 x i32> %v0, %v1.sub
+ %d = or <4 x i32> %b, %c
+ store <4 x i32> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v4i32(ptr %dst, ptr %src, i32 signext %a0) nounwind { ; rotr idiom on <4 x i32>: (x >> a) | (x << (32 - a)), with a = %a0 splatted to every lane
+; CHECK-LABEL: rotr_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vreplgr2vr.w $vr1, $a2
+; CHECK-NEXT: vrepli.w $vr2, 32
+; CHECK-NEXT: vsub.w $vr2, $vr2, $vr1
+; CHECK-NEXT: vsrl.w $vr1, $vr0, $vr1
+; CHECK-NEXT: vsll.w $vr0, $vr0, $vr2
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i32>, ptr %src
+ %v1.ele = insertelement <4 x i32> poison, i32 %a0, i32 0
+ %v1 = shufflevector <4 x i32> %v1.ele, <4 x i32> poison, <4 x i32> zeroinitializer ; all-zero mask broadcasts lane 0
+ %v1.sub = sub <4 x i32> splat (i32 32), %v1 ; 32 - a: the complementary shift amount
+ %b = lshr <4 x i32> %v0, %v1
+ %c = shl <4 x i32> %v0, %v1.sub
+ %d = or <4 x i32> %b, %c
+ store <4 x i32> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v4i32_imm(ptr %dst, ptr %src) nounwind { ; rotr idiom on <4 x i32> with a constant amount of 2: (x >> 2) | (x << 30)
+; CHECK-LABEL: rotr_v4i32_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrli.w $vr1, $vr0, 2
+; CHECK-NEXT: vslli.w $vr0, $vr0, 30
+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i32>, ptr %src
+ %b = lshr <4 x i32> %v0, splat (i32 2)
+ %c = shl <4 x i32> %v0, splat (i32 30) ; 30 = 32 - 2
+ %d = or <4 x i32> %b, %c
+ store <4 x i32> %d, ptr %dst
+ ret void
+}
+
+define void @rotl_v2i64(ptr %dst, ptr %src, i64 %a0) nounwind { ; rotl idiom on <2 x i64>: (x << a) | (x >> (64 - a)), with a = %a0 splatted to every lane; the two targets have separate expectations because the 32-bit one assembles the i64 amount from $a2 and $a3
+; LA32-LABEL: rotl_v2i64:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a1, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
+; LA32-NEXT: vreplvei.d $vr1, $vr1, 0
+; LA32-NEXT: vrepli.d $vr2, 64
+; LA32-NEXT: vsub.d $vr2, $vr2, $vr1
+; LA32-NEXT: vsll.d $vr1, $vr0, $vr1
+; LA32-NEXT: vsrl.d $vr0, $vr0, $vr2
+; LA32-NEXT: vor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: rotl_v2i64:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vreplgr2vr.d $vr1, $a2
+; LA64-NEXT: vrepli.d $vr2, 64
+; LA64-NEXT: vsub.d $vr2, $vr2, $vr1
+; LA64-NEXT: vsll.d $vr1, $vr0, $vr1
+; LA64-NEXT: vsrl.d $vr0, $vr0, $vr2
+; LA64-NEXT: vor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: ret
+ %v0 = load <2 x i64>, ptr %src
+ %v1.ele = insertelement <2 x i64> poison, i64 %a0, i64 0
+ %v1 = shufflevector <2 x i64> %v1.ele, <2 x i64> poison, <2 x i32> zeroinitializer ; all-zero mask broadcasts lane 0
+ %v1.sub = sub <2 x i64> splat (i64 64), %v1 ; 64 - a: the complementary shift amount
+ %b = shl <2 x i64> %v0, %v1
+ %c = lshr <2 x i64> %v0, %v1.sub
+ %d = or <2 x i64> %b, %c
+ store <2 x i64> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v2i64(ptr %dst, ptr %src, i64 %a0) nounwind { ; rotr idiom on <2 x i64>: (x >> a) | (x << (64 - a)), with a = %a0 splatted to every lane; the two targets have separate expectations because the 32-bit one assembles the i64 amount from $a2 and $a3
+; LA32-LABEL: rotr_v2i64:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a1, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
+; LA32-NEXT: vreplvei.d $vr1, $vr1, 0
+; LA32-NEXT: vrepli.d $vr2, 64
+; LA32-NEXT: vsub.d $vr2, $vr2, $vr1
+; LA32-NEXT: vsrl.d $vr1, $vr0, $vr1
+; LA32-NEXT: vsll.d $vr0, $vr0, $vr2
+; LA32-NEXT: vor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: rotr_v2i64:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vreplgr2vr.d $vr1, $a2
+; LA64-NEXT: vrepli.d $vr2, 64
+; LA64-NEXT: vsub.d $vr2, $vr2, $vr1
+; LA64-NEXT: vsrl.d $vr1, $vr0, $vr1
+; LA64-NEXT: vsll.d $vr0, $vr0, $vr2
+; LA64-NEXT: vor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: ret
+ %v0 = load <2 x i64>, ptr %src
+ %v1.ele = insertelement <2 x i64> poison, i64 %a0, i64 0
+ %v1 = shufflevector <2 x i64> %v1.ele, <2 x i64> poison, <2 x i32> zeroinitializer ; all-zero mask broadcasts lane 0
+ %v1.sub = sub <2 x i64> splat (i64 64), %v1 ; 64 - a: the complementary shift amount
+ %b = lshr <2 x i64> %v0, %v1
+ %c = shl <2 x i64> %v0, %v1.sub
+ %d = or <2 x i64> %b, %c
+ store <2 x i64> %d, ptr %dst
+ ret void
+}
+
+define void @rotr_v2i64_imm(ptr %dst, ptr %src) nounwind { ; rotr idiom on <2 x i64> with a constant amount of 2: (x >> 2) | (x << 62)
+; CHECK-LABEL: rotr_v2i64_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrli.d $vr1, $vr0, 2
+; CHECK-NEXT: vslli.d $vr0, $vr0, 62
+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <2 x i64>, ptr %src
+ %b = lshr <2 x i64> %v0, splat (i64 2)
+ %c = shl <2 x i64> %v0, splat (i64 62) ; 62 = 64 - 2
+ %d = or <2 x i64> %b, %c
+ store <2 x i64> %d, ptr %dst
+ ret void
+}
More information about the llvm-commits mailing list