[llvm-branch-commits] [llvm] 094f771 - [Hexagon] Add patterns for bswap/bitreverse for scalar vectors
Tom Stellard via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue May 30 15:38:34 PDT 2023
Author: Krzysztof Parzyszek
Date: 2023-05-30T15:37:57-07:00
New Revision: 094f77145b95fce1bc309c8905e55d73ab73191e
URL: https://github.com/llvm/llvm-project/commit/094f77145b95fce1bc309c8905e55d73ab73191e
DIFF: https://github.com/llvm/llvm-project/commit/094f77145b95fce1bc309c8905e55d73ab73191e.diff
LOG: [Hexagon] Add patterns for bswap/bitreverse for scalar vectors
Fixes https://github.com/llvm/llvm-project/issues/62474
(cherry picked from commit c7b291a63f5cabea47e1b4b13e7091e3e37dadb1)
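For reference, a minimal IR sketch (function name illustrative, mirroring the
regenerated bitmanip.ll test below) that exercises one of the new patterns;
with this change, llc -march=hexagon selects the vlsrh/vaslh/or sequence for
a <4 x i16> bswap, as the updated CHECK lines show:

  ; illustrative: exercises the new v4i16 bswap selection pattern
  define <4 x i16> @bswap_v4i16_example(<4 x i16> %a0) {
    %v0 = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %a0)
    ret <4 x i16> %v0
  }

  declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>)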
Added:
Modified:
llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
llvm/lib/Target/Hexagon/HexagonPatterns.td
llvm/test/CodeGen/Hexagon/bitmanip.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 202fc473f9e49..609a383426d66 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1628,7 +1628,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
ISD::UADDO, ISD::SSUBO, ISD::USUBO, ISD::SMUL_LOHI, ISD::UMUL_LOHI,
// Logical/bit:
ISD::AND, ISD::OR, ISD::XOR, ISD::ROTL, ISD::ROTR,
- ISD::CTPOP, ISD::CTLZ, ISD::CTTZ,
+ ISD::CTPOP, ISD::CTLZ, ISD::CTTZ, ISD::BSWAP, ISD::BITREVERSE,
// Floating point arithmetic/math functions:
ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FMA, ISD::FDIV,
ISD::FREM, ISD::FNEG, ISD::FABS, ISD::FSQRT, ISD::FSIN,
@@ -1701,8 +1701,11 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::OR, NativeVT, Legal);
setOperationAction(ISD::XOR, NativeVT, Legal);
- if (NativeVT.getVectorElementType() != MVT::i1)
+ if (NativeVT.getVectorElementType() != MVT::i1) {
setOperationAction(ISD::SPLAT_VECTOR, NativeVT, Legal);
+ setOperationAction(ISD::BSWAP, NativeVT, Legal);
+ setOperationAction(ISD::BITREVERSE, NativeVT, Legal);
+ }
}
for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32}) {
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index a75ac0e1378ef..375e519a6848b 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -117,8 +117,8 @@ def usat: PatFrag<(ops node:$V, node:$Ty), (HexagonUSAT node:$V, node:$Ty)>;
// Pattern fragments to extract the low and high subregisters from a
// 64-bit value.
-def LoReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_lo)>;
-def HiReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_hi)>;
+def LoReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG $Rs, isub_lo)>;
+def HiReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG $Rs, isub_hi)>;
def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{
return isOrEquivalentToAdd(N);
@@ -1123,6 +1123,12 @@ def: Pat<(bswap I32:$Rs), (A2_swiz I32:$Rs)>;
def: Pat<(bswap I64:$Rss), (Combinew (A2_swiz (LoReg $Rss)),
(A2_swiz (HiReg $Rss)))>;
+def: Pat<(bswap V2I16:$Rs), (A2_combine_lh (A2_swiz $Rs), (A2_swiz $Rs))>;
+def: Pat<(bswap V2I32:$Rs), (Combinew (A2_swiz (HiReg $Rs)),
+ (A2_swiz (LoReg $Rs)))>;
+def: Pat<(bswap V4I16:$Rs), (A2_orp (S2_lsr_i_vh $Rs, 8),
+ (S2_asl_i_vh $Rs, 8))>;
+
def: Pat<(shl s6_0ImmPred:$s6, I32:$Rt), (S4_lsli imm:$s6, I32:$Rt)>;
def: Pat<(shl I32:$Rs, (i32 16)), (A2_aslh I32:$Rs)>;
def: Pat<(sra I32:$Rs, (i32 16)), (A2_asrh I32:$Rs)>;
@@ -1854,6 +1860,20 @@ def: Pat<(i32 (ctpop I32:$Rs)), (S5_popcountp (A4_combineir 0, I32:$Rs))>;
def: Pat<(bitreverse I32:$Rs), (S2_brev I32:$Rs)>;
def: Pat<(bitreverse I64:$Rss), (S2_brevp I64:$Rss)>;
+def: Pat<(bitreverse V4I8:$Rs), (A2_swiz (S2_brev $Rs))>;
+def: Pat<(bitreverse V8I8:$Rs), (Combinew (A2_swiz (LoReg (S2_brevp $Rs))),
+ (A2_swiz (HiReg (S2_brevp $Rs))))>;
+def: Pat<(bitreverse V2I16:$Rs), (A2_combine_lh (S2_brev $Rs),
+ (S2_brev $Rs))>;
+def: Pat<(bitreverse V4I16:$Rs),
+ (Combinew (A2_combine_lh (LoReg (S2_brevp $Rs)),
+ (LoReg (S2_brevp $Rs))),
+ (A2_combine_lh (HiReg (S2_brevp $Rs)),
+ (HiReg (S2_brevp $Rs))))>;
+def: Pat<(bitreverse V2I32:$Rs),
+ (Combinew (i32 (LoReg (S2_brevp $Rs))),
+ (i32 (HiReg (S2_brevp $Rs))))>;
+
let AddedComplexity = 20 in { // Complexity greater than and/or/xor
def: Pat<(and I32:$Rs, IsNPow2_32:$V),
(S2_clrbit_i IntRegs:$Rs, (LogN2_32 $V))>;
diff --git a/llvm/test/CodeGen/Hexagon/bitmanip.ll b/llvm/test/CodeGen/Hexagon/bitmanip.ll
index 2044a2fdd083b..9ce7f0576506c 100644
--- a/llvm/test/CodeGen/Hexagon/bitmanip.ll
+++ b/llvm/test/CodeGen/Hexagon/bitmanip.ll
@@ -1,135 +1,370 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -march=hexagon < %s | FileCheck %s
-; CHECK-LABEL: popcount_16
-; CHECK: zxth
-; CHECK: popcount
-define i16 @popcount_16(i16 %p) #0 {
- %t = call i16 @llvm.ctpop.i16(i16 %p) #0
- ret i16 %t
+define i16 @popcount_i16(i16 %a0) #0 {
+; CHECK-LABEL: popcount_i16:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = #0
+; CHECK-NEXT: r0 = zxth(r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = popcount(r1:0)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = tail call i16 @llvm.ctpop.i16(i16 %a0) #1
+ ret i16 %v0
}
-; CHECK-LABEL: popcount_32
-; CHECK: popcount
-define i32 @popcount_32(i32 %p) #0 {
- %t = call i32 @llvm.ctpop.i32(i32 %p) #0
- ret i32 %t
+define i32 @popcount_i32(i32 %a0) #0 {
+; CHECK-LABEL: popcount_i32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r1 = #0
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = popcount(r1:0)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = tail call i32 @llvm.ctpop.i32(i32 %a0) #1
+ ret i32 %v0
}
-; CHECK-LABEL: popcount_64
-; CHECK: popcount
-define i64 @popcount_64(i64 %p) #0 {
- %t = call i64 @llvm.ctpop.i64(i64 %p) #0
- ret i64 %t
+define i64 @popcount_i64(i64 %a0) #0 {
+; CHECK-LABEL: popcount_i64:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = popcount(r1:0)
+; CHECK-NEXT: r1 = #0
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = tail call i64 @llvm.ctpop.i64(i64 %a0) #1
+ ret i64 %v0
}
-; CHECK-LABEL: ctlz_16
-; CHECK: [[REG0:r[0-9]+]] = zxth
-; CHECK: [[REG1:r[0-9]+]] = cl0([[REG0]])
-; CHECK: add([[REG1]],#-16)
-define i16 @ctlz_16(i16 %p) #0 {
- %t = call i16 @llvm.ctlz.i16(i16 %p, i1 true) #0
- ret i16 %t
+define i16 @ctlz_i16(i16 %a0) #0 {
+; CHECK-LABEL: ctlz_i16:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = zxth(r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = cl0(r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = add(r0,#-16)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = tail call i16 @llvm.ctlz.i16(i16 %a0, i1 true) #1
+ ret i16 %v0
}
-; CHECK-LABEL: ctlz_32
-; CHECK: cl0
-define i32 @ctlz_32(i32 %p) #0 {
- %t = call i32 @llvm.ctlz.i32(i32 %p, i1 true) #0
- ret i32 %t
+define i32 @ctlz_i32(i32 %a0) #0 {
+; CHECK-LABEL: ctlz_i32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = cl0(r0)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = tail call i32 @llvm.ctlz.i32(i32 %a0, i1 true) #1
+ ret i32 %v0
}
-; CHECK-LABEL: ctlz_64
-; CHECK: cl0
-define i64 @ctlz_64(i64 %p) #0 {
- %t = call i64 @llvm.ctlz.i64(i64 %p, i1 true) #0
- ret i64 %t
+define i64 @ctlz_i64(i64 %a0) #0 {
+; CHECK-LABEL: ctlz_i64:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = cl0(r1:0)
+; CHECK-NEXT: r1 = #0
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = tail call i64 @llvm.ctlz.i64(i64 %a0, i1 true) #1
+ ret i64 %v0
}
-; CHECK-LABEL: cttz_16
-; CHECK: ct0
-define i16 @cttz_16(i16 %p) #0 {
- %t = call i16 @llvm.cttz.i16(i16 %p, i1 true) #0
- ret i16 %t
+define i16 @cttz_i16(i16 %a0) #0 {
+; CHECK-LABEL: cttz_i16:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = ct0(r0)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = tail call i16 @llvm.cttz.i16(i16 %a0, i1 true) #1
+ ret i16 %v0
}
-; CHECK-LABEL: cttz_32
-; CHECK: ct0
-define i32 @cttz_32(i32 %p) #0 {
- %t = call i32 @llvm.cttz.i32(i32 %p, i1 true) #0
- ret i32 %t
+define i32 @cttz_i32(i32 %a0) #0 {
+; CHECK-LABEL: cttz_i32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = ct0(r0)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = tail call i32 @llvm.cttz.i32(i32 %a0, i1 true) #1
+ ret i32 %v0
}
-; CHECK-LABEL: cttz_64
-; CHECK: ct0
-define i64 @cttz_64(i64 %p) #0 {
- %t = call i64 @llvm.cttz.i64(i64 %p, i1 true) #0
- ret i64 %t
+define i64 @cttz_i64(i64 %a0) #0 {
+; CHECK-LABEL: cttz_i64:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = ct0(r1:0)
+; CHECK-NEXT: r1 = #0
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = tail call i64 @llvm.cttz.i64(i64 %a0, i1 true) #1
+ ret i64 %v0
}
-; CHECK-LABEL: brev_16
-; CHECK: [[REG:r[0-9]+]] = brev
-; CHECK: lsr([[REG]],#16)
-define i16 @brev_16(i16 %p) #0 {
- %t = call i16 @llvm.bitreverse.i16(i16 %p) #0
- ret i16 %t
+define i16 @bswap_i16(i16 %a0) #0 {
+; CHECK-LABEL: bswap_i16:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = swiz(r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = lsr(r0,#16)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = tail call i16 @llvm.bswap.i16(i16 %a0) #1
+ ret i16 %v0
}
-; CHECK-LABEL: brev_32
-; CHECK: brev
-define i32 @brev_32(i32 %p) #0 {
- %t = call i32 @llvm.bitreverse.i32(i32 %p) #0
- ret i32 %t
+define i32 @bswap_i32(i32 %a0) #0 {
+; CHECK-LABEL: bswap_i32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = swiz(r0)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = tail call i32 @llvm.bswap.i32(i32 %a0) #1
+ ret i32 %v0
}
-; CHECK-LABEL: brev_64
-; CHECK: brev
-define i64 @brev_64(i64 %p) #0 {
- %t = call i64 @llvm.bitreverse.i64(i64 %p) #0
- ret i64 %t
+define i64 @bswap_i64(i64 %a0) #0 {
+; CHECK-LABEL: bswap_i64:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r2 = swiz(r1)
+; CHECK-NEXT: r3 = swiz(r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = combine(r3,r2)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = tail call i64 @llvm.bswap.i64(i64 %a0) #1
+ ret i64 %v0
}
-; CHECK-LABEL: bswap_16
-; CHECK: [[REG:r[0-9]+]] = swiz
-; CHECK: lsr([[REG]],#16)
-define i16 @bswap_16(i16 %p) #0 {
- %t = call i16 @llvm.bswap.i16(i16 %p) #0
- ret i16 %t
+define <2 x i16> @bswap_v2i16(<2 x i16> %a0) #0 {
+; CHECK-LABEL: bswap_v2i16:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = swiz(r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = combine(r0.l,r0.h)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = tail call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %a0)
+ ret <2 x i16> %v0
}
-; CHECK-LABEL: bswap_32
-; CHECK: swiz
-define i32 @bswap_32(i32 %p) #0 {
- %t = call i32 @llvm.bswap.i32(i32 %p) #0
- ret i32 %t
+define <4 x i16> @bswap_v4i16(<4 x i16> %a0) #0 {
+; CHECK-LABEL: bswap_v4i16:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r3:2 = vlsrh(r1:0,#8)
+; CHECK-NEXT: r5:4 = vaslh(r1:0,#8)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = or(r3:2,r5:4)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = tail call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %a0)
+ ret <4 x i16> %v0
}
-; CHECK-LABEL: bswap_64
-; CHECK: swiz
-; CHECK: swiz
-; CHECK: combine
-define i64 @bswap_64(i64 %p) #0 {
- %t = call i64 @llvm.bswap.i64(i64 %p) #0
- ret i64 %t
+define <2 x i32> @bswap_v2i32(<2 x i32> %a0) #0 {
+; CHECK-LABEL: bswap_v2i32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = swiz(r0)
+; CHECK-NEXT: r1 = swiz(r1)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a0)
+ ret <2 x i32> %v0
}
-declare i16 @llvm.ctpop.i16(i16) #0
-declare i32 @llvm.ctpop.i32(i32) #0
-declare i64 @llvm.ctpop.i64(i64) #0
+define i16 @brev_i16(i16 %a0) #0 {
+; CHECK-LABEL: brev_i16:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = brev(r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = lsr(r0,#16)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = tail call i16 @llvm.bitreverse.i16(i16 %a0) #1
+ ret i16 %v0
+}
+
+define i32 @brev_i32(i32 %a0) #0 {
+; CHECK-LABEL: brev_i32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = brev(r0)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = tail call i32 @llvm.bitreverse.i32(i32 %a0) #1
+ ret i32 %v0
+}
+
+define i64 @brev_i64(i64 %a0) #0 {
+; CHECK-LABEL: brev_i64:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = brev(r1:0)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = tail call i64 @llvm.bitreverse.i64(i64 %a0) #1
+ ret i64 %v0
+}
+
+define <4 x i8> @brev_v4i8(<4 x i8> %a0) #0 {
+; CHECK-LABEL: brev_v4i8:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = brev(r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = swiz(r0)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = tail call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> %a0)
+ ret <4 x i8> %v0
+}
+
+define <8 x i8> @brev_v8i8(<8 x i8> %a0) #0 {
+; CHECK-LABEL: brev_v8i8:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r3:2 = brev(r1:0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = swiz(r3)
+; CHECK-NEXT: r1 = swiz(r2)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = tail call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %a0)
+ ret <8 x i8> %v0
+}
+
+define <2 x i16> @brev_v2i16(<2 x i16> %a0) #0 {
+; CHECK-LABEL: brev_v2i16:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = brev(r0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = combine(r0.l,r0.h)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = tail call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a0)
+ ret <2 x i16> %v0
+}
+
+define <4 x i16> @brev_v4i16(<4 x i16> %a0) #0 {
+; CHECK-LABEL: brev_v4i16:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r3:2 = brev(r1:0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r0 = combine(r3.l,r3.h)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: r1 = combine(r2.l,r2.h)
+; CHECK-NEXT: }
+ %v0 = tail call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> %a0)
+ ret <4 x i16> %v0
+}
+
+define <2 x i32> @brev_v2i32(<2 x i32> %a0) #0 {
+; CHECK-LABEL: brev_v2i32:
+; CHECK: .cfi_startproc
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: {
+; CHECK-NEXT: r3:2 = brev(r1:0)
+; CHECK-NEXT: }
+; CHECK-NEXT: {
+; CHECK-NEXT: r1:0 = combine(r2,r3)
+; CHECK-NEXT: jumpr r31
+; CHECK-NEXT: }
+ %v0 = tail call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %a0)
+ ret <2 x i32> %v0
+}
+
+
+declare i16 @llvm.ctpop.i16(i16) #1
+declare i32 @llvm.ctpop.i32(i32) #1
+declare i64 @llvm.ctpop.i64(i64) #1
+
+declare i16 @llvm.ctlz.i16(i16, i1) #1
+declare i32 @llvm.ctlz.i32(i32, i1) #1
+declare i64 @llvm.ctlz.i64(i64, i1) #1
+
+declare i16 @llvm.cttz.i16(i16, i1) #1
+declare i32 @llvm.cttz.i32(i32, i1) #1
+declare i64 @llvm.cttz.i64(i64, i1) #1
+
+declare i16 @llvm.bswap.i16(i16) #1
+declare i32 @llvm.bswap.i32(i32) #1
+declare i64 @llvm.bswap.i64(i64) #1
+
+declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>) #1
+declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>) #1
+declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) #1
+
+declare i16 @llvm.bitreverse.i16(i16) #1
+declare i32 @llvm.bitreverse.i32(i32) #1
+declare i64 @llvm.bitreverse.i64(i64) #1
-declare i16 @llvm.ctlz.i16(i16, i1) #0
-declare i32 @llvm.ctlz.i32(i32, i1) #0
-declare i64 @llvm.ctlz.i64(i64, i1) #0
+declare <4 x i8> @llvm.bitreverse.v4i8(<4 x i8>) #1
+declare <8 x i8> @llvm.bitreverse.v8i8(<8 x i8>) #1
-declare i16 @llvm.cttz.i16(i16, i1) #0
-declare i32 @llvm.cttz.i32(i32, i1) #0
-declare i64 @llvm.cttz.i64(i64, i1) #0
+declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>) #1
+declare <4 x i16> @llvm.bitreverse.v4i16(<4 x i16>) #1
+declare <2 x i32> @llvm.bitreverse.v2i32(<2 x i32>) #1
-declare i16 @llvm.bitreverse.i16(i16) #0
-declare i32 @llvm.bitreverse.i32(i32) #0
-declare i64 @llvm.bitreverse.i64(i64) #0
-declare i16 @llvm.bswap.i16(i16) #0
-declare i32 @llvm.bswap.i32(i32) #0
-declare i64 @llvm.bswap.i64(i64) #0
+attributes #0 = { "target-features"="+v68,-long-calls" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-attributes #0 = { nounwind readnone }