[llvm-branch-commits] [llvm] 094f771 - [Hexagon] Add patterns for bswap/bitreverse for scalar vectors

Tom Stellard via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue May 30 15:38:34 PDT 2023


Author: Krzysztof Parzyszek
Date: 2023-05-30T15:37:57-07:00
New Revision: 094f77145b95fce1bc309c8905e55d73ab73191e

URL: https://github.com/llvm/llvm-project/commit/094f77145b95fce1bc309c8905e55d73ab73191e
DIFF: https://github.com/llvm/llvm-project/commit/094f77145b95fce1bc309c8905e55d73ab73191e.diff

LOG: [Hexagon] Add patterns for bswap/bitreverse for scalar vectors

Fixes https://github.com/llvm/llvm-project/issues/62474

(cherry picked from commit c7b291a63f5cabea47e1b4b13e7091e3e37dadb1)
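
For illustration, a minimal IR sketch of the kind of operation that gains
direct selection patterns (the function name here is illustrative; the
committed tests in llvm/test/CodeGen/Hexagon/bitmanip.ll cover the full set
of types):

    define <2 x i16> @example_bswap_v2i16(<2 x i16> %a) {
      ; With this patch, this selects to A2_swiz + A2_combine_lh
      ; (see the bswap_v2i16 test below).
      %r = tail call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %a)
      ret <2 x i16> %r
    }
    declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>)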

Added: 
    

Modified: 
    llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
    llvm/lib/Target/Hexagon/HexagonPatterns.td
    llvm/test/CodeGen/Hexagon/bitmanip.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 202fc473f9e49..609a383426d66 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1628,7 +1628,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
     ISD::UADDO,   ISD::SSUBO,   ISD::USUBO,   ISD::SMUL_LOHI, ISD::UMUL_LOHI,
     // Logical/bit:
     ISD::AND,     ISD::OR,      ISD::XOR,     ISD::ROTL,    ISD::ROTR,
-    ISD::CTPOP,   ISD::CTLZ,    ISD::CTTZ,
+    ISD::CTPOP,   ISD::CTLZ,    ISD::CTTZ,    ISD::BSWAP,   ISD::BITREVERSE,
     // Floating point arithmetic/math functions:
     ISD::FADD,    ISD::FSUB,    ISD::FMUL,    ISD::FMA,     ISD::FDIV,
     ISD::FREM,    ISD::FNEG,    ISD::FABS,    ISD::FSQRT,   ISD::FSIN,
@@ -1701,8 +1701,11 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::OR,  NativeVT, Legal);
     setOperationAction(ISD::XOR, NativeVT, Legal);
 
-    if (NativeVT.getVectorElementType() != MVT::i1)
+    if (NativeVT.getVectorElementType() != MVT::i1) {
       setOperationAction(ISD::SPLAT_VECTOR, NativeVT, Legal);
+      setOperationAction(ISD::BSWAP,        NativeVT, Legal);
+      setOperationAction(ISD::BITREVERSE,   NativeVT, Legal);
+    }
   }
 
   for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32}) {

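The two hunks above work together: ISD::BSWAP and ISD::BITREVERSE join the
list of operations set to Expand for vector types, and are then overridden
to Legal for native vector types whose element type is not i1. A sketch of
the effective result for one such type (illustrative, not verbatim from the
patch):

    // For a native vector type such as MVT::v2i16:
    setOperationAction(ISD::BSWAP,      MVT::v2i16, Legal);
    setOperationAction(ISD::BITREVERSE, MVT::v2i16, Legal);
    // The DAG keeps the generic nodes, which instruction selection then
    // matches against the new HexagonPatterns.td patterns below.
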
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index a75ac0e1378ef..375e519a6848b 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -117,8 +117,8 @@ def usat: PatFrag<(ops node:$V, node:$Ty), (HexagonUSAT node:$V, node:$Ty)>;
 
 // Pattern fragments to extract the low and high subregisters from a
 // 64-bit value.
-def LoReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_lo)>;
-def HiReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_hi)>;
+def LoReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG $Rs, isub_lo)>;
+def HiReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG $Rs, isub_hi)>;
 
 def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{
   return isOrEquivalentToAdd(N);
@@ -1123,6 +1123,12 @@ def: Pat<(bswap I32:$Rs),  (A2_swiz I32:$Rs)>;
 def: Pat<(bswap I64:$Rss), (Combinew (A2_swiz (LoReg $Rss)),
                                      (A2_swiz (HiReg $Rss)))>;
 
+def: Pat<(bswap V2I16:$Rs), (A2_combine_lh (A2_swiz $Rs), (A2_swiz $Rs))>;
+def: Pat<(bswap V2I32:$Rs), (Combinew (A2_swiz (HiReg $Rs)),
+                                      (A2_swiz (LoReg $Rs)))>;
+def: Pat<(bswap V4I16:$Rs), (A2_orp (S2_lsr_i_vh $Rs, 8),
+                                    (S2_asl_i_vh $Rs, 8))>;
+
 def: Pat<(shl s6_0ImmPred:$s6, I32:$Rt),  (S4_lsli imm:$s6, I32:$Rt)>;
 def: Pat<(shl I32:$Rs, (i32 16)),         (A2_aslh I32:$Rs)>;
 def: Pat<(sra I32:$Rs, (i32 16)),         (A2_asrh I32:$Rs)>;
@@ -1854,6 +1860,20 @@ def: Pat<(i32 (ctpop I32:$Rs)),   (S5_popcountp (A4_combineir 0, I32:$Rs))>;
 def: Pat<(bitreverse I32:$Rs),    (S2_brev I32:$Rs)>;
 def: Pat<(bitreverse I64:$Rss),   (S2_brevp I64:$Rss)>;
 
+def: Pat<(bitreverse V4I8:$Rs),   (A2_swiz (S2_brev $Rs))>;
+def: Pat<(bitreverse V8I8:$Rs),   (Combinew (A2_swiz (LoReg (S2_brevp $Rs))),
+                                            (A2_swiz (HiReg (S2_brevp $Rs))))>;
+def: Pat<(bitreverse V2I16:$Rs),  (A2_combine_lh (S2_brev $Rs),
+                                                 (S2_brev $Rs))>;
+def: Pat<(bitreverse V4I16:$Rs),
+         (Combinew (A2_combine_lh (LoReg (S2_brevp $Rs)),
+                                  (LoReg (S2_brevp $Rs))),
+                   (A2_combine_lh (HiReg (S2_brevp $Rs)),
+                                  (HiReg (S2_brevp $Rs))))>;
+def: Pat<(bitreverse V2I32:$Rs),
+         (Combinew (i32 (LoReg (S2_brevp $Rs))),
+                   (i32 (HiReg (S2_brevp $Rs))))>;
+
 let AddedComplexity = 20 in { // Complexity greater than and/or/xor
   def: Pat<(and I32:$Rs, IsNPow2_32:$V),
            (S2_clrbit_i IntRegs:$Rs, (LogN2_32 $V))>;

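To see why the new V2I16 bswap pattern is correct, here is a worked byte
trace (illustrative only; bytes are written most-significant first):

    input x            = [B3 B2 B1 B0]    ; halfwords: hi = [B3 B2], lo = [B1 B0]
    A2_swiz(x)         = [B0 B1 B2 B3]    ; reverse all four bytes
    A2_combine_lh(s,s) = [B2 B3 B0 B1]    ; hi halfword = s.l, lo halfword = s.h

The result is each 16-bit lane byte-swapped in place, i.e. bswap on v2i16.
The V4I16 pattern instead shifts each halfword left and right by 8 and ORs
the halves, and the V2I32 pattern applies A2_swiz to each 32-bit half.
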
diff --git a/llvm/test/CodeGen/Hexagon/bitmanip.ll b/llvm/test/CodeGen/Hexagon/bitmanip.ll
index 2044a2fdd083b..9ce7f0576506c 100644
--- a/llvm/test/CodeGen/Hexagon/bitmanip.ll
+++ b/llvm/test/CodeGen/Hexagon/bitmanip.ll
@@ -1,135 +1,370 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc -march=hexagon < %s | FileCheck %s
 
-; CHECK-LABEL: popcount_16
-; CHECK: zxth
-; CHECK: popcount
-define i16 @popcount_16(i16 %p) #0 {
-  %t = call i16 @llvm.ctpop.i16(i16 %p) #0
-  ret i16 %t
+define i16 @popcount_i16(i16 %a0) #0 {
+; CHECK-LABEL: popcount_i16:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1 = #0
+; CHECK-NEXT:     r0 = zxth(r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = popcount(r1:0)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %v0 = tail call i16 @llvm.ctpop.i16(i16 %a0) #1
+  ret i16 %v0
 }
 
-; CHECK-LABEL: popcount_32
-; CHECK: popcount
-define i32 @popcount_32(i32 %p) #0 {
-  %t = call i32 @llvm.ctpop.i32(i32 %p) #0
-  ret i32 %t
+define i32 @popcount_i32(i32 %a0) #0 {
+; CHECK-LABEL: popcount_i32:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1 = #0
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = popcount(r1:0)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %v0 = tail call i32 @llvm.ctpop.i32(i32 %a0) #1
+  ret i32 %v0
 }
 
-; CHECK-LABEL: popcount_64
-; CHECK: popcount
-define i64 @popcount_64(i64 %p) #0 {
-  %t = call i64 @llvm.ctpop.i64(i64 %p) #0
-  ret i64 %t
+define i64 @popcount_i64(i64 %a0) #0 {
+; CHECK-LABEL: popcount_i64:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = popcount(r1:0)
+; CHECK-NEXT:     r1 = #0
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %v0 = tail call i64 @llvm.ctpop.i64(i64 %a0) #1
+  ret i64 %v0
 }
 
-; CHECK-LABEL: ctlz_16
-; CHECK: [[REG0:r[0-9]+]] = zxth
-; CHECK: [[REG1:r[0-9]+]] = cl0([[REG0]])
-; CHECK: add([[REG1]],#-16)
-define i16 @ctlz_16(i16 %p) #0 {
-  %t = call i16 @llvm.ctlz.i16(i16 %p, i1 true) #0
-  ret i16 %t
+define i16 @ctlz_i16(i16 %a0) #0 {
+; CHECK-LABEL: ctlz_i16:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = zxth(r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = cl0(r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = add(r0,#-16)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %v0 = tail call i16 @llvm.ctlz.i16(i16 %a0, i1 true) #1
+  ret i16 %v0
 }
 
-; CHECK-LABEL: ctlz_32
-; CHECK: cl0
-define i32 @ctlz_32(i32 %p) #0 {
-  %t = call i32 @llvm.ctlz.i32(i32 %p, i1 true) #0
-  ret i32 %t
+define i32 @ctlz_i32(i32 %a0) #0 {
+; CHECK-LABEL: ctlz_i32:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = cl0(r0)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %v0 = tail call i32 @llvm.ctlz.i32(i32 %a0, i1 true) #1
+  ret i32 %v0
 }
 
-; CHECK-LABEL: ctlz_64
-; CHECK: cl0
-define i64 @ctlz_64(i64 %p) #0 {
-  %t = call i64 @llvm.ctlz.i64(i64 %p, i1 true) #0
-  ret i64 %t
+define i64 @ctlz_i64(i64 %a0) #0 {
+; CHECK-LABEL: ctlz_i64:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = cl0(r1:0)
+; CHECK-NEXT:     r1 = #0
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %v0 = tail call i64 @llvm.ctlz.i64(i64 %a0, i1 true) #1
+  ret i64 %v0
 }
 
-; CHECK-LABEL: cttz_16
-; CHECK: ct0
-define i16 @cttz_16(i16 %p) #0 {
-  %t = call i16 @llvm.cttz.i16(i16 %p, i1 true) #0
-  ret i16 %t
+define i16 @cttz_i16(i16 %a0) #0 {
+; CHECK-LABEL: cttz_i16:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = ct0(r0)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %v0 = tail call i16 @llvm.cttz.i16(i16 %a0, i1 true) #1
+  ret i16 %v0
 }
 
-; CHECK-LABEL: cttz_32
-; CHECK: ct0
-define i32 @cttz_32(i32 %p) #0 {
-  %t = call i32 @llvm.cttz.i32(i32 %p, i1 true) #0
-  ret i32 %t
+define i32 @cttz_i32(i32 %a0) #0 {
+; CHECK-LABEL: cttz_i32:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = ct0(r0)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %v0 = tail call i32 @llvm.cttz.i32(i32 %a0, i1 true) #1
+  ret i32 %v0
 }
 
-; CHECK-LABEL: cttz_64
-; CHECK: ct0
-define i64 @cttz_64(i64 %p) #0 {
-  %t = call i64 @llvm.cttz.i64(i64 %p, i1 true) #0
-  ret i64 %t
+define i64 @cttz_i64(i64 %a0) #0 {
+; CHECK-LABEL: cttz_i64:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = ct0(r1:0)
+; CHECK-NEXT:     r1 = #0
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %v0 = tail call i64 @llvm.cttz.i64(i64 %a0, i1 true) #1
+  ret i64 %v0
 }
 
-; CHECK-LABEL: brev_16
-; CHECK: [[REG:r[0-9]+]] = brev
-; CHECK: lsr([[REG]],#16)
-define i16 @brev_16(i16 %p) #0 {
-  %t = call i16 @llvm.bitreverse.i16(i16 %p) #0
-  ret i16 %t
+define i16 @bswap_i16(i16 %a0) #0 {
+; CHECK-LABEL: bswap_i16:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = swiz(r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = lsr(r0,#16)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %v0 = tail call i16 @llvm.bswap.i16(i16 %a0) #1
+  ret i16 %v0
 }
 
-; CHECK-LABEL: brev_32
-; CHECK: brev
-define i32 @brev_32(i32 %p) #0 {
-  %t = call i32 @llvm.bitreverse.i32(i32 %p) #0
-  ret i32 %t
+define i32 @bswap_i32(i32 %a0) #0 {
+; CHECK-LABEL: bswap_i32:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = swiz(r0)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %v0 = tail call i32 @llvm.bswap.i32(i32 %a0) #1
+  ret i32 %v0
 }
 
-; CHECK-LABEL: brev_64
-; CHECK: brev
-define i64 @brev_64(i64 %p) #0 {
-  %t = call i64 @llvm.bitreverse.i64(i64 %p) #0
-  ret i64 %t
+define i64 @bswap_i64(i64 %a0) #0 {
+; CHECK-LABEL: bswap_i64:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r2 = swiz(r1)
+; CHECK-NEXT:     r3 = swiz(r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1:0 = combine(r3,r2)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %v0 = tail call i64 @llvm.bswap.i64(i64 %a0) #1
+  ret i64 %v0
 }
 
-; CHECK-LABEL: bswap_16
-; CHECK: [[REG:r[0-9]+]] = swiz
-; CHECK: lsr([[REG]],#16)
-define i16 @bswap_16(i16 %p) #0 {
-  %t = call i16 @llvm.bswap.i16(i16 %p) #0
-  ret i16 %t
+define <2 x i16> @bswap_v2i16(<2 x i16> %a0) #0 {
+; CHECK-LABEL: bswap_v2i16:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = swiz(r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = combine(r0.l,r0.h)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %v0 = tail call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %a0)
+  ret <2 x i16> %v0
 }
 
-; CHECK-LABEL: bswap_32
-; CHECK: swiz
-define i32 @bswap_32(i32 %p) #0 {
-  %t = call i32 @llvm.bswap.i32(i32 %p) #0
-  ret i32 %t
+define <4 x i16> @bswap_v4i16(<4 x i16> %a0) #0 {
+; CHECK-LABEL: bswap_v4i16:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r3:2 = vlsrh(r1:0,#8)
+; CHECK-NEXT:     r5:4 = vaslh(r1:0,#8)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1:0 = or(r3:2,r5:4)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %v0 = tail call <4 x i16> @llvm.bswap.v4i16(<4 x i16> %a0)
+  ret <4 x i16> %v0
 }
 
-; CHECK-LABEL: bswap_64
-; CHECK: swiz
-; CHECK: swiz
-; CHECK: combine
-define i64 @bswap_64(i64 %p) #0 {
-  %t = call i64 @llvm.bswap.i64(i64 %p) #0
-  ret i64 %t
+define <2 x i32> @bswap_v2i32(<2 x i32> %a0) #0 {
+; CHECK-LABEL: bswap_v2i32:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = swiz(r0)
+; CHECK-NEXT:     r1 = swiz(r1)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %v0 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a0)
+  ret <2 x i32> %v0
 }
 
-declare i16 @llvm.ctpop.i16(i16) #0
-declare i32 @llvm.ctpop.i32(i32) #0
-declare i64 @llvm.ctpop.i64(i64) #0
+define i16 @brev_i16(i16 %a0) #0 {
+; CHECK-LABEL: brev_i16:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = brev(r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = lsr(r0,#16)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %v0 = tail call i16 @llvm.bitreverse.i16(i16 %a0) #1
+  ret i16 %v0
+}
+
+define i32 @brev_i32(i32 %a0) #0 {
+; CHECK-LABEL: brev_i32:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = brev(r0)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %v0 = tail call i32 @llvm.bitreverse.i32(i32 %a0) #1
+  ret i32 %v0
+}
+
+define i64 @brev_i64(i64 %a0) #0 {
+; CHECK-LABEL: brev_i64:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1:0 = brev(r1:0)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %v0 = tail call i64 @llvm.bitreverse.i64(i64 %a0) #1
+  ret i64 %v0
+}
+
+define <4 x i8> @brev_v4i8(<4 x i8> %a0) #0 {
+; CHECK-LABEL: brev_v4i8:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = brev(r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = swiz(r0)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %v0 = tail call <4 x i8> @llvm.bitreverse.v4i8(<4 x i8> %a0)
+  ret <4 x i8> %v0
+}
+
+define <8 x i8> @brev_v8i8(<8 x i8> %a0) #0 {
+; CHECK-LABEL: brev_v8i8:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r3:2 = brev(r1:0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = swiz(r3)
+; CHECK-NEXT:     r1 = swiz(r2)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %v0 = tail call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %a0)
+  ret <8 x i8> %v0
+}
+
+define <2 x i16> @brev_v2i16(<2 x i16> %a0) #0 {
+; CHECK-LABEL: brev_v2i16:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = brev(r0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = combine(r0.l,r0.h)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %v0 = tail call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a0)
+  ret <2 x i16> %v0
+}
+
+define <4 x i16> @brev_v4i16(<4 x i16> %a0) #0 {
+; CHECK-LABEL: brev_v4i16:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r3:2 = brev(r1:0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r0 = combine(r3.l,r3.h)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:     r1 = combine(r2.l,r2.h)
+; CHECK-NEXT:    }
+  %v0 = tail call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> %a0)
+  ret <4 x i16> %v0
+}
+
+define <2 x i32> @brev_v2i32(<2 x i32> %a0) #0 {
+; CHECK-LABEL: brev_v2i32:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r3:2 = brev(r1:0)
+; CHECK-NEXT:    }
+; CHECK-NEXT:    {
+; CHECK-NEXT:     r1:0 = combine(r2,r3)
+; CHECK-NEXT:     jumpr r31
+; CHECK-NEXT:    }
+  %v0 = tail call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %a0)
+  ret <2 x i32> %v0
+}
+
+
+declare i16 @llvm.ctpop.i16(i16) #1
+declare i32 @llvm.ctpop.i32(i32) #1
+declare i64 @llvm.ctpop.i64(i64) #1
+
+declare i16 @llvm.ctlz.i16(i16, i1) #1
+declare i32 @llvm.ctlz.i32(i32, i1) #1
+declare i64 @llvm.ctlz.i64(i64, i1) #1
+
+declare i16 @llvm.cttz.i16(i16, i1) #1
+declare i32 @llvm.cttz.i32(i32, i1) #1
+declare i64 @llvm.cttz.i64(i64, i1) #1
+
+declare i16 @llvm.bswap.i16(i16) #1
+declare i32 @llvm.bswap.i32(i32) #1
+declare i64 @llvm.bswap.i64(i64) #1
+
+declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>) #1
+declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>) #1
+declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) #1
+
+declare i16 @llvm.bitreverse.i16(i16) #1
+declare i32 @llvm.bitreverse.i32(i32) #1
+declare i64 @llvm.bitreverse.i64(i64) #1
 
-declare i16 @llvm.ctlz.i16(i16, i1) #0
-declare i32 @llvm.ctlz.i32(i32, i1) #0
-declare i64 @llvm.ctlz.i64(i64, i1) #0
+declare <4 x i8> @llvm.bitreverse.v4i8(<4 x i8>) #1
+declare <8 x i8> @llvm.bitreverse.v8i8(<8 x i8>) #1
 
-declare i16 @llvm.cttz.i16(i16, i1) #0
-declare i32 @llvm.cttz.i32(i32, i1) #0
-declare i64 @llvm.cttz.i64(i64, i1) #0
+declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>) #1
+declare <4 x i16> @llvm.bitreverse.v4i16(<4 x i16>) #1
+declare <2 x i32> @llvm.bitreverse.v2i32(<2 x i32>) #1
 
-declare i16 @llvm.bitreverse.i16(i16) #0
-declare i32 @llvm.bitreverse.i32(i32) #0
-declare i64 @llvm.bitreverse.i64(i64) #0
 
-declare i16 @llvm.bswap.i16(i16) #0
-declare i32 @llvm.bswap.i32(i32) #0
-declare i64 @llvm.bswap.i64(i64) #0
+attributes #0 = { "target-features"="+v68,-long-calls" }
+attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 
-attributes #0 = { nounwind readnone }
