[llvm] 59703f1 - [VE] Update bit operations
Kazushi Marukawa via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 11 03:42:19 PDT 2020
Author: Kazushi (Jam) Marukawa
Date: 2020-08-11T19:42:12+09:00
New Revision: 59703f17361815e3854aeb0822961278246fb666
URL: https://github.com/llvm/llvm-project/commit/59703f17361815e3854aeb0822961278246fb666
DIFF: https://github.com/llvm/llvm-project/commit/59703f17361815e3854aeb0822961278246fb666.diff
LOG: [VE] Update bit operations
Change bitreverse/bswap/ctlz/ctpop/cttz regression tests to support i128
and signext/zeroext i32 types. This patch also changes the way i32 types
are supported, using 64-bit VE instructions.
Reviewed By: simoll
Differential Revision: https://reviews.llvm.org/D85712
Added:
Modified:
llvm/lib/Target/VE/VEISelLowering.cpp
llvm/lib/Target/VE/VEISelLowering.h
llvm/lib/Target/VE/VEInstrInfo.td
llvm/test/CodeGen/VE/bitreverse.ll
llvm/test/CodeGen/VE/bswap.ll
llvm/test/CodeGen/VE/ctlz.ll
llvm/test/CodeGen/VE/ctpop.ll
llvm/test/CodeGen/VE/cttz.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index bfe1e8d39e47..510dc0f7ab34 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -684,14 +684,18 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ROTL, IntVT, Expand);
setOperationAction(ISD::ROTR, IntVT, Expand);
- // Use isel patterns for i32 and i64
+ // VE has 64 bits instruction which works as i64 BSWAP operation. This
+ // instruction works fine as i32 BSWAP operation with an additional
+ // parameter. Use isel patterns to lower BSWAP.
setOperationAction(ISD::BSWAP, IntVT, Legal);
- setOperationAction(ISD::CTLZ, IntVT, Legal);
- setOperationAction(ISD::CTPOP, IntVT, Legal);
- // Use isel patterns for i64, Promote i32
+ // VE has only 64 bits instructions which work as i64 BITREVERSE/CTLZ/CTPOP
+ // operations. Use isel patterns for i64, promote for i32.
LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
setOperationAction(ISD::BITREVERSE, IntVT, Act);
+ setOperationAction(ISD::CTLZ, IntVT, Act);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, IntVT, Act);
+ setOperationAction(ISD::CTPOP, IntVT, Act);
}
/// } Int Ops
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index cd24cbfe5161..85fb2f61bfcf 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -105,10 +105,20 @@ class VETargetLowering : public TargetLowering {
MachineMemOperand::Flags Flags,
bool *Fast) const override;
- // Block s/udiv lowering for now
- bool isIntDivCheap(EVT VT, AttributeList Attr) const override { return true; }
-
+ /// Target Optimization {
+
+ // SX-Aurora VE's s/udiv is 5-9 times slower than multiply.
+ bool isIntDivCheap(EVT, AttributeList) const override { return false; }
+ // VE doesn't have rem.
+ bool hasStandaloneRem(EVT) const override { return false; }
+ // VE LDZ instruction returns 64 if the input is zero.
+ bool isCheapToSpeculateCtlz() const override { return true; }
+ // VE LDZ instruction is fast.
+ bool isCtlzFast() const override { return true; }
+ // VE has NND instruction.
bool hasAndNot(SDValue Y) const override;
+
+ /// } Target Optimization
};
} // namespace llvm
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index 8500f8ef1292..2555b16138b2 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -1203,7 +1203,10 @@ defm NND : RRNCm<"nnd", 0x54, I64, i64, and_not>;
defm MRG : RRMRGm<"mrg", 0x56, I64, i64>;
// Section 8.5.7 - LDZ (Leading Zero Count)
-defm LDZ : RRI1m<"ldz", 0x67, I64, i64, ctlz>;
+def ctlz_pat : PatFrags<(ops node:$src),
+ [(ctlz node:$src),
+ (ctlz_zero_undef node:$src)]>;
+defm LDZ : RRI1m<"ldz", 0x67, I64, i64, ctlz_pat>;
// Section 8.5.8 - PCNT (Population Count)
defm PCNT : RRI1m<"pcnt", 0x38, I64, i64, ctpop>;
@@ -1213,6 +1216,16 @@ defm BRV : RRI1m<"brv", 0x39, I64, i64, bitreverse>;
// Section 8.5.10 - BSWP (Byte Swap)
defm BSWP : RRSWPm<"bswp", 0x2B, I64, i64>;
+def : Pat<(i64 (bswap i64:$src)),
+ (BSWPri $src, 0)>;
+def : Pat<(i64 (bswap (i64 mimm:$src))),
+ (BSWPmi (MIMM $src), 0)>;
+def : Pat<(i32 (bswap i32:$src)),
+ (EXTRACT_SUBREG
+ (BSWPri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $src, sub_i32), 1),
+ sub_i32)>;
+def : Pat<(i32 (bswap (i32 mimm:$src))),
+ (EXTRACT_SUBREG (BSWPmi (MIMM $src), 1), sub_i32)>;
// Section 8.5.11 - CMOV (Conditional Move)
let cw = 0, cw2 = 0 in defm CMOVL : RRCMOVm<"cmov.l.${cfw}", 0x3B, I64, i64>;
@@ -1982,19 +1995,6 @@ def : Pat<(f32 (bitconvert i32:$op)),
(EXTRACT_SUBREG (SLLri (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
$op, sub_i32), 32), sub_f32)>;
-// Bits operations pattern matchings.
-def : Pat<(i32 (ctpop i32:$src)),
- (EXTRACT_SUBREG (PCNTr (ANDrm (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), $src, sub_i32), !add(32, 64))), sub_i32)>;
-def : Pat<(i32 (ctlz i32:$src)),
- (EXTRACT_SUBREG (LDZr (SLLri (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), $src, sub_i32), 32)), sub_i32)>;
-def : Pat<(i64 (bswap i64:$src)),
- (BSWPri $src, 0)>;
-def : Pat<(i32 (bswap i32:$src)),
- (EXTRACT_SUBREG (BSWPri (INSERT_SUBREG
- (i64 (IMPLICIT_DEF)), $src, sub_i32), 1), sub_i32)>;
-
// Several special pattern matches to optimize code
def : Pat<(i32 (and i32:$lhs, 0xff)),
diff --git a/llvm/test/CodeGen/VE/bitreverse.ll b/llvm/test/CodeGen/VE/bitreverse.ll
index af58afe38fd9..a4e4839c8f1e 100644
--- a/llvm/test/CodeGen/VE/bitreverse.ll
+++ b/llvm/test/CodeGen/VE/bitreverse.ll
@@ -1,7 +1,24 @@
; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
-define i64 @func1(i64 %p) {
-; CHECK-LABEL: func1:
+declare i128 @llvm.bitreverse.i128(i128)
+declare i64 @llvm.bitreverse.i64(i64)
+declare i32 @llvm.bitreverse.i32(i32)
+declare i16 @llvm.bitreverse.i16(i16)
+declare i8 @llvm.bitreverse.i8(i8)
+
+define i128 @func128(i128 %p) {
+; CHECK-LABEL: func128:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: brv %s2, %s1
+; CHECK-NEXT: brv %s1, %s0
+; CHECK-NEXT: or %s0, 0, %s2
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i128 @llvm.bitreverse.i128(i128 %p)
+ ret i128 %r
+}
+
+define i64 @func64(i64 %p) {
+; CHECK-LABEL: func64:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: brv %s0, %s0
; CHECK-NEXT: or %s11, 0, %s9
@@ -9,10 +26,18 @@ define i64 @func1(i64 %p) {
ret i64 %r
}
-declare i64 @llvm.bitreverse.i64(i64)
+define signext i32 @func32s(i32 signext %p) {
+; CHECK-LABEL: func32s:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: brv %s0, %s0
+; CHECK-NEXT: sra.l %s0, %s0, 32
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i32 @llvm.bitreverse.i32(i32 %p)
+ ret i32 %r
+}
-define i32 @func2(i32 %p) {
-; CHECK-LABEL: func2:
+define zeroext i32 @func32z(i32 zeroext %p) {
+; CHECK-LABEL: func32z:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: brv %s0, %s0
; CHECK-NEXT: srl %s0, %s0, 32
@@ -21,10 +46,8 @@ define i32 @func2(i32 %p) {
ret i32 %r
}
-declare i32 @llvm.bitreverse.i32(i32)
-
-define signext i16 @func3(i16 signext %p) {
-; CHECK-LABEL: func3:
+define signext i16 @func16s(i16 signext %p) {
+; CHECK-LABEL: func16s:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: brv %s0, %s0
; CHECK-NEXT: sra.l %s0, %s0, 48
@@ -33,10 +56,18 @@ define signext i16 @func3(i16 signext %p) {
ret i16 %r
}
-declare i16 @llvm.bitreverse.i16(i16)
+define zeroext i16 @func16z(i16 zeroext %p) {
+; CHECK-LABEL: func16z:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: brv %s0, %s0
+; CHECK-NEXT: srl %s0, %s0, 48
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i16 @llvm.bitreverse.i16(i16 %p)
+ ret i16 %r
+}
-define signext i8 @func4(i8 signext %p) {
-; CHECK-LABEL: func4:
+define signext i8 @func8s(i8 signext %p) {
+; CHECK-LABEL: func8s:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: brv %s0, %s0
; CHECK-NEXT: sra.l %s0, %s0, 56
@@ -45,44 +76,86 @@ define signext i8 @func4(i8 signext %p) {
ret i8 %r
}
-declare i8 @llvm.bitreverse.i8(i8)
-
-define i64 @func5(i64 %p) {
-; CHECK-LABEL: func5:
+define zeroext i8 @func8z(i8 zeroext %p) {
+; CHECK-LABEL: func8z:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: brv %s0, %s0
+; CHECK-NEXT: srl %s0, %s0, 56
; CHECK-NEXT: or %s11, 0, %s9
- %r = tail call i64 @llvm.bitreverse.i64(i64 %p)
+ %r = tail call i8 @llvm.bitreverse.i8(i8 %p)
+ ret i8 %r
+}
+
+define i128 @func128i() {
+; CHECK-LABEL: func128i:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 0, (0)1
+; CHECK-NEXT: lea.sl %s1, -65536
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i128 @llvm.bitreverse.i128(i128 65535)
+ ret i128 %r
+}
+
+define i64 @func64i() {
+; CHECK-LABEL: func64i:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: lea.sl %s0, -65536
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i64 @llvm.bitreverse.i64(i64 65535)
ret i64 %r
}
-define i32 @func6(i32 %p) {
-; CHECK-LABEL: func6:
+define signext i32 @func32is() {
+; CHECK-LABEL: func32is:
; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: brv %s0, %s0
-; CHECK-NEXT: srl %s0, %s0, 32
+; CHECK-NEXT: lea %s0, -65536
; CHECK-NEXT: or %s11, 0, %s9
- %r = tail call i32 @llvm.bitreverse.i32(i32 %p)
+ %r = tail call i32 @llvm.bitreverse.i32(i32 65535)
ret i32 %r
}
-define zeroext i16 @func7(i16 zeroext %p) {
-; CHECK-LABEL: func7:
+define zeroext i32 @func32iz() {
+; CHECK-LABEL: func32iz:
; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: brv %s0, %s0
-; CHECK-NEXT: srl %s0, %s0, 48
+; CHECK-NEXT: lea %s0, -65536
+; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: or %s11, 0, %s9
- %r = tail call i16 @llvm.bitreverse.i16(i16 %p)
+ %r = tail call i32 @llvm.bitreverse.i32(i32 65535)
+ ret i32 %r
+}
+
+define signext i16 @func16is() {
+; CHECK-LABEL: func16is:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: lea %s0, -256
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i16 @llvm.bitreverse.i16(i16 255)
ret i16 %r
}
-define zeroext i8 @func8(i8 zeroext %p) {
-; CHECK-LABEL: func8:
+define zeroext i16 @func16iz() {
+; CHECK-LABEL: func16iz:
; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: brv %s0, %s0
-; CHECK-NEXT: srl %s0, %s0, 56
+; CHECK-NEXT: lea %s0, 65280
; CHECK-NEXT: or %s11, 0, %s9
- %r = tail call i8 @llvm.bitreverse.i8(i8 %p)
+ %r = tail call i16 @llvm.bitreverse.i16(i16 255)
+ ret i16 %r
+}
+
+define signext i8 @func8is() {
+; CHECK-LABEL: func8is:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 15, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i8 @llvm.bitreverse.i8(i8 240)
ret i8 %r
}
+define zeroext i8 @func8iz() {
+; CHECK-LABEL: func8iz:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 15, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i8 @llvm.bitreverse.i8(i8 240)
+ ret i8 %r
+}
diff --git a/llvm/test/CodeGen/VE/bswap.ll b/llvm/test/CodeGen/VE/bswap.ll
index d87f3c93cb22..54f2df4c1400 100644
--- a/llvm/test/CodeGen/VE/bswap.ll
+++ b/llvm/test/CodeGen/VE/bswap.ll
@@ -1,7 +1,23 @@
; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
-define i64 @func1(i64 %p) {
-; CHECK-LABEL: func1:
+declare i128 @llvm.bswap.i128(i128)
+declare i64 @llvm.bswap.i64(i64)
+declare i32 @llvm.bswap.i32(i32)
+declare i16 @llvm.bswap.i16(i16)
+
+define i128 @func128(i128 %p) {
+; CHECK-LABEL: func128:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: bswp %s2, %s1, 0
+; CHECK-NEXT: bswp %s1, %s0, 0
+; CHECK-NEXT: or %s0, 0, %s2
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i128 @llvm.bswap.i128(i128 %p)
+ ret i128 %r
+}
+
+define i64 @func64(i64 %p) {
+; CHECK-LABEL: func64:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: bswp %s0, %s0, 0
; CHECK-NEXT: or %s11, 0, %s9
@@ -9,21 +25,28 @@ define i64 @func1(i64 %p) {
ret i64 %r
}
-declare i64 @llvm.bswap.i64(i64)
-
-define i32 @func2(i32 %p) {
-; CHECK-LABEL: func2:
+define signext i32 @func32s(i32 signext %p) {
+; CHECK-LABEL: func32s:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: bswp %s0, %s0, 1
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
; CHECK-NEXT: or %s11, 0, %s9
%r = tail call i32 @llvm.bswap.i32(i32 %p)
ret i32 %r
}
-declare i32 @llvm.bswap.i32(i32)
+define zeroext i32 @func32z(i32 zeroext %p) {
+; CHECK-LABEL: func32z:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: bswp %s0, %s0, 1
+; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i32 @llvm.bswap.i32(i32 %p)
+ ret i32 %r
+}
-define signext i16 @func3(i16 signext %p) {
-; CHECK-LABEL: func3:
+define signext i16 @func16s(i16 signext %p) {
+; CHECK-LABEL: func16s:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: bswp %s0, %s0, 1
; CHECK-NEXT: and %s0, %s0, (32)0
@@ -35,34 +58,70 @@ define signext i16 @func3(i16 signext %p) {
ret i16 %r
}
-declare i16 @llvm.bswap.i16(i16)
+define zeroext i16 @func16z(i16 zeroext %p) {
+; CHECK-LABEL: func16z:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: bswp %s0, %s0, 1
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: srl %s0, %s0, 16
+; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i16 @llvm.bswap.i16(i16 %p)
+ ret i16 %r
+}
-define i64 @func4(i64 %p) {
-; CHECK-LABEL: func4:
+define i128 @func128i() {
+; CHECK-LABEL: func128i:
; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: bswp %s0, %s0, 0
+; CHECK-NEXT: or %s0, 0, (0)1
+; CHECK-NEXT: lea.sl %s1, -16777216
; CHECK-NEXT: or %s11, 0, %s9
- %r = tail call i64 @llvm.bswap.i64(i64 %p)
+ %r = tail call i128 @llvm.bswap.i128(i128 255)
+ ret i128 %r
+}
+
+define i64 @func64i() {
+; CHECK-LABEL: func64i:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: lea.sl %s0, -16777216
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i64 @llvm.bswap.i64(i64 255)
ret i64 %r
}
-define i32 @func5(i32 %p) {
-; CHECK-LABEL: func5:
+define signext i32 @func32si() {
+; CHECK-LABEL: func32si:
; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: bswp %s0, %s0, 1
+; CHECK-NEXT: lea %s0, -16777216
; CHECK-NEXT: or %s11, 0, %s9
- %r = tail call i32 @llvm.bswap.i32(i32 %p)
+ %r = tail call i32 @llvm.bswap.i32(i32 255)
ret i32 %r
}
-define zeroext i16 @func6(i16 zeroext %p) {
-; CHECK-LABEL: func6:
+define zeroext i32 @func32zi() {
+; CHECK-LABEL: func32zi:
; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: bswp %s0, %s0, 1
+; CHECK-NEXT: lea %s0, -16777216
; CHECK-NEXT: and %s0, %s0, (32)0
-; CHECK-NEXT: srl %s0, %s0, 16
-; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1
; CHECK-NEXT: or %s11, 0, %s9
- %r = tail call i16 @llvm.bswap.i16(i16 %p)
+ %r = tail call i32 @llvm.bswap.i32(i32 255)
+ ret i32 %r
+}
+
+define signext i16 @func16si() {
+; CHECK-LABEL: func16si:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: lea %s0, -256
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i16 @llvm.bswap.i16(i16 255)
+ ret i16 %r
+}
+
+define zeroext i16 @func16zi() {
+; CHECK-LABEL: func16zi:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: lea %s0, 65280
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i16 @llvm.bswap.i16(i16 255)
ret i16 %r
}
diff --git a/llvm/test/CodeGen/VE/ctlz.ll b/llvm/test/CodeGen/VE/ctlz.ll
index 5853851ac9c9..6be8accf1343 100644
--- a/llvm/test/CodeGen/VE/ctlz.ll
+++ b/llvm/test/CodeGen/VE/ctlz.ll
@@ -1,7 +1,28 @@
; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
-define i64 @func1(i64 %p) {
-; CHECK-LABEL: func1:
+declare i128 @llvm.ctlz.i128(i128, i1)
+declare i64 @llvm.ctlz.i64(i64, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i16 @llvm.ctlz.i16(i16, i1)
+declare i8 @llvm.ctlz.i8(i8, i1)
+
+define i128 @func128(i128 %p){
+; CHECK-LABEL: func128:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s2, 0, (0)1
+; CHECK-NEXT: cmps.l %s3, %s1, %s2
+; CHECK-NEXT: ldz %s1, %s1
+; CHECK-NEXT: ldz %s0, %s0
+; CHECK-NEXT: lea %s0, 64(, %s0)
+; CHECK-NEXT: cmov.l.ne %s0, %s1, %s3
+; CHECK-NEXT: or %s1, 0, %s2
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i128 @llvm.ctlz.i128(i128 %p, i1 true)
+ ret i128 %r
+}
+
+define i64 @func64(i64 %p) {
+; CHECK-LABEL: func64:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: ldz %s0, %s0
; CHECK-NEXT: or %s11, 0, %s9
@@ -9,45 +30,245 @@ define i64 @func1(i64 %p) {
ret i64 %r
}
-declare i64 @llvm.ctlz.i64(i64, i1)
-
-define i32 @func2(i32 %p) {
-; CHECK-LABEL: func2:
+define signext i32 @func32s(i32 signext %p) {
+; CHECK-LABEL: func32s:
; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
-; CHECK-NEXT: sll %s0, %s0, 32
+; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: ldz %s0, %s0
+; CHECK-NEXT: lea %s0, -32(, %s0)
+; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: or %s11, 0, %s9
%r = tail call i32 @llvm.ctlz.i32(i32 %p, i1 true)
ret i32 %r
}
-declare i32 @llvm.ctlz.i32(i32, i1)
+define zeroext i32 @func32z(i32 zeroext %p) {
+; CHECK-LABEL: func32z:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: ldz %s0, %s0
+; CHECK-NEXT: lea %s0, -32(, %s0)
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i32 @llvm.ctlz.i32(i32 %p, i1 true)
+ ret i32 %r
+}
-define i16 @func3(i16 %p) {
-; CHECK-LABEL: func3:
+define signext i16 @func16s(i16 signext %p) {
+; CHECK-LABEL: func16s:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: and %s0, %s0, (48)0
-; CHECK-NEXT: sll %s0, %s0, 32
; CHECK-NEXT: ldz %s0, %s0
+; CHECK-NEXT: lea %s0, -32(, %s0)
; CHECK-NEXT: adds.w.sx %s0, -16, %s0
+; CHECK-NEXT: and %s0, %s0, (48)0
; CHECK-NEXT: or %s11, 0, %s9
%r = tail call i16 @llvm.ctlz.i16(i16 %p, i1 true)
ret i16 %r
}
-declare i16 @llvm.ctlz.i16(i16, i1)
+define zeroext i16 @func16z(i16 zeroext %p) {
+; CHECK-LABEL: func16z:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: ldz %s0, %s0
+; CHECK-NEXT: lea %s0, -32(, %s0)
+; CHECK-NEXT: adds.w.sx %s0, -16, %s0
+; CHECK-NEXT: and %s0, %s0, (48)0
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i16 @llvm.ctlz.i16(i16 %p, i1 true)
+ ret i16 %r
+}
-define i8 @func4(i8 %p) {
-; CHECK-LABEL: func4:
+define signext i8 @func8s(i8 signext %p) {
+; CHECK-LABEL: func8s:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: and %s0, %s0, (56)0
-; CHECK-NEXT: sll %s0, %s0, 32
; CHECK-NEXT: ldz %s0, %s0
+; CHECK-NEXT: lea %s0, -32(, %s0)
; CHECK-NEXT: adds.w.sx %s0, -24, %s0
+; CHECK-NEXT: and %s0, %s0, (56)0
; CHECK-NEXT: or %s11, 0, %s9
%r = tail call i8 @llvm.ctlz.i8(i8 %p, i1 true)
ret i8 %r
}
-declare i8 @llvm.ctlz.i8(i8, i1)
+define zeroext i8 @func8z(i8 zeroext %p) {
+; CHECK-LABEL: func8z:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: ldz %s0, %s0
+; CHECK-NEXT: lea %s0, -32(, %s0)
+; CHECK-NEXT: adds.w.sx %s0, -24, %s0
+; CHECK-NEXT: and %s0, %s0, (56)0
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i8 @llvm.ctlz.i8(i8 %p, i1 true)
+ ret i8 %r
+}
+
+define i128 @func128i(){
+; CHECK-LABEL: func128i:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: lea %s0, 112
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i128 @llvm.ctlz.i128(i128 65535, i1 true)
+ ret i128 %r
+}
+
+define i64 @func64i() {
+; CHECK-LABEL: func64i:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 48, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i64 @llvm.ctlz.i64(i64 65535, i1 true)
+ ret i64 %r
+}
+
+define signext i32 @func32is() {
+; CHECK-LABEL: func32is:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 16, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i32 @llvm.ctlz.i32(i32 65535, i1 true)
+ ret i32 %r
+}
+
+define zeroext i32 @func32iz() {
+; CHECK-LABEL: func32iz:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 16, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i32 @llvm.ctlz.i32(i32 65535, i1 true)
+ ret i32 %r
+}
+
+define signext i16 @func16is() {
+; CHECK-LABEL: func16is:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 8, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i16 @llvm.ctlz.i16(i16 255, i1 true)
+ ret i16 %r
+}
+
+define zeroext i16 @func16iz() {
+; CHECK-LABEL: func16iz:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 8, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i16 @llvm.ctlz.i16(i16 255, i1 true)
+ ret i16 %r
+}
+
+define signext i8 @func8is() {
+; CHECK-LABEL: func8is:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 0, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i8 @llvm.ctlz.i8(i8 255, i1 true)
+ ret i8 %r
+}
+
+define zeroext i8 @func8iz() {
+; CHECK-LABEL: func8iz:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 0, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i8 @llvm.ctlz.i8(i8 255, i1 true)
+ ret i8 %r
+}
+
+define i128 @func128x(i128 %p){
+; CHECK-LABEL: func128x:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s2, 0, (0)1
+; CHECK-NEXT: cmps.l %s3, %s1, %s2
+; CHECK-NEXT: ldz %s1, %s1
+; CHECK-NEXT: ldz %s0, %s0
+; CHECK-NEXT: lea %s0, 64(, %s0)
+; CHECK-NEXT: cmov.l.ne %s0, %s1, %s3
+; CHECK-NEXT: or %s1, 0, %s2
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i128 @llvm.ctlz.i128(i128 %p, i1 false)
+ ret i128 %r
+}
+
+define i64 @func64x(i64 %p) {
+; CHECK-LABEL: func64x:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: ldz %s0, %s0
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i64 @llvm.ctlz.i64(i64 %p, i1 false)
+ ret i64 %r
+}
+
+define signext i32 @func32sx(i32 signext %p) {
+; CHECK-LABEL: func32sx:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: ldz %s0, %s0
+; CHECK-NEXT: lea %s0, -32(, %s0)
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i32 @llvm.ctlz.i32(i32 %p, i1 false)
+ ret i32 %r
+}
+
+define zeroext i32 @func32zx(i32 zeroext %p) {
+; CHECK-LABEL: func32zx:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: ldz %s0, %s0
+; CHECK-NEXT: lea %s0, -32(, %s0)
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i32 @llvm.ctlz.i32(i32 %p, i1 false)
+ ret i32 %r
+}
+
+define signext i16 @func16sx(i16 signext %p) {
+; CHECK-LABEL: func16sx:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: and %s0, %s0, (48)0
+; CHECK-NEXT: ldz %s0, %s0
+; CHECK-NEXT: lea %s0, -32(, %s0)
+; CHECK-NEXT: adds.w.sx %s0, -16, %s0
+; CHECK-NEXT: and %s0, %s0, (48)0
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i16 @llvm.ctlz.i16(i16 %p, i1 false)
+ ret i16 %r
+}
+
+define zeroext i16 @func16zx(i16 zeroext %p) {
+; CHECK-LABEL: func16zx:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: ldz %s0, %s0
+; CHECK-NEXT: lea %s0, -32(, %s0)
+; CHECK-NEXT: adds.w.sx %s0, -16, %s0
+; CHECK-NEXT: and %s0, %s0, (48)0
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i16 @llvm.ctlz.i16(i16 %p, i1 false)
+ ret i16 %r
+}
+
+define signext i8 @func8sx(i8 signext %p) {
+; CHECK-LABEL: func8sx:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: and %s0, %s0, (56)0
+; CHECK-NEXT: ldz %s0, %s0
+; CHECK-NEXT: lea %s0, -32(, %s0)
+; CHECK-NEXT: adds.w.sx %s0, -24, %s0
+; CHECK-NEXT: and %s0, %s0, (56)0
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i8 @llvm.ctlz.i8(i8 %p, i1 false)
+ ret i8 %r
+}
+
+define zeroext i8 @func8zx(i8 zeroext %p) {
+; CHECK-LABEL: func8zx:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: ldz %s0, %s0
+; CHECK-NEXT: lea %s0, -32(, %s0)
+; CHECK-NEXT: adds.w.sx %s0, -24, %s0
+; CHECK-NEXT: and %s0, %s0, (56)0
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i8 @llvm.ctlz.i8(i8 %p, i1 false)
+ ret i8 %r
+}
diff --git a/llvm/test/CodeGen/VE/ctpop.ll b/llvm/test/CodeGen/VE/ctpop.ll
index 8fee9104ed07..4ea8aba1b7b6 100644
--- a/llvm/test/CodeGen/VE/ctpop.ll
+++ b/llvm/test/CodeGen/VE/ctpop.ll
@@ -1,7 +1,25 @@
; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
-define i64 @func1(i64 %p) {
-; CHECK-LABEL: func1:
+declare i128 @llvm.ctpop.i128(i128)
+declare i64 @llvm.ctpop.i64(i64)
+declare i32 @llvm.ctpop.i32(i32)
+declare i16 @llvm.ctpop.i16(i16)
+declare i8 @llvm.ctpop.i8(i8)
+
+define i128 @func128(i128 %p) {
+; CHECK-LABEL: func128:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: pcnt %s1, %s1
+; CHECK-NEXT: pcnt %s0, %s0
+; CHECK-NEXT: adds.l %s0, %s0, %s1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i128 @llvm.ctpop.i128(i128 %p)
+ ret i128 %r
+}
+
+define i64 @func64(i64 %p) {
+; CHECK-LABEL: func64:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: pcnt %s0, %s0
; CHECK-NEXT: or %s11, 0, %s9
@@ -9,12 +27,9 @@ define i64 @func1(i64 %p) {
ret i64 %r
}
-declare i64 @llvm.ctpop.i64(i64 %p)
-
-define i32 @func2(i32 %p) {
-; CHECK-LABEL: func2:
+define signext i32 @func32s(i32 signext %p) {
+; CHECK-LABEL: func32s:
; CHECK: .LBB{{[0-9]+}}_2:
-; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: pcnt %s0, %s0
; CHECK-NEXT: or %s11, 0, %s9
@@ -22,10 +37,17 @@ define i32 @func2(i32 %p) {
ret i32 %r
}
-declare i32 @llvm.ctpop.i32(i32 %p)
+define zeroext i32 @func32z(i32 zeroext %p) {
+; CHECK-LABEL: func32z:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: pcnt %s0, %s0
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i32 @llvm.ctpop.i32(i32 %p)
+ ret i32 %r
+}
-define i16 @func3(i16 %p) {
-; CHECK-LABEL: func3:
+define signext i16 @func16s(i16 signext %p) {
+; CHECK-LABEL: func16s:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: and %s0, %s0, (48)0
; CHECK-NEXT: pcnt %s0, %s0
@@ -34,10 +56,17 @@ define i16 @func3(i16 %p) {
ret i16 %r
}
-declare i16 @llvm.ctpop.i16(i16 %p)
+define zeroext i16 @func16z(i16 zeroext %p) {
+; CHECK-LABEL: func16z:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: pcnt %s0, %s0
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i16 @llvm.ctpop.i16(i16 %p)
+ ret i16 %r
+}
-define i8 @func4(i8 %p) {
-; CHECK-LABEL: func4:
+define signext i8 @func8s(i8 signext %p) {
+; CHECK-LABEL: func8s:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: and %s0, %s0, (56)0
; CHECK-NEXT: pcnt %s0, %s0
@@ -46,4 +75,84 @@ define i8 @func4(i8 %p) {
ret i8 %r
}
-declare i8 @llvm.ctpop.i8(i8)
+define zeroext i8 @func8z(i8 zeroext %p) {
+; CHECK-LABEL: func8z:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: pcnt %s0, %s0
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i8 @llvm.ctpop.i8(i8 %p)
+ ret i8 %r
+}
+
+define i128 @func128i() {
+; CHECK-LABEL: func128i:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 16, (0)1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i128 @llvm.ctpop.i128(i128 65535)
+ ret i128 %r
+}
+
+define i64 @func64i() {
+; CHECK-LABEL: func64i:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 16, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i64 @llvm.ctpop.i64(i64 65535)
+ ret i64 %r
+}
+
+define signext i32 @func32is() {
+; CHECK-LABEL: func32is:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 16, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i32 @llvm.ctpop.i32(i32 65535)
+ ret i32 %r
+}
+
+define zeroext i32 @func32iz() {
+; CHECK-LABEL: func32iz:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 16, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i32 @llvm.ctpop.i32(i32 65535)
+ ret i32 %r
+}
+
+define signext i16 @func16si() {
+; CHECK-LABEL: func16si:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 16, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i16 @llvm.ctpop.i16(i16 65535)
+ ret i16 %r
+}
+
+define zeroext i16 @func16zi() {
+; CHECK-LABEL: func16zi:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 16, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i16 @llvm.ctpop.i16(i16 65535)
+ ret i16 %r
+}
+
+define signext i8 @func8si() {
+; CHECK-LABEL: func8si:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 8, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i8 @llvm.ctpop.i8(i8 255)
+ ret i8 %r
+}
+
+define zeroext i8 @func8zi() {
+; CHECK-LABEL: func8zi:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 8, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i8 @llvm.ctpop.i8(i8 255)
+ ret i8 %r
+}
diff --git a/llvm/test/CodeGen/VE/cttz.ll b/llvm/test/CodeGen/VE/cttz.ll
index 46bb52d29102..9d4f94c742f5 100644
--- a/llvm/test/CodeGen/VE/cttz.ll
+++ b/llvm/test/CodeGen/VE/cttz.ll
@@ -1,7 +1,32 @@
; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
-define i64 @func1(i64 %p) {
-; CHECK-LABEL: func1:
+declare i128 @llvm.cttz.i128(i128, i1)
+declare i64 @llvm.cttz.i64(i64, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i16 @llvm.cttz.i16(i16, i1)
+declare i8 @llvm.cttz.i8(i8, i1)
+
+define i128 @func128(i128 %p) {
+; CHECK-LABEL: func128:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s2, 0, (0)1
+; CHECK-NEXT: cmps.l %s3, %s0, %s2
+; CHECK-NEXT: lea %s4, -1(, %s0)
+; CHECK-NEXT: nnd %s0, %s0, %s4
+; CHECK-NEXT: pcnt %s4, %s0
+; CHECK-NEXT: lea %s0, -1(, %s1)
+; CHECK-NEXT: nnd %s0, %s1, %s0
+; CHECK-NEXT: pcnt %s0, %s0
+; CHECK-NEXT: lea %s0, 64(, %s0)
+; CHECK-NEXT: cmov.l.ne %s0, %s4, %s3
+; CHECK-NEXT: or %s1, 0, %s2
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i128 @llvm.cttz.i128(i128 %p, i1 true)
+ ret i128 %r
+}
+
+define i64 @func64(i64 %p) {
+; CHECK-LABEL: func64:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: lea %s1, -1(, %s0)
; CHECK-NEXT: nnd %s0, %s0, %s1
@@ -11,52 +36,159 @@ define i64 @func1(i64 %p) {
ret i64 %r
}
-declare i64 @llvm.cttz.i64(i64, i1)
+define signext i32 @func32s(i32 signext %p) {
+; CHECK-LABEL: func32s:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s1, -1, %s0
+; CHECK-NEXT: xor %s0, -1, %s0
+; CHECK-NEXT: and %s0, %s0, %s1
+; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1
+; CHECK-NEXT: pcnt %s0, %s0
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i32 @llvm.cttz.i32(i32 %p, i1 true)
+ ret i32 %r
+}
-define i32 @func2(i32 %p) {
-; CHECK-LABEL: func2:
+define zeroext i32 @func32z(i32 zeroext %p) {
+; CHECK-LABEL: func32z:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
; CHECK-NEXT: adds.w.sx %s1, -1, %s0
; CHECK-NEXT: xor %s0, -1, %s0
; CHECK-NEXT: and %s0, %s0, %s1
-; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1
; CHECK-NEXT: pcnt %s0, %s0
; CHECK-NEXT: or %s11, 0, %s9
%r = tail call i32 @llvm.cttz.i32(i32 %p, i1 true)
ret i32 %r
}
-declare i32 @llvm.cttz.i32(i32, i1)
+define signext i16 @func16s(i16 signext %p) {
+; CHECK-LABEL: func16s:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s1, -1, %s0
+; CHECK-NEXT: xor %s0, -1, %s0
+; CHECK-NEXT: and %s0, %s0, %s1
+; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1
+; CHECK-NEXT: pcnt %s0, %s0
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i16 @llvm.cttz.i16(i16 %p, i1 true)
+ ret i16 %r
+}
-define i16 @func3(i16 %p) {
-; CHECK-LABEL: func3:
+define zeroext i16 @func16z(i16 zeroext %p) {
+; CHECK-LABEL: func16z:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
; CHECK-NEXT: adds.w.sx %s1, -1, %s0
; CHECK-NEXT: xor %s0, -1, %s0
; CHECK-NEXT: and %s0, %s0, %s1
-; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1
; CHECK-NEXT: pcnt %s0, %s0
; CHECK-NEXT: or %s11, 0, %s9
%r = tail call i16 @llvm.cttz.i16(i16 %p, i1 true)
ret i16 %r
}
-declare i16 @llvm.cttz.i16(i16, i1)
+define signext i8 @func8s(i8 signext %p) {
+; CHECK-LABEL: func8s:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT: adds.w.sx %s1, -1, %s0
+; CHECK-NEXT: xor %s0, -1, %s0
+; CHECK-NEXT: and %s0, %s0, %s1
+; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1
+; CHECK-NEXT: pcnt %s0, %s0
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i8 @llvm.cttz.i8(i8 %p, i1 true)
+ ret i8 %r
+}
-define i8 @func4(i8 %p) {
-; CHECK-LABEL: func4:
+define zeroext i8 @func8z(i8 zeroext %p) {
+; CHECK-LABEL: func8z:
; CHECK: .LBB{{[0-9]+}}_2:
; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1
; CHECK-NEXT: adds.w.sx %s1, -1, %s0
; CHECK-NEXT: xor %s0, -1, %s0
; CHECK-NEXT: and %s0, %s0, %s1
-; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1
; CHECK-NEXT: pcnt %s0, %s0
; CHECK-NEXT: or %s11, 0, %s9
%r = tail call i8 @llvm.cttz.i8(i8 %p, i1 true)
ret i8 %r
}
-declare i8 @llvm.cttz.i8(i8, i1)
+define i128 @func128i() {
+; CHECK-LABEL: func128i:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 8, (0)1
+; CHECK-NEXT: or %s1, 0, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i128 @llvm.cttz.i128(i128 65280, i1 true)
+ ret i128 %r
+}
+
+define i64 @func64i() {
+; CHECK-LABEL: func64i:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 8, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i64 @llvm.cttz.i64(i64 65280, i1 true)
+ ret i64 %r
+}
+
+define signext i32 @func32is() {
+; CHECK-LABEL: func32is:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 8, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i32 @llvm.cttz.i32(i32 65280, i1 true)
+ ret i32 %r
+}
+
+define zeroext i32 @func32iz() {
+; CHECK-LABEL: func32iz:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 8, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i32 @llvm.cttz.i32(i32 65280, i1 true)
+ ret i32 %r
+}
+
+define signext i16 @func16is() {
+; CHECK-LABEL: func16is:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 8, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i16 @llvm.cttz.i16(i16 65280, i1 true)
+ ret i16 %r
+}
+
+define zeroext i16 @func16iz() {
+; CHECK-LABEL: func16iz:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 8, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i16 @llvm.cttz.i16(i16 65280, i1 true)
+ ret i16 %r
+}
+
+define signext i8 @func8is() {
+; CHECK-LABEL: func8is:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 4, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i8 @llvm.cttz.i8(i8 240, i1 true)
+ ret i8 %r
+}
+
+define zeroext i8 @func8iz() {
+; CHECK-LABEL: func8iz:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 4, (0)1
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i8 @llvm.cttz.i8(i8 240, i1 true)
+ ret i8 %r
+}
More information about the llvm-commits
mailing list