[llvm] 4d5b020 - [ARM] Additional SSAT/USAT tests for min/max patterns. NFC
David Green via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 21 08:25:02 PST 2022
Author: David Green
Date: 2022-02-21T16:24:58Z
New Revision: 4d5b020d6e0df8e34bd79154660cefd3676d21f2
URL: https://github.com/llvm/llvm-project/commit/4d5b020d6e0df8e34bd79154660cefd3676d21f2
DIFF: https://github.com/llvm/llvm-project/commit/4d5b020d6e0df8e34bd79154660cefd3676d21f2.diff
LOG: [ARM] Additional SSAT/USAT tests for min/max patterns. NFC
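
For context: SSAT and USAT saturate a value to a signed or unsigned
power-of-two range, e.g. "ssat r0, #24, r0" clamps to [-8388608, 8388607].
The new tests build that clamp from the llvm.smin/llvm.smax (and llvm.umin)
intrinsics rather than from icmp/select, and the CHECK lines show these
min/max forms still lowering to compare/move sequences at this point. A
minimal sketch of the signed shape, with a hypothetical function name:

define i32 @clamp_s24(i32 %x) {
entry:
  ; clamp to the 24-bit signed range that ssat #24 would produce
  %lo = call i32 @llvm.smin.i32(i32 %x, i32 8388607)
  %hi = call i32 @llvm.smax.i32(i32 %lo, i32 -8388608)
  ret i32 %hi
}
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)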
Added:
Modified:
llvm/test/CodeGen/ARM/ssat-unroll-loops.ll
llvm/test/CodeGen/ARM/ssat.ll
llvm/test/CodeGen/ARM/usat.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll b/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll
index f1b4ab2d937d7..1f7574a8cca98 100644
--- a/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll
+++ b/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll
@@ -121,3 +121,139 @@ while.body: ; preds = %while.body.prol.loo
while.end: ; preds = %while.body, %while.body.prol.loopexit, %entry
ret void
}
+
+define void @ssat_unroll_minmax(i16* nocapture readonly %pSrcA, i16* nocapture readonly %pSrcB, i16* nocapture writeonly %pDst, i32 %blockSize) {
+; CHECK-LABEL: ssat_unroll_minmax:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r11, lr}
+; CHECK-NEXT: push {r4, r5, r11, lr}
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: beq .LBB1_6
+; CHECK-NEXT: @ %bb.1: @ %while.body.preheader
+; CHECK-NEXT: movw r12, #32768
+; CHECK-NEXT: sub lr, r3, #1
+; CHECK-NEXT: tst r3, #1
+; CHECK-NEXT: movt r12, #65535
+; CHECK-NEXT: beq .LBB1_3
+; CHECK-NEXT: @ %bb.2: @ %while.body.prol.preheader
+; CHECK-NEXT: ldrsh r3, [r0], #2
+; CHECK-NEXT: ldrsh r4, [r1], #2
+; CHECK-NEXT: smulbb r3, r4, r3
+; CHECK-NEXT: asr r4, r3, #14
+; CHECK-NEXT: cmn r4, #32768
+; CHECK-NEXT: mov r4, r12
+; CHECK-NEXT: asrgt r4, r3, #14
+; CHECK-NEXT: movw r3, #32767
+; CHECK-NEXT: cmp r4, r3
+; CHECK-NEXT: movge r4, r3
+; CHECK-NEXT: mov r3, lr
+; CHECK-NEXT: strh r4, [r2], #2
+; CHECK-NEXT: .LBB1_3: @ %while.body.prol.loopexit
+; CHECK-NEXT: cmp lr, #0
+; CHECK-NEXT: beq .LBB1_6
+; CHECK-NEXT: @ %bb.4: @ %while.body.preheader1
+; CHECK-NEXT: movw lr, #32767
+; CHECK-NEXT: .LBB1_5: @ %while.body
+; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldrsh r4, [r0]
+; CHECK-NEXT: ldrsh r5, [r1]
+; CHECK-NEXT: smulbb r4, r5, r4
+; CHECK-NEXT: asr r5, r4, #14
+; CHECK-NEXT: cmn r5, #32768
+; CHECK-NEXT: mov r5, r12
+; CHECK-NEXT: asrgt r5, r4, #14
+; CHECK-NEXT: cmp r5, lr
+; CHECK-NEXT: movge r5, lr
+; CHECK-NEXT: strh r5, [r2]
+; CHECK-NEXT: ldrsh r4, [r0, #2]
+; CHECK-NEXT: add r0, r0, #4
+; CHECK-NEXT: ldrsh r5, [r1, #2]
+; CHECK-NEXT: add r1, r1, #4
+; CHECK-NEXT: smulbb r4, r5, r4
+; CHECK-NEXT: asr r5, r4, #14
+; CHECK-NEXT: cmn r5, #32768
+; CHECK-NEXT: mov r5, r12
+; CHECK-NEXT: asrgt r5, r4, #14
+; CHECK-NEXT: cmp r5, lr
+; CHECK-NEXT: movge r5, lr
+; CHECK-NEXT: subs r3, r3, #2
+; CHECK-NEXT: strh r5, [r2, #2]
+; CHECK-NEXT: add r2, r2, #4
+; CHECK-NEXT: bne .LBB1_5
+; CHECK-NEXT: .LBB1_6: @ %while.end
+; CHECK-NEXT: pop {r4, r5, r11, pc}
+entry:
+ %cmp.not7 = icmp eq i32 %blockSize, 0
+ br i1 %cmp.not7, label %while.end, label %while.body.preheader
+
+while.body.preheader: ; preds = %entry
+ %0 = add i32 %blockSize, -1
+ %xtraiter = and i32 %blockSize, 1
+ %lcmp.mod.not = icmp eq i32 %xtraiter, 0
+ br i1 %lcmp.mod.not, label %while.body.prol.loopexit, label %while.body.prol.preheader
+
+while.body.prol.preheader: ; preds = %while.body.preheader
+ %incdec.ptr.prol = getelementptr inbounds i16, i16* %pSrcA, i64 1
+ %1 = load i16, i16* %pSrcA, align 2
+ %conv.prol = sext i16 %1 to i32
+ %incdec.ptr1.prol = getelementptr inbounds i16, i16* %pSrcB, i64 1
+ %2 = load i16, i16* %pSrcB, align 2
+ %conv2.prol = sext i16 %2 to i32
+ %mul.prol = mul nsw i32 %conv2.prol, %conv.prol
+ %shr.prol = ashr i32 %mul.prol, 14
+ %3 = call i32 @llvm.smax.i32(i32 %shr.prol, i32 -32768)
+ %4 = call i32 @llvm.smin.i32(i32 %3, i32 32767)
+ %conv3.prol = trunc i32 %4 to i16
+ %incdec.ptr4.prol = getelementptr inbounds i16, i16* %pDst, i64 1
+ store i16 %conv3.prol, i16* %pDst, align 2
+ br label %while.body.prol.loopexit
+
+while.body.prol.loopexit: ; preds = %while.body.prol.preheader, %while.body.preheader
+ %blkCnt.011.unr = phi i32 [ %blockSize, %while.body.preheader ], [ %0, %while.body.prol.preheader ]
+ %pSrcA.addr.010.unr = phi i16* [ %pSrcA, %while.body.preheader ], [ %incdec.ptr.prol, %while.body.prol.preheader ]
+ %pDst.addr.09.unr = phi i16* [ %pDst, %while.body.preheader ], [ %incdec.ptr4.prol, %while.body.prol.preheader ]
+ %pSrcB.addr.08.unr = phi i16* [ %pSrcB, %while.body.preheader ], [ %incdec.ptr1.prol, %while.body.prol.preheader ]
+ %5 = icmp eq i32 %0, 0
+ br i1 %5, label %while.end, label %while.body
+
+while.body: ; preds = %while.body.prol.loopexit, %while.body
+ %blkCnt.011 = phi i32 [ %dec.1, %while.body ], [ %blkCnt.011.unr, %while.body.prol.loopexit ]
+ %pSrcA.addr.010 = phi i16* [ %incdec.ptr.1, %while.body ], [ %pSrcA.addr.010.unr, %while.body.prol.loopexit ]
+ %pDst.addr.09 = phi i16* [ %incdec.ptr4.1, %while.body ], [ %pDst.addr.09.unr, %while.body.prol.loopexit ]
+ %pSrcB.addr.08 = phi i16* [ %incdec.ptr1.1, %while.body ], [ %pSrcB.addr.08.unr, %while.body.prol.loopexit ]
+ %incdec.ptr = getelementptr inbounds i16, i16* %pSrcA.addr.010, i64 1
+ %6 = load i16, i16* %pSrcA.addr.010, align 2
+ %conv = sext i16 %6 to i32
+ %incdec.ptr1 = getelementptr inbounds i16, i16* %pSrcB.addr.08, i64 1
+ %7 = load i16, i16* %pSrcB.addr.08, align 2
+ %conv2 = sext i16 %7 to i32
+ %mul = mul nsw i32 %conv2, %conv
+ %shr = ashr i32 %mul, 14
+ %8 = call i32 @llvm.smax.i32(i32 %shr, i32 -32768)
+ %9 = call i32 @llvm.smin.i32(i32 %8, i32 32767)
+ %conv3 = trunc i32 %9 to i16
+ %incdec.ptr4 = getelementptr inbounds i16, i16* %pDst.addr.09, i64 1
+ store i16 %conv3, i16* %pDst.addr.09, align 2
+ %incdec.ptr.1 = getelementptr inbounds i16, i16* %pSrcA.addr.010, i64 2
+ %10 = load i16, i16* %incdec.ptr, align 2
+ %conv.1 = sext i16 %10 to i32
+ %incdec.ptr1.1 = getelementptr inbounds i16, i16* %pSrcB.addr.08, i64 2
+ %11 = load i16, i16* %incdec.ptr1, align 2
+ %conv2.1 = sext i16 %11 to i32
+ %mul.1 = mul nsw i32 %conv2.1, %conv.1
+ %shr.1 = ashr i32 %mul.1, 14
+ %12 = call i32 @llvm.smax.i32(i32 %shr.1, i32 -32768)
+ %13 = call i32 @llvm.smin.i32(i32 %12, i32 32767)
+ %conv3.1 = trunc i32 %13 to i16
+ %incdec.ptr4.1 = getelementptr inbounds i16, i16* %pDst.addr.09, i64 2
+ store i16 %conv3.1, i16* %incdec.ptr4, align 2
+ %dec.1 = add i32 %blkCnt.011, -2
+ %cmp.not.1 = icmp eq i32 %dec.1, 0
+ br i1 %cmp.not.1, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %while.body.prol.loopexit, %entry
+ ret void
+}
+
+declare i32 @llvm.smax.i32(i32, i32) #1
+declare i32 @llvm.smin.i32(i32, i32) #1
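
For reference, each unrolled iteration above performs a fixed-point
multiply-and-saturate: sign-extend two i16 inputs, multiply, arithmetic
shift right by 14, then clamp to the i16 range with smax/smin before
narrowing. A minimal scalar sketch with a hypothetical function name:

define i16 @mul_sat_scalar(i16 %a, i16 %b) {
entry:
  %ea = sext i16 %a to i32
  %eb = sext i16 %b to i32
  %mul = mul nsw i32 %eb, %ea   ; 32-bit product of the sign-extended inputs
  %shr = ashr i32 %mul, 14      ; fixed-point rescale, matching the test's shift
  %lo = call i32 @llvm.smax.i32(i32 %shr, i32 -32768)
  %hi = call i32 @llvm.smin.i32(i32 %lo, i32 32767)
  %r = trunc i32 %hi to i16     ; clamp to [-2^15, 2^15-1], then narrow
  ret i16 %r
}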
diff --git a/llvm/test/CodeGen/ARM/ssat.ll b/llvm/test/CodeGen/ARM/ssat.ll
index 436f6edc2bbbb..ff16b59489f5b 100644
--- a/llvm/test/CodeGen/ARM/ssat.ll
+++ b/llvm/test/CodeGen/ARM/ssat.ll
@@ -649,3 +649,339 @@ define i32 @formulated_invalid(i32 %a) {
%r = and i32 %s2, 16777215
ret i32 %r
}
+
+
+define i32 @mm_sat_base_32bit(i32 %x) {
+; V4T-LABEL: mm_sat_base_32bit:
+; V4T: @ %bb.0: @ %entry
+; V4T-NEXT: ldr r1, .LCPI18_0
+; V4T-NEXT: cmp r0, r1
+; V4T-NEXT: movge r0, r1
+; V4T-NEXT: mov r1, #1065353216
+; V4T-NEXT: orr r1, r1, #-1073741824
+; V4T-NEXT: cmn r0, #8388608
+; V4T-NEXT: movle r0, r1
+; V4T-NEXT: bx lr
+; V4T-NEXT: .p2align 2
+; V4T-NEXT: @ %bb.1:
+; V4T-NEXT: .LCPI18_0:
+; V4T-NEXT: .long 8388607 @ 0x7fffff
+;
+; V6T2-LABEL: mm_sat_base_32bit:
+; V6T2: @ %bb.0: @ %entry
+; V6T2-NEXT: movw r1, #65535
+; V6T2-NEXT: movt r1, #127
+; V6T2-NEXT: cmp r0, r1
+; V6T2-NEXT: movge r0, r1
+; V6T2-NEXT: movw r1, #0
+; V6T2-NEXT: movt r1, #65408
+; V6T2-NEXT: cmn r0, #8388608
+; V6T2-NEXT: movle r0, r1
+; V6T2-NEXT: bx lr
+entry:
+ %0 = call i32 @llvm.smin.i32(i32 %x, i32 8388607)
+ %1 = call i32 @llvm.smax.i32(i32 %0, i32 -8388608)
+ ret i32 %1
+}
+
+define i16 @mm_sat_base_16bit(i16 %x) {
+; V4T-LABEL: mm_sat_base_16bit:
+; V4T: @ %bb.0: @ %entry
+; V4T-NEXT: mov r2, #255
+; V4T-NEXT: lsl r0, r0, #16
+; V4T-NEXT: orr r2, r2, #1792
+; V4T-NEXT: asr r1, r0, #16
+; V4T-NEXT: cmp r1, r2
+; V4T-NEXT: asrlt r2, r0, #16
+; V4T-NEXT: ldr r0, .LCPI19_0
+; V4T-NEXT: cmn r2, #2048
+; V4T-NEXT: movgt r0, r2
+; V4T-NEXT: bx lr
+; V4T-NEXT: .p2align 2
+; V4T-NEXT: @ %bb.1:
+; V4T-NEXT: .LCPI19_0:
+; V4T-NEXT: .long 4294965248 @ 0xfffff800
+;
+; V6T2-LABEL: mm_sat_base_16bit:
+; V6T2: @ %bb.0: @ %entry
+; V6T2-NEXT: sxth r0, r0
+; V6T2-NEXT: movw r1, #2047
+; V6T2-NEXT: cmp r0, r1
+; V6T2-NEXT: movlt r1, r0
+; V6T2-NEXT: movw r0, #63488
+; V6T2-NEXT: movt r0, #65535
+; V6T2-NEXT: cmn r1, #2048
+; V6T2-NEXT: movgt r0, r1
+; V6T2-NEXT: bx lr
+entry:
+ %0 = call i16 @llvm.smin.i16(i16 %x, i16 2047)
+ %1 = call i16 @llvm.smax.i16(i16 %0, i16 -2048)
+ ret i16 %1
+}
+
+define i8 @mm_sat_base_8bit(i8 %x) {
+; V4T-LABEL: mm_sat_base_8bit:
+; V4T: @ %bb.0: @ %entry
+; V4T-NEXT: lsl r1, r0, #24
+; V4T-NEXT: mov r0, #31
+; V4T-NEXT: asr r2, r1, #24
+; V4T-NEXT: cmp r2, #31
+; V4T-NEXT: asrlt r0, r1, #24
+; V4T-NEXT: cmn r0, #32
+; V4T-NEXT: mvnle r0, #31
+; V4T-NEXT: bx lr
+;
+; V6T2-LABEL: mm_sat_base_8bit:
+; V6T2: @ %bb.0: @ %entry
+; V6T2-NEXT: sxtb r0, r0
+; V6T2-NEXT: cmp r0, #31
+; V6T2-NEXT: movge r0, #31
+; V6T2-NEXT: cmn r0, #32
+; V6T2-NEXT: mvnle r0, #31
+; V6T2-NEXT: bx lr
+entry:
+ %0 = call i8 @llvm.smin.i8(i8 %x, i8 31)
+ %1 = call i8 @llvm.smax.i8(i8 %0, i8 -32)
+ ret i8 %1
+}
+
+define i32 @mm_sat_lower_upper_1(i32 %x) {
+; V4T-LABEL: mm_sat_lower_upper_1:
+; V4T: @ %bb.0: @ %entry
+; V4T-NEXT: ldr r1, .LCPI21_0
+; V4T-NEXT: cmp r0, r1
+; V4T-NEXT: movge r0, r1
+; V4T-NEXT: mov r1, #1065353216
+; V4T-NEXT: orr r1, r1, #-1073741824
+; V4T-NEXT: cmn r0, #8388608
+; V4T-NEXT: movle r0, r1
+; V4T-NEXT: bx lr
+; V4T-NEXT: .p2align 2
+; V4T-NEXT: @ %bb.1:
+; V4T-NEXT: .LCPI21_0:
+; V4T-NEXT: .long 8388607 @ 0x7fffff
+;
+; V6T2-LABEL: mm_sat_lower_upper_1:
+; V6T2: @ %bb.0: @ %entry
+; V6T2-NEXT: movw r1, #65535
+; V6T2-NEXT: movt r1, #127
+; V6T2-NEXT: cmp r0, r1
+; V6T2-NEXT: movge r0, r1
+; V6T2-NEXT: movw r1, #0
+; V6T2-NEXT: movt r1, #65408
+; V6T2-NEXT: cmn r0, #8388608
+; V6T2-NEXT: movle r0, r1
+; V6T2-NEXT: bx lr
+entry:
+ %0 = call i32 @llvm.smin.i32(i32 %x, i32 8388607)
+ %1 = call i32 @llvm.smax.i32(i32 %0, i32 -8388608)
+ ret i32 %1
+}
+
+define i32 @mm_sat_lower_upper_2(i32 %x) {
+; V4T-LABEL: mm_sat_lower_upper_2:
+; V4T: @ %bb.0: @ %entry
+; V4T-NEXT: ldr r1, .LCPI22_0
+; V4T-NEXT: cmp r0, r1
+; V4T-NEXT: movge r0, r1
+; V4T-NEXT: mov r1, #1065353216
+; V4T-NEXT: orr r1, r1, #-1073741824
+; V4T-NEXT: cmn r0, #8388608
+; V4T-NEXT: movle r0, r1
+; V4T-NEXT: bx lr
+; V4T-NEXT: .p2align 2
+; V4T-NEXT: @ %bb.1:
+; V4T-NEXT: .LCPI22_0:
+; V4T-NEXT: .long 8388607 @ 0x7fffff
+;
+; V6T2-LABEL: mm_sat_lower_upper_2:
+; V6T2: @ %bb.0: @ %entry
+; V6T2-NEXT: movw r1, #65535
+; V6T2-NEXT: movt r1, #127
+; V6T2-NEXT: cmp r0, r1
+; V6T2-NEXT: movge r0, r1
+; V6T2-NEXT: movw r1, #0
+; V6T2-NEXT: movt r1, #65408
+; V6T2-NEXT: cmn r0, #8388608
+; V6T2-NEXT: movle r0, r1
+; V6T2-NEXT: bx lr
+entry:
+ %0 = call i32 @llvm.smin.i32(i32 %x, i32 8388607)
+ %1 = call i32 @llvm.smax.i32(i32 %0, i32 -8388608)
+ ret i32 %1
+}
+
+define i32 @mm_sat_upper_lower_1(i32 %x) {
+; V4T-LABEL: mm_sat_upper_lower_1:
+; V4T: @ %bb.0: @ %entry
+; V4T-NEXT: mov r1, #1065353216
+; V4T-NEXT: cmn r0, #8388608
+; V4T-NEXT: orr r1, r1, #-1073741824
+; V4T-NEXT: movle r0, r1
+; V4T-NEXT: ldr r1, .LCPI23_0
+; V4T-NEXT: cmp r0, r1
+; V4T-NEXT: movge r0, r1
+; V4T-NEXT: bx lr
+; V4T-NEXT: .p2align 2
+; V4T-NEXT: @ %bb.1:
+; V4T-NEXT: .LCPI23_0:
+; V4T-NEXT: .long 8388607 @ 0x7fffff
+;
+; V6T2-LABEL: mm_sat_upper_lower_1:
+; V6T2: @ %bb.0: @ %entry
+; V6T2-NEXT: movw r1, #0
+; V6T2-NEXT: cmn r0, #8388608
+; V6T2-NEXT: movt r1, #65408
+; V6T2-NEXT: movle r0, r1
+; V6T2-NEXT: movw r1, #65535
+; V6T2-NEXT: movt r1, #127
+; V6T2-NEXT: cmp r0, r1
+; V6T2-NEXT: movge r0, r1
+; V6T2-NEXT: bx lr
+entry:
+ %0 = call i32 @llvm.smax.i32(i32 %x, i32 -8388608)
+ %1 = call i32 @llvm.smin.i32(i32 %0, i32 8388607)
+ ret i32 %1
+}
+
+define i32 @mm_sat_upper_lower_2(i32 %x) {
+; V4T-LABEL: mm_sat_upper_lower_2:
+; V4T: @ %bb.0: @ %entry
+; V4T-NEXT: mov r1, #1065353216
+; V4T-NEXT: cmn r0, #8388608
+; V4T-NEXT: orr r1, r1, #-1073741824
+; V4T-NEXT: movle r0, r1
+; V4T-NEXT: ldr r1, .LCPI24_0
+; V4T-NEXT: cmp r0, r1
+; V4T-NEXT: movge r0, r1
+; V4T-NEXT: bx lr
+; V4T-NEXT: .p2align 2
+; V4T-NEXT: @ %bb.1:
+; V4T-NEXT: .LCPI24_0:
+; V4T-NEXT: .long 8388607 @ 0x7fffff
+;
+; V6T2-LABEL: mm_sat_upper_lower_2:
+; V6T2: @ %bb.0: @ %entry
+; V6T2-NEXT: movw r1, #0
+; V6T2-NEXT: cmn r0, #8388608
+; V6T2-NEXT: movt r1, #65408
+; V6T2-NEXT: movle r0, r1
+; V6T2-NEXT: movw r1, #65535
+; V6T2-NEXT: movt r1, #127
+; V6T2-NEXT: cmp r0, r1
+; V6T2-NEXT: movge r0, r1
+; V6T2-NEXT: bx lr
+entry:
+ %0 = call i32 @llvm.smax.i32(i32 %x, i32 -8388608)
+ %1 = call i32 @llvm.smin.i32(i32 %0, i32 8388607)
+ ret i32 %1
+}
+
+define i32 @mm_sat_upper_lower_3(i32 %x) {
+; V4T-LABEL: mm_sat_upper_lower_3:
+; V4T: @ %bb.0: @ %entry
+; V4T-NEXT: mov r1, #1065353216
+; V4T-NEXT: cmn r0, #8388608
+; V4T-NEXT: orr r1, r1, #-1073741824
+; V4T-NEXT: movle r0, r1
+; V4T-NEXT: ldr r1, .LCPI25_0
+; V4T-NEXT: cmp r0, r1
+; V4T-NEXT: movge r0, r1
+; V4T-NEXT: bx lr
+; V4T-NEXT: .p2align 2
+; V4T-NEXT: @ %bb.1:
+; V4T-NEXT: .LCPI25_0:
+; V4T-NEXT: .long 8388607 @ 0x7fffff
+;
+; V6T2-LABEL: mm_sat_upper_lower_3:
+; V6T2: @ %bb.0: @ %entry
+; V6T2-NEXT: movw r1, #0
+; V6T2-NEXT: cmn r0, #8388608
+; V6T2-NEXT: movt r1, #65408
+; V6T2-NEXT: movle r0, r1
+; V6T2-NEXT: movw r1, #65535
+; V6T2-NEXT: movt r1, #127
+; V6T2-NEXT: cmp r0, r1
+; V6T2-NEXT: movge r0, r1
+; V6T2-NEXT: bx lr
+entry:
+ %0 = call i32 @llvm.smax.i32(i32 %x, i32 -8388608)
+ %1 = call i32 @llvm.smin.i32(i32 %0, i32 8388607)
+ ret i32 %1
+}
+
+define i32 @mm_sat_le_ge(i32 %x) {
+; V4T-LABEL: mm_sat_le_ge:
+; V4T: @ %bb.0: @ %entry
+; V4T-NEXT: mov r1, #1065353216
+; V4T-NEXT: cmn r0, #8388608
+; V4T-NEXT: orr r1, r1, #-1073741824
+; V4T-NEXT: movle r0, r1
+; V4T-NEXT: ldr r1, .LCPI26_0
+; V4T-NEXT: cmp r0, r1
+; V4T-NEXT: movge r0, r1
+; V4T-NEXT: bx lr
+; V4T-NEXT: .p2align 2
+; V4T-NEXT: @ %bb.1:
+; V4T-NEXT: .LCPI26_0:
+; V4T-NEXT: .long 8388607 @ 0x7fffff
+;
+; V6T2-LABEL: mm_sat_le_ge:
+; V6T2: @ %bb.0: @ %entry
+; V6T2-NEXT: movw r1, #0
+; V6T2-NEXT: cmn r0, #8388608
+; V6T2-NEXT: movt r1, #65408
+; V6T2-NEXT: movle r0, r1
+; V6T2-NEXT: movw r1, #65535
+; V6T2-NEXT: movt r1, #127
+; V6T2-NEXT: cmp r0, r1
+; V6T2-NEXT: movge r0, r1
+; V6T2-NEXT: bx lr
+entry:
+ %0 = call i32 @llvm.smax.i32(i32 %x, i32 -8388608)
+ %1 = call i32 @llvm.smin.i32(i32 %0, i32 8388607)
+ ret i32 %1
+}
+
+define i32 @mm_no_sat_incorrect_interval(i32 %x) {
+; V4T-LABEL: mm_no_sat_incorrect_interval:
+; V4T: @ %bb.0: @ %entry
+; V4T-NEXT: ldr r1, .LCPI27_0
+; V4T-NEXT: cmp r0, r1
+; V4T-NEXT: movle r0, r1
+; V4T-NEXT: ldr r1, .LCPI27_1
+; V4T-NEXT: cmp r0, r1
+; V4T-NEXT: movge r0, r1
+; V4T-NEXT: bx lr
+; V4T-NEXT: .p2align 2
+; V4T-NEXT: @ %bb.1:
+; V4T-NEXT: .LCPI27_0:
+; V4T-NEXT: .long 4275878552 @ 0xfedcba98
+; V4T-NEXT: .LCPI27_1:
+; V4T-NEXT: .long 8388607 @ 0x7fffff
+;
+; V6T2-LABEL: mm_no_sat_incorrect_interval:
+; V6T2: @ %bb.0: @ %entry
+; V6T2-NEXT: movw r1, #47768
+; V6T2-NEXT: movt r1, #65244
+; V6T2-NEXT: cmp r0, r1
+; V6T2-NEXT: movle r0, r1
+; V6T2-NEXT: movw r1, #65535
+; V6T2-NEXT: movt r1, #127
+; V6T2-NEXT: cmp r0, r1
+; V6T2-NEXT: movge r0, r1
+; V6T2-NEXT: bx lr
+entry:
+ %0 = call i32 @llvm.smax.i32(i32 %x, i32 -19088744)
+ %1 = call i32 @llvm.smin.i32(i32 %0, i32 8388607)
+ ret i32 %1
+}
+
+declare i32 @llvm.smin.i32(i32, i32)
+declare i32 @llvm.smax.i32(i32, i32)
+declare i16 @llvm.smin.i16(i16, i16)
+declare i16 @llvm.smax.i16(i16, i16)
+declare i8 @llvm.smin.i8(i8, i8)
+declare i8 @llvm.smax.i8(i8, i8)
+
+
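The usat.ll additions below are the unsigned counterpart: a clamp to
[0, 2^k-1] built from smax with zero plus smin or umin, the shape that
"usat #23" could cover in one instruction (the CHECK lines show it still
lowering to bic/cmp/mov sequences here). A minimal sketch with a
hypothetical function name:

define i32 @clamp_u23(i32 %x) {
entry:
  ; clamp to [0, 8388607], the range of usat #23
  %lo = call i32 @llvm.smax.i32(i32 %x, i32 0)
  %hi = call i32 @llvm.umin.i32(i32 %lo, i32 8388607)
  ret i32 %hi
}
declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)
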
diff --git a/llvm/test/CodeGen/ARM/usat.ll b/llvm/test/CodeGen/ARM/usat.ll
index 84de3c9a0ecae..077aa9de317d2 100644
--- a/llvm/test/CodeGen/ARM/usat.ll
+++ b/llvm/test/CodeGen/ARM/usat.ll
@@ -608,3 +608,427 @@ entry:
%saturateUp = select i1 %cmpUp, i32 8388607, i32 %saturateLow
ret i32 %saturateUp
}
+
+define i32 @mm_unsigned_sat_base_32bit(i32 %x) {
+; V4T-LABEL: mm_unsigned_sat_base_32bit:
+; V4T: @ %bb.0: @ %entry
+; V4T-NEXT: ldr r1, .LCPI15_0
+; V4T-NEXT: cmp r0, r1
+; V4T-NEXT: movlt r1, r0
+; V4T-NEXT: bic r0, r1, r1, asr #31
+; V4T-NEXT: bx lr
+; V4T-NEXT: .p2align 2
+; V4T-NEXT: @ %bb.1:
+; V4T-NEXT: .LCPI15_0:
+; V4T-NEXT: .long 8388607 @ 0x7fffff
+;
+; V6-LABEL: mm_unsigned_sat_base_32bit:
+; V6: @ %bb.0: @ %entry
+; V6-NEXT: ldr r1, .LCPI15_0
+; V6-NEXT: cmp r0, r1
+; V6-NEXT: movlt r1, r0
+; V6-NEXT: bic r0, r1, r1, asr #31
+; V6-NEXT: bx lr
+; V6-NEXT: .p2align 2
+; V6-NEXT: @ %bb.1:
+; V6-NEXT: .LCPI15_0:
+; V6-NEXT: .long 8388607 @ 0x7fffff
+;
+; V6T2-LABEL: mm_unsigned_sat_base_32bit:
+; V6T2: @ %bb.0: @ %entry
+; V6T2-NEXT: movw r1, #65535
+; V6T2-NEXT: movt r1, #127
+; V6T2-NEXT: cmp r0, r1
+; V6T2-NEXT: movlt r1, r0
+; V6T2-NEXT: bic r0, r1, r1, asr #31
+; V6T2-NEXT: bx lr
+entry:
+ %0 = call i32 @llvm.smin.i32(i32 %x, i32 8388607)
+ %1 = call i32 @llvm.smax.i32(i32 %0, i32 0)
+ ret i32 %1
+}
+
+define i16 @mm_unsigned_sat_base_16bit(i16 %x) {
+; V4T-LABEL: mm_unsigned_sat_base_16bit:
+; V4T: @ %bb.0: @ %entry
+; V4T-NEXT: mov r2, #255
+; V4T-NEXT: lsl r0, r0, #16
+; V4T-NEXT: orr r2, r2, #1792
+; V4T-NEXT: asr r1, r0, #16
+; V4T-NEXT: cmp r1, r2
+; V4T-NEXT: asrlt r2, r0, #16
+; V4T-NEXT: bic r0, r2, r2, asr #31
+; V4T-NEXT: bx lr
+;
+; V6-LABEL: mm_unsigned_sat_base_16bit:
+; V6: @ %bb.0: @ %entry
+; V6-NEXT: mov r1, #255
+; V6-NEXT: sxth r0, r0
+; V6-NEXT: orr r1, r1, #1792
+; V6-NEXT: cmp r0, r1
+; V6-NEXT: movlt r1, r0
+; V6-NEXT: bic r0, r1, r1, asr #31
+; V6-NEXT: bx lr
+;
+; V6T2-LABEL: mm_unsigned_sat_base_16bit:
+; V6T2: @ %bb.0: @ %entry
+; V6T2-NEXT: sxth r0, r0
+; V6T2-NEXT: movw r1, #2047
+; V6T2-NEXT: cmp r0, r1
+; V6T2-NEXT: movlt r1, r0
+; V6T2-NEXT: bic r0, r1, r1, asr #31
+; V6T2-NEXT: bx lr
+entry:
+ %0 = call i16 @llvm.smin.i16(i16 %x, i16 2047)
+ %1 = call i16 @llvm.smax.i16(i16 %0, i16 0)
+ ret i16 %1
+}
+
+define i8 @mm_unsigned_sat_base_8bit(i8 %x) {
+; V4T-LABEL: mm_unsigned_sat_base_8bit:
+; V4T: @ %bb.0: @ %entry
+; V4T-NEXT: lsl r0, r0, #24
+; V4T-NEXT: mov r2, #31
+; V4T-NEXT: asr r1, r0, #24
+; V4T-NEXT: cmp r1, #31
+; V4T-NEXT: asrlt r2, r0, #24
+; V4T-NEXT: bic r0, r2, r2, asr #31
+; V4T-NEXT: bx lr
+;
+; V6-LABEL: mm_unsigned_sat_base_8bit:
+; V6: @ %bb.0: @ %entry
+; V6-NEXT: sxtb r0, r0
+; V6-NEXT: cmp r0, #31
+; V6-NEXT: movge r0, #31
+; V6-NEXT: bic r0, r0, r0, asr #31
+; V6-NEXT: bx lr
+;
+; V6T2-LABEL: mm_unsigned_sat_base_8bit:
+; V6T2: @ %bb.0: @ %entry
+; V6T2-NEXT: sxtb r0, r0
+; V6T2-NEXT: cmp r0, #31
+; V6T2-NEXT: movge r0, #31
+; V6T2-NEXT: bic r0, r0, r0, asr #31
+; V6T2-NEXT: bx lr
+entry:
+ %0 = call i8 @llvm.smin.i8(i8 %x, i8 31)
+ %1 = call i8 @llvm.smax.i8(i8 %0, i8 0)
+ ret i8 %1
+}
+
+define i32 @mm_unsigned_sat_lower_upper_1(i32 %x) {
+; V4T-LABEL: mm_unsigned_sat_lower_upper_1:
+; V4T: @ %bb.0: @ %entry
+; V4T-NEXT: ldr r1, .LCPI18_0
+; V4T-NEXT: cmp r0, r1
+; V4T-NEXT: movlt r1, r0
+; V4T-NEXT: bic r0, r1, r1, asr #31
+; V4T-NEXT: bx lr
+; V4T-NEXT: .p2align 2
+; V4T-NEXT: @ %bb.1:
+; V4T-NEXT: .LCPI18_0:
+; V4T-NEXT: .long 8388607 @ 0x7fffff
+;
+; V6-LABEL: mm_unsigned_sat_lower_upper_1:
+; V6: @ %bb.0: @ %entry
+; V6-NEXT: ldr r1, .LCPI18_0
+; V6-NEXT: cmp r0, r1
+; V6-NEXT: movlt r1, r0
+; V6-NEXT: bic r0, r1, r1, asr #31
+; V6-NEXT: bx lr
+; V6-NEXT: .p2align 2
+; V6-NEXT: @ %bb.1:
+; V6-NEXT: .LCPI18_0:
+; V6-NEXT: .long 8388607 @ 0x7fffff
+;
+; V6T2-LABEL: mm_unsigned_sat_lower_upper_1:
+; V6T2: @ %bb.0: @ %entry
+; V6T2-NEXT: movw r1, #65535
+; V6T2-NEXT: movt r1, #127
+; V6T2-NEXT: cmp r0, r1
+; V6T2-NEXT: movlt r1, r0
+; V6T2-NEXT: bic r0, r1, r1, asr #31
+; V6T2-NEXT: bx lr
+entry:
+ %0 = call i32 @llvm.smin.i32(i32 %x, i32 8388607)
+ %1 = call i32 @llvm.smax.i32(i32 %0, i32 0)
+ ret i32 %1
+}
+
+define i32 @mm_unsigned_sat_lower_upper_2(i32 %x) {
+; V4T-LABEL: mm_unsigned_sat_lower_upper_2:
+; V4T: @ %bb.0: @ %entry
+; V4T-NEXT: ldr r1, .LCPI19_0
+; V4T-NEXT: cmp r0, r1
+; V4T-NEXT: movlt r1, r0
+; V4T-NEXT: bic r0, r1, r1, asr #31
+; V4T-NEXT: bx lr
+; V4T-NEXT: .p2align 2
+; V4T-NEXT: @ %bb.1:
+; V4T-NEXT: .LCPI19_0:
+; V4T-NEXT: .long 8388607 @ 0x7fffff
+;
+; V6-LABEL: mm_unsigned_sat_lower_upper_2:
+; V6: @ %bb.0: @ %entry
+; V6-NEXT: ldr r1, .LCPI19_0
+; V6-NEXT: cmp r0, r1
+; V6-NEXT: movlt r1, r0
+; V6-NEXT: bic r0, r1, r1, asr #31
+; V6-NEXT: bx lr
+; V6-NEXT: .p2align 2
+; V6-NEXT: @ %bb.1:
+; V6-NEXT: .LCPI19_0:
+; V6-NEXT: .long 8388607 @ 0x7fffff
+;
+; V6T2-LABEL: mm_unsigned_sat_lower_upper_2:
+; V6T2: @ %bb.0: @ %entry
+; V6T2-NEXT: movw r1, #65535
+; V6T2-NEXT: movt r1, #127
+; V6T2-NEXT: cmp r0, r1
+; V6T2-NEXT: movlt r1, r0
+; V6T2-NEXT: bic r0, r1, r1, asr #31
+; V6T2-NEXT: bx lr
+entry:
+ %0 = call i32 @llvm.smin.i32(i32 %x, i32 8388607)
+ %1 = call i32 @llvm.smax.i32(i32 %0, i32 0)
+ ret i32 %1
+}
+
+define i32 @mm_unsigned_sat_upper_lower_1(i32 %x) {
+; V4T-LABEL: mm_unsigned_sat_upper_lower_1:
+; V4T: @ %bb.0: @ %entry
+; V4T-NEXT: bic r1, r0, r0, asr #31
+; V4T-NEXT: ldr r0, .LCPI20_0
+; V4T-NEXT: cmp r1, r0
+; V4T-NEXT: movlo r0, r1
+; V4T-NEXT: bx lr
+; V4T-NEXT: .p2align 2
+; V4T-NEXT: @ %bb.1:
+; V4T-NEXT: .LCPI20_0:
+; V4T-NEXT: .long 8388607 @ 0x7fffff
+;
+; V6-LABEL: mm_unsigned_sat_upper_lower_1:
+; V6: @ %bb.0: @ %entry
+; V6-NEXT: bic r1, r0, r0, asr #31
+; V6-NEXT: ldr r0, .LCPI20_0
+; V6-NEXT: cmp r1, r0
+; V6-NEXT: movlo r0, r1
+; V6-NEXT: bx lr
+; V6-NEXT: .p2align 2
+; V6-NEXT: @ %bb.1:
+; V6-NEXT: .LCPI20_0:
+; V6-NEXT: .long 8388607 @ 0x7fffff
+;
+; V6T2-LABEL: mm_unsigned_sat_upper_lower_1:
+; V6T2: @ %bb.0: @ %entry
+; V6T2-NEXT: bic r1, r0, r0, asr #31
+; V6T2-NEXT: movw r0, #65535
+; V6T2-NEXT: movt r0, #127
+; V6T2-NEXT: cmp r1, r0
+; V6T2-NEXT: movlo r0, r1
+; V6T2-NEXT: bx lr
+entry:
+ %0 = call i32 @llvm.smax.i32(i32 %x, i32 0)
+ %1 = call i32 @llvm.umin.i32(i32 %0, i32 8388607)
+ ret i32 %1
+}
+
+define i32 @mm_unsigned_sat_upper_lower_2(i32 %x) {
+; V4T-LABEL: mm_unsigned_sat_upper_lower_2:
+; V4T: @ %bb.0: @ %entry
+; V4T-NEXT: bic r1, r0, r0, asr #31
+; V4T-NEXT: ldr r0, .LCPI21_0
+; V4T-NEXT: cmp r1, r0
+; V4T-NEXT: movlo r0, r1
+; V4T-NEXT: bx lr
+; V4T-NEXT: .p2align 2
+; V4T-NEXT: @ %bb.1:
+; V4T-NEXT: .LCPI21_0:
+; V4T-NEXT: .long 8388607 @ 0x7fffff
+;
+; V6-LABEL: mm_unsigned_sat_upper_lower_2:
+; V6: @ %bb.0: @ %entry
+; V6-NEXT: bic r1, r0, r0, asr #31
+; V6-NEXT: ldr r0, .LCPI21_0
+; V6-NEXT: cmp r1, r0
+; V6-NEXT: movlo r0, r1
+; V6-NEXT: bx lr
+; V6-NEXT: .p2align 2
+; V6-NEXT: @ %bb.1:
+; V6-NEXT: .LCPI21_0:
+; V6-NEXT: .long 8388607 @ 0x7fffff
+;
+; V6T2-LABEL: mm_unsigned_sat_upper_lower_2:
+; V6T2: @ %bb.0: @ %entry
+; V6T2-NEXT: bic r1, r0, r0, asr #31
+; V6T2-NEXT: movw r0, #65535
+; V6T2-NEXT: movt r0, #127
+; V6T2-NEXT: cmp r1, r0
+; V6T2-NEXT: movlo r0, r1
+; V6T2-NEXT: bx lr
+entry:
+ %0 = call i32 @llvm.smax.i32(i32 %x, i32 0)
+ %1 = call i32 @llvm.umin.i32(i32 %0, i32 8388607)
+ ret i32 %1
+}
+
+define i32 @mm_unsigned_sat_upper_lower_3(i32 %x) {
+; V4T-LABEL: mm_unsigned_sat_upper_lower_3:
+; V4T: @ %bb.0: @ %entry
+; V4T-NEXT: bic r1, r0, r0, asr #31
+; V4T-NEXT: ldr r0, .LCPI22_0
+; V4T-NEXT: cmp r1, r0
+; V4T-NEXT: movlo r0, r1
+; V4T-NEXT: bx lr
+; V4T-NEXT: .p2align 2
+; V4T-NEXT: @ %bb.1:
+; V4T-NEXT: .LCPI22_0:
+; V4T-NEXT: .long 8388607 @ 0x7fffff
+;
+; V6-LABEL: mm_unsigned_sat_upper_lower_3:
+; V6: @ %bb.0: @ %entry
+; V6-NEXT: bic r1, r0, r0, asr #31
+; V6-NEXT: ldr r0, .LCPI22_0
+; V6-NEXT: cmp r1, r0
+; V6-NEXT: movlo r0, r1
+; V6-NEXT: bx lr
+; V6-NEXT: .p2align 2
+; V6-NEXT: @ %bb.1:
+; V6-NEXT: .LCPI22_0:
+; V6-NEXT: .long 8388607 @ 0x7fffff
+;
+; V6T2-LABEL: mm_unsigned_sat_upper_lower_3:
+; V6T2: @ %bb.0: @ %entry
+; V6T2-NEXT: bic r1, r0, r0, asr #31
+; V6T2-NEXT: movw r0, #65535
+; V6T2-NEXT: movt r0, #127
+; V6T2-NEXT: cmp r1, r0
+; V6T2-NEXT: movlo r0, r1
+; V6T2-NEXT: bx lr
+entry:
+ %0 = call i32 @llvm.smax.i32(i32 %x, i32 0)
+ %1 = call i32 @llvm.umin.i32(i32 %0, i32 8388607)
+ ret i32 %1
+}
+
+define i32 @mm_no_unsigned_sat_incorrect_constant(i32 %x) {
+; V4T-LABEL: mm_no_unsigned_sat_incorrect_constant:
+; V4T: @ %bb.0: @ %entry
+; V4T-NEXT: orr r1, r0, r0, asr #31
+; V4T-NEXT: ldr r0, .LCPI23_0
+; V4T-NEXT: cmp r1, r0
+; V4T-NEXT: movlt r0, r1
+; V4T-NEXT: bx lr
+; V4T-NEXT: .p2align 2
+; V4T-NEXT: @ %bb.1:
+; V4T-NEXT: .LCPI23_0:
+; V4T-NEXT: .long 8388607 @ 0x7fffff
+;
+; V6-LABEL: mm_no_unsigned_sat_incorrect_constant:
+; V6: @ %bb.0: @ %entry
+; V6-NEXT: orr r1, r0, r0, asr #31
+; V6-NEXT: ldr r0, .LCPI23_0
+; V6-NEXT: cmp r1, r0
+; V6-NEXT: movlt r0, r1
+; V6-NEXT: bx lr
+; V6-NEXT: .p2align 2
+; V6-NEXT: @ %bb.1:
+; V6-NEXT: .LCPI23_0:
+; V6-NEXT: .long 8388607 @ 0x7fffff
+;
+; V6T2-LABEL: mm_no_unsigned_sat_incorrect_constant:
+; V6T2: @ %bb.0: @ %entry
+; V6T2-NEXT: orr r1, r0, r0, asr #31
+; V6T2-NEXT: movw r0, #65535
+; V6T2-NEXT: movt r0, #127
+; V6T2-NEXT: cmp r1, r0
+; V6T2-NEXT: movlt r0, r1
+; V6T2-NEXT: bx lr
+entry:
+ %0 = call i32 @llvm.smax.i32(i32 %x, i32 -1)
+ %1 = call i32 @llvm.smin.i32(i32 %0, i32 8388607)
+ ret i32 %1
+}
+
+define i32 @mm_no_unsigned_sat_incorrect_constant2(i32 %x) {
+; V4T-LABEL: mm_no_unsigned_sat_incorrect_constant2:
+; V4T: @ %bb.0: @ %entry
+; V4T-NEXT: bic r1, r0, r0, asr #31
+; V4T-NEXT: mov r0, #1
+; V4T-NEXT: orr r0, r0, #8388608
+; V4T-NEXT: cmp r1, #8388608
+; V4T-NEXT: movls r0, r1
+; V4T-NEXT: bx lr
+;
+; V6-LABEL: mm_no_unsigned_sat_incorrect_constant2:
+; V6: @ %bb.0: @ %entry
+; V6-NEXT: bic r1, r0, r0, asr #31
+; V6-NEXT: mov r0, #1
+; V6-NEXT: orr r0, r0, #8388608
+; V6-NEXT: cmp r1, #8388608
+; V6-NEXT: movls r0, r1
+; V6-NEXT: bx lr
+;
+; V6T2-LABEL: mm_no_unsigned_sat_incorrect_constant2:
+; V6T2: @ %bb.0: @ %entry
+; V6T2-NEXT: bic r1, r0, r0, asr #31
+; V6T2-NEXT: movw r0, #1
+; V6T2-NEXT: movt r0, #128
+; V6T2-NEXT: cmp r1, #8388608
+; V6T2-NEXT: movls r0, r1
+; V6T2-NEXT: bx lr
+entry:
+ %0 = call i32 @llvm.smax.i32(i32 %x, i32 0)
+ %1 = call i32 @llvm.umin.i32(i32 %0, i32 8388609)
+ ret i32 %1
+}
+
+define i32 @mm_no_unsigned_sat_incorrect_interval(i32 %x) {
+; V4T-LABEL: mm_no_unsigned_sat_incorrect_interval:
+; V4T: @ %bb.0: @ %entry
+; V4T-NEXT: ldr r1, .LCPI25_0
+; V4T-NEXT: cmn r0, #4
+; V4T-NEXT: mvnle r0, #3
+; V4T-NEXT: cmp r0, r1
+; V4T-NEXT: movge r0, r1
+; V4T-NEXT: bx lr
+; V4T-NEXT: .p2align 2
+; V4T-NEXT: @ %bb.1:
+; V4T-NEXT: .LCPI25_0:
+; V4T-NEXT: .long 8388607 @ 0x7fffff
+;
+; V6-LABEL: mm_no_unsigned_sat_incorrect_interval:
+; V6: @ %bb.0: @ %entry
+; V6-NEXT: ldr r1, .LCPI25_0
+; V6-NEXT: cmn r0, #4
+; V6-NEXT: mvnle r0, #3
+; V6-NEXT: cmp r0, r1
+; V6-NEXT: movge r0, r1
+; V6-NEXT: bx lr
+; V6-NEXT: .p2align 2
+; V6-NEXT: @ %bb.1:
+; V6-NEXT: .LCPI25_0:
+; V6-NEXT: .long 8388607 @ 0x7fffff
+;
+; V6T2-LABEL: mm_no_unsigned_sat_incorrect_interval:
+; V6T2: @ %bb.0: @ %entry
+; V6T2-NEXT: cmn r0, #4
+; V6T2-NEXT: movw r1, #65535
+; V6T2-NEXT: mvnle r0, #3
+; V6T2-NEXT: movt r1, #127
+; V6T2-NEXT: cmp r0, r1
+; V6T2-NEXT: movge r0, r1
+; V6T2-NEXT: bx lr
+entry:
+ %0 = call i32 @llvm.smax.i32(i32 %x, i32 -4)
+ %1 = call i32 @llvm.smin.i32(i32 %0, i32 8388607)
+ ret i32 %1
+}
+
+declare i32 @llvm.smin.i32(i32, i32)
+declare i32 @llvm.smax.i32(i32, i32)
+declare i16 @llvm.smin.i16(i16, i16)
+declare i16 @llvm.smax.i16(i16, i16)
+declare i8 @llvm.smin.i8(i8, i8)
+declare i8 @llvm.smax.i8(i8, i8)
+declare i32 @llvm.umin.i32(i32, i32)