[llvm] 0f68597 - Pre-commit test cases for DAG combine folds of SSHLSAT/USHLSAT -> SHL

Sun Feb 6 10:00:28 PST 2022

Author: Bjorn Pettersson
Date: 2022-02-06T18:59:06+01:00
New Revision: 0f685976057e2ea27b35b609f4d1653d24ab6694

URL: https://github.com/llvm/llvm-project/commit/0f685976057e2ea27b35b609f4d1653d24ab6694
DIFF: https://github.com/llvm/llvm-project/commit/0f685976057e2ea27b35b609f4d1653d24ab6694.diff

LOG: Pre-commit test cases for DAG combine folds of SSHLSAT/USHLSAT -> SHL

It should be possible to replace SSHLSAT and USHLSAT with SHL when
it is known that no saturation will take place (e.g. by analysing
known sign bits in the first shift operand).

Differential Revision: https://reviews.llvm.org/D118764

Added: 
    

Modified: 
    llvm/test/CodeGen/AArch64/sshl_sat.ll
    llvm/test/CodeGen/AArch64/ushl_sat.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AArch64/sshl_sat.ll b/llvm/test/CodeGen/AArch64/sshl_sat.ll
index a63cd3241a0eb..9cebbed06c68f 100644

--- a/llvm/test/CodeGen/AArch64/sshl_sat.ll
+++ b/llvm/test/CodeGen/AArch64/sshl_sat.ll
@@ -126,3 +126,93 @@ define void @combine_shlsat_vector() nounwind {
   call void @sink4xi16(i16 %e0, i16 %e1, i16 %e2, i16 %e3)
   ret void
 }
+
+; Fold sshl.sat -> shl: ashr by 2 leaves 3 known sign bits, so a shift left by 2 cannot saturate.
+define i16 @combine_shlsat_to_shl(i16 %x) nounwind {
+; CHECK-LABEL: combine_shlsat_to_shl:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sxth w8, w0
+; CHECK-NEXT:    mov w9, #-65536
+; CHECK-NEXT:    mov w10, #-2147483648
+; CHECK-NEXT:    ands w8, w9, w8, lsl #14
+; CHECK-NEXT:    lsl w9, w8, #2
+; CHECK-NEXT:    cinv w10, w10, ge
+; CHECK-NEXT:    cmp w8, w9, asr #2
+; CHECK-NEXT:    csel w8, w10, w9, ne
+; CHECK-NEXT:    asr w0, w8, #16
+; CHECK-NEXT:    ret
+  %x2 = ashr i16 %x, 2
+  %tmp = call i16 @llvm.sshl.sat.i16(i16 %x2, i16 2)
+  ret i16 %tmp
+}
+
+; Do not fold sshl.sat -> shl: ashr by 2 gives only 3 known sign bits, so a shift left by 3 may saturate.
+define i16 @combine_shlsat_to_shl_no_fold(i16 %x) nounwind {
+; CHECK-LABEL: combine_shlsat_to_shl_no_fold:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sxth w8, w0
+; CHECK-NEXT:    mov w9, #-65536
+; CHECK-NEXT:    mov w10, #-2147483648
+; CHECK-NEXT:    ands w8, w9, w8, lsl #14
+; CHECK-NEXT:    lsl w9, w8, #3
+; CHECK-NEXT:    cinv w10, w10, ge
+; CHECK-NEXT:    cmp w8, w9, asr #3
+; CHECK-NEXT:    csel w8, w10, w9, ne
+; CHECK-NEXT:    asr w0, w8, #16
+; CHECK-NEXT:    ret
+  %x2 = ashr i16 %x, 2
+  %tmp = call i16 @llvm.sshl.sat.i16(i16 %x2, i16 3)
+  ret i16 %tmp
+}
+
+; Fold sshl.sat -> shl (vector): sext from i8 leaves 9 known sign bits per lane, so a shift left by 7 cannot saturate.
+define <4 x i16> @combine_shlsat_to_shl_vec(<4 x i8> %a) nounwind {
+; CHECK-LABEL: combine_shlsat_to_shl_vec:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v0.4h, v0.4h, #8
+; CHECK-NEXT:    mov w8, #-2147483648
+; CHECK-NEXT:    sshr v1.4h, v0.4h, #8
+; CHECK-NEXT:    umov w9, v1.h[0]
+; CHECK-NEXT:    umov w10, v1.h[1]
+; CHECK-NEXT:    umov w12, v1.h[2]
+; CHECK-NEXT:    lsl w11, w9, #16
+; CHECK-NEXT:    lsl w9, w9, #23
+; CHECK-NEXT:    cmp w11, #0
+; CHECK-NEXT:    cinv w13, w8, ge
+; CHECK-NEXT:    cmp w11, w9, asr #7
+; CHECK-NEXT:    lsl w11, w10, #16
+; CHECK-NEXT:    lsl w10, w10, #23
+; CHECK-NEXT:    csel w9, w13, w9, ne
+; CHECK-NEXT:    cmp w11, #0
+; CHECK-NEXT:    asr w9, w9, #16
+; CHECK-NEXT:    cinv w13, w8, ge
+; CHECK-NEXT:    cmp w11, w10, asr #7
+; CHECK-NEXT:    lsl w11, w12, #16
+; CHECK-NEXT:    csel w10, w13, w10, ne
+; CHECK-NEXT:    lsl w12, w12, #23
+; CHECK-NEXT:    asr w10, w10, #16
+; CHECK-NEXT:    fmov s0, w9
+; CHECK-NEXT:    umov w9, v1.h[3]
+; CHECK-NEXT:    cmp w11, #0
+; CHECK-NEXT:    cinv w13, w8, ge
+; CHECK-NEXT:    cmp w11, w12, asr #7
+; CHECK-NEXT:    csel w11, w13, w12, ne
+; CHECK-NEXT:    mov v0.h[1], w10
+; CHECK-NEXT:    asr w10, w11, #16
+; CHECK-NEXT:    lsl w11, w9, #16
+; CHECK-NEXT:    lsl w9, w9, #23
+; CHECK-NEXT:    cmp w11, #0
+; CHECK-NEXT:    cinv w8, w8, ge
+; CHECK-NEXT:    cmp w11, w9, asr #7
+; CHECK-NEXT:    mov v0.h[2], w10
+; CHECK-NEXT:    csel w8, w8, w9, ne
+; CHECK-NEXT:    asr w8, w8, #16
+; CHECK-NEXT:    mov v0.h[3], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+  %sext = sext <4 x i8> %a to <4 x i16>
+  %tmp = call <4 x i16> @llvm.sshl.sat.v4i16(
+                          <4 x i16> %sext,
+                          <4 x i16> <i16 7, i16 7, i16 7, i16 7>)
+  ret <4 x i16> %tmp
+}

diff  --git a/llvm/test/CodeGen/AArch64/ushl_sat.ll b/llvm/test/CodeGen/AArch64/ushl_sat.ll
index f6ccd09374abd..2701494b8e394 100644
--- a/llvm/test/CodeGen/AArch64/ushl_sat.ll
+++ b/llvm/test/CodeGen/AArch64/ushl_sat.ll
@@ -112,3 +112,64 @@ define void @combine_shlsat_vector() nounwind {
   call void @sink2xi16(i16 %e0, i16 %e1)
   ret void
 }
+
+; Fold ushl.sat -> shl: lshr by 2 leaves 2 known leading zero bits, so a shift left by 2 cannot saturate.
+define i16 @combine_shlsat_to_shl(i16 %x) nounwind {
+; CHECK-LABEL: combine_shlsat_to_shl:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w8, w0, #0xfffc
+; CHECK-NEXT:    lsl w9, w8, #14
+; CHECK-NEXT:    lsl w8, w8, #16
+; CHECK-NEXT:    cmp w9, w9
+; CHECK-NEXT:    csinv w8, w8, wzr, eq
+; CHECK-NEXT:    lsr w0, w8, #16
+; CHECK-NEXT:    ret
+  %x2 = lshr i16 %x, 2
+  %tmp = call i16 @llvm.ushl.sat.i16(i16 %x2, i16 2)
+  ret i16 %tmp
+}
+
+; Do not fold ushl.sat -> shl: lshr by 2 gives only 2 known leading zero bits, so a shift left by 3 may saturate.
+define i16 @combine_shlsat_to_shl_no_fold(i16 %x) nounwind {
+; CHECK-LABEL: combine_shlsat_to_shl_no_fold:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w8, w0, #0xfffc
+; CHECK-NEXT:    lsl w9, w8, #14
+; CHECK-NEXT:    lsl w8, w8, #17
+; CHECK-NEXT:    and w10, w9, #0x1fff0000
+; CHECK-NEXT:    cmp w9, w10
+; CHECK-NEXT:    csinv w8, w8, wzr, eq
+; CHECK-NEXT:    lsr w0, w8, #16
+; CHECK-NEXT:    ret
+  %x2 = lshr i16 %x, 2
+  %tmp = call i16 @llvm.ushl.sat.i16(i16 %x2, i16 3)
+  ret i16 %tmp
+}
+
+; Fold ushl.sat -> shl (vector): zext from i8 leaves 8 known leading zero bits per lane, so a shift left by 8 cannot saturate.
+define <2 x i16> @combine_shlsat_to_shl_vec(<2 x i8> %a) nounwind {
+; CHECK-LABEL: combine_shlsat_to_shl_vec:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi d1, #0x0000ff000000ff
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    shl v0.2s, v0.2s, #16
+; CHECK-NEXT:    mov w8, v0.s[1]
+; CHECK-NEXT:    fmov w9, s0
+; CHECK-NEXT:    and w10, w8, #0xffffff
+; CHECK-NEXT:    lsl w11, w8, #8
+; CHECK-NEXT:    cmp w8, w10
+; CHECK-NEXT:    and w8, w9, #0xffffff
+; CHECK-NEXT:    csinv w10, w11, wzr, eq
+; CHECK-NEXT:    lsl w11, w9, #8
+; CHECK-NEXT:    cmp w9, w8
+; CHECK-NEXT:    csinv w8, w11, wzr, eq
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    mov v0.s[1], w10
+; CHECK-NEXT:    ushr v0.2s, v0.2s, #16
+; CHECK-NEXT:    ret
+  %ext = zext <2 x i8> %a to <2 x i16>
+  %tmp = call <2 x i16> @llvm.ushl.sat.v2i16(
+                          <2 x i16> %ext,
+                          <2 x i16> <i16 8, i16 8>)
+  ret <2 x i16> %tmp
+}