[llvm] r367189 - [NFC][InstCombine] Shift amount reassociation: can have trunc between shl's

Sun Jul 28 06:13:47 PDT 2019

Author: lebedevri
Date: Sun Jul 28 06:13:46 2019
New Revision: 367189

URL: http://llvm.org/viewvc/llvm-project?rev=367189&view=rev
Log:
[NFC][InstCombine] Shift amount reassociation: can have trunc between shl's

https://rise4fun.com/Alive/OQbM
Not so simple for lshr/ashr, so those maybe later.

https://bugs.llvm.org/show_bug.cgi?id=42391

Added:
    llvm/trunk/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll

Added: llvm/trunk/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll?rev=367189&view=auto
==============================================================================

--- llvm/trunk/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll Sun Jul 28 06:13:46 2019
@@ -0,0 +1,289 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -instcombine -S | FileCheck %s
+
+; Given pattern:
+;   (trunc (x << Q) to iDst) << K
+; we should rewrite it as
+;   (trunc (x << (Q+K)) to iDst)  iff (Q+K) u< iDst
+; This is only valid for shl.
+; THIS FOLD DOES *NOT* REQUIRE ANY 'nuw'/'nsw' FLAGS!
+
+; Basic scalar test
+
+; Scalar form of the pattern: %x is shifted left by (32 - %y) in i32, truncated
+; to i16, then shifted left again by (%y - 2) in i16. The %y terms cancel, so
+; the two shift amounts sum to the constant 30.
+; The assertions below expect the IR to come out of instcombine unchanged.
+define i16 @t0(i32 %x, i16 %y) {
+; CHECK-LABEL: @t0(
+; CHECK-NEXT:    [[T0:%.*]] = sub i16 32, [[Y:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = zext i16 [[T0]] to i32
+; CHECK-NEXT:    [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]]
+; CHECK-NEXT:    [[T3:%.*]] = trunc i32 [[T2]] to i16
+; CHECK-NEXT:    [[T4:%.*]] = add i16 [[Y]], -2
+; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[T3]], [[T4]]
+; CHECK-NEXT:    ret i16 [[T5]]
+;
+  %t0 = sub i16 32, %y
+  %t1 = zext i16 %t0 to i32
+  %t2 = shl i32 %x, %t1
+  %t3 = trunc i32 %t2 to i16
+  %t4 = add i16 %y, -2
+  %t5 = shl i16 %t3, %t4
+  ret i16 %t5
+}
+
+; Two-lane vector version of @t0: both shift-amount constants are splats
+; (<32, 32> and <-2, -2>), so every lane has the same summed shift amount.
+; The assertions below expect the IR to come out of instcombine unchanged.
+define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) {
+; CHECK-LABEL: @t1_vec_splat(
+; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i16> <i16 32, i16 32>, [[Y:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = zext <2 x i16> [[T0]] to <2 x i32>
+; CHECK-NEXT:    [[T2:%.*]] = shl <2 x i32> [[X:%.*]], [[T1]]
+; CHECK-NEXT:    [[T3:%.*]] = trunc <2 x i32> [[T2]] to <2 x i16>
+; CHECK-NEXT:    [[T4:%.*]] = add <2 x i16> [[Y]], <i16 -2, i16 -2>
+; CHECK-NEXT:    [[T5:%.*]] = shl <2 x i16> [[T3]], [[T4]]
+; CHECK-NEXT:    ret <2 x i16> [[T5]]
+;
+  %t0 = sub <2 x i16> <i16 32, i16 32>, %y
+  %t1 = zext <2 x i16> %t0 to <2 x i32>
+  %t2 = shl <2 x i32> %x, %t1
+  %t3 = trunc <2 x i32> %t2 to <2 x i16>
+  %t4 = add <2 x i16> %y, <i16 -2, i16 -2>
+  %t5 = shl <2 x i16> %t3, %t4
+  ret <2 x i16> %t5
+}
+
+; Two-lane vector version with non-splat constants: lane 0 uses 32 and -2,
+; lane 1 uses 30 and 0. The per-lane constants differ, but each lane's pair
+; still sums to 30.
+; The assertions below expect the IR to come out of instcombine unchanged.
+define <2 x i16> @t2_vec_nonsplat(<2 x i32> %x, <2 x i16> %y) {
+; CHECK-LABEL: @t2_vec_nonsplat(
+; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i16> <i16 32, i16 30>, [[Y:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = zext <2 x i16> [[T0]] to <2 x i32>
+; CHECK-NEXT:    [[T2:%.*]] = shl <2 x i32> [[X:%.*]], [[T1]]
+; CHECK-NEXT:    [[T3:%.*]] = trunc <2 x i32> [[T2]] to <2 x i16>
+; CHECK-NEXT:    [[T4:%.*]] = add <2 x i16> [[Y]], <i16 -2, i16 0>
+; CHECK-NEXT:    [[T5:%.*]] = shl <2 x i16> [[T3]], [[T4]]
+; CHECK-NEXT:    ret <2 x i16> [[T5]]
+;
+  %t0 = sub <2 x i16> <i16 32, i16 30>, %y
+  %t1 = zext <2 x i16> %t0 to <2 x i32>
+  %t2 = shl <2 x i32> %x, %t1
+  %t3 = trunc <2 x i32> %t2 to <2 x i16>
+  %t4 = add <2 x i16> %y, <i16 -2, i16 0>
+  %t5 = shl <2 x i16> %t3, %t4
+  ret <2 x i16> %t5
+}
+
+; Vector tests with undef elements in the shift-amount constants
+
+; Three-lane vector version where the middle lane of the constant feeding the
+; inner (wide) shift amount is undef; the constant feeding the outer shift
+; amount is a full splat of -2.
+; The assertions below expect the IR to come out of instcombine unchanged.
+define <3 x i16> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i16> %y) {
+; CHECK-LABEL: @t3_vec_nonsplat_undef0(
+; CHECK-NEXT:    [[T0:%.*]] = sub <3 x i16> <i16 32, i16 undef, i16 32>, [[Y:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32>
+; CHECK-NEXT:    [[T2:%.*]] = shl <3 x i32> [[X:%.*]], [[T1]]
+; CHECK-NEXT:    [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16>
+; CHECK-NEXT:    [[T4:%.*]] = add <3 x i16> [[Y]], <i16 -2, i16 -2, i16 -2>
+; CHECK-NEXT:    [[T5:%.*]] = shl <3 x i16> [[T3]], [[T4]]
+; CHECK-NEXT:    ret <3 x i16> [[T5]]
+;
+  %t0 = sub <3 x i16> <i16 32, i16 undef, i16 32>, %y
+  %t1 = zext <3 x i16> %t0 to <3 x i32>
+  %t2 = shl <3 x i32> %x, %t1
+  %t3 = trunc <3 x i32> %t2 to <3 x i16>
+  %t4 = add <3 x i16> %y, <i16 -2, i16 -2, i16 -2>
+  %t5 = shl <3 x i16> %t3, %t4
+  ret <3 x i16> %t5
+}
+
+; Three-lane vector version where the middle lane of the constant feeding the
+; outer (narrow) shift amount is undef; the constant feeding the inner shift
+; amount is a full splat of 32.
+; The assertions below expect the IR to come out of instcombine unchanged.
+define <3 x i16> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) {
+; CHECK-LABEL: @t4_vec_nonsplat_undef1(
+; CHECK-NEXT:    [[T0:%.*]] = sub <3 x i16> <i16 32, i16 32, i16 32>, [[Y:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32>
+; CHECK-NEXT:    [[T2:%.*]] = shl <3 x i32> [[X:%.*]], [[T1]]
+; CHECK-NEXT:    [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16>
+; CHECK-NEXT:    [[T4:%.*]] = add <3 x i16> [[Y]], <i16 -2, i16 undef, i16 -2>
+; CHECK-NEXT:    [[T5:%.*]] = shl <3 x i16> [[T3]], [[T4]]
+; CHECK-NEXT:    ret <3 x i16> [[T5]]
+;
+  %t0 = sub <3 x i16> <i16 32, i16 32, i16 32>, %y
+  %t1 = zext <3 x i16> %t0 to <3 x i32>
+  %t2 = shl <3 x i32> %x, %t1
+  %t3 = trunc <3 x i32> %t2 to <3 x i16>
+  %t4 = add <3 x i16> %y, <i16 -2, i16 undef, i16 -2>
+  %t5 = shl <3 x i16> %t3, %t4
+  ret <3 x i16> %t5
+}
+
+; Three-lane vector version where the middle lane is undef in BOTH constants
+; (the one feeding the inner shift amount and the one feeding the outer one).
+; The assertions below expect the IR to come out of instcombine unchanged.
+; NOTE(review): the `_undef1` suffix duplicates the one on
+; @t4_vec_nonsplat_undef1; presumably `_undef2` was intended - confirm before
+; renaming.
+define <3 x i16> @t5_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) {
+; CHECK-LABEL: @t5_vec_nonsplat_undef1(
+; CHECK-NEXT:    [[T0:%.*]] = sub <3 x i16> <i16 32, i16 undef, i16 32>, [[Y:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32>
+; CHECK-NEXT:    [[T2:%.*]] = shl <3 x i32> [[X:%.*]], [[T1]]
+; CHECK-NEXT:    [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16>
+; CHECK-NEXT:    [[T4:%.*]] = add <3 x i16> [[Y]], <i16 -2, i16 undef, i16 -2>
+; CHECK-NEXT:    [[T5:%.*]] = shl <3 x i16> [[T3]], [[T4]]
+; CHECK-NEXT:    ret <3 x i16> [[T5]]
+;
+  %t0 = sub <3 x i16> <i16 32, i16 undef, i16 32>, %y
+  %t1 = zext <3 x i16> %t0 to <3 x i32>
+  %t2 = shl <3 x i32> %x, %t1
+  %t3 = trunc <3 x i32> %t2 to <3 x i16>
+  %t4 = add <3 x i16> %y, <i16 -2, i16 undef, i16 -2>
+  %t5 = shl <3 x i16> %t3, %t4
+  ret <3 x i16> %t5
+}
+
+; One-use tests
+
+; External functions with no body in this file. The one-use tests call them to
+; give an intermediate i16 / i32 value an additional use.
+declare void @use16(i16)
+declare void @use32(i32)
+
+; Same pattern as the basic scalar test, but the wide inner shift %t2 has an
+; additional use: it is also passed to @use32.
+; The assertions below expect the IR to come out of instcombine unchanged.
+define i16 @t6_extrause0(i32 %x, i16 %y) {
+; CHECK-LABEL: @t6_extrause0(
+; CHECK-NEXT:    [[T0:%.*]] = sub i16 32, [[Y:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = zext i16 [[T0]] to i32
+; CHECK-NEXT:    [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]]
+; CHECK-NEXT:    call void @use32(i32 [[T2]])
+; CHECK-NEXT:    [[T3:%.*]] = trunc i32 [[T2]] to i16
+; CHECK-NEXT:    [[T4:%.*]] = add i16 [[Y]], -2
+; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[T3]], [[T4]]
+; CHECK-NEXT:    ret i16 [[T5]]
+;
+  %t0 = sub i16 32, %y
+  %t1 = zext i16 %t0 to i32
+  %t2 = shl i32 %x, %t1
+  call void @use32(i32 %t2)
+  %t3 = trunc i32 %t2 to i16
+  %t4 = add i16 %y, -2
+  %t5 = shl i16 %t3, %t4
+  ret i16 %t5
+}
+
+; Same pattern as the basic scalar test, but the truncated value %t3 has an
+; additional use: it is also passed to @use16.
+; The assertions below expect the IR to come out of instcombine unchanged.
+define i16 @t7_extrause1(i32 %x, i16 %y) {
+; CHECK-LABEL: @t7_extrause1(
+; CHECK-NEXT:    [[T0:%.*]] = sub i16 32, [[Y:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = zext i16 [[T0]] to i32
+; CHECK-NEXT:    [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]]
+; CHECK-NEXT:    [[T3:%.*]] = trunc i32 [[T2]] to i16
+; CHECK-NEXT:    call void @use16(i16 [[T3]])
+; CHECK-NEXT:    [[T4:%.*]] = add i16 [[Y]], -2
+; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[T3]], [[T4]]
+; CHECK-NEXT:    ret i16 [[T5]]
+;
+  %t0 = sub i16 32, %y
+  %t1 = zext i16 %t0 to i32
+  %t2 = shl i32 %x, %t1
+  %t3 = trunc i32 %t2 to i16
+  call void @use16(i16 %t3)
+  %t4 = add i16 %y, -2
+  %t5 = shl i16 %t3, %t4
+  ret i16 %t5
+}
+
+; Same pattern as the basic scalar test, but both intermediate values have an
+; additional use: %t2 is passed to @use32 and %t3 is passed to @use16.
+; The assertions below expect the IR to come out of instcombine unchanged.
+define i16 @t8_extrause2(i32 %x, i16 %y) {
+; CHECK-LABEL: @t8_extrause2(
+; CHECK-NEXT:    [[T0:%.*]] = sub i16 32, [[Y:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = zext i16 [[T0]] to i32
+; CHECK-NEXT:    [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]]
+; CHECK-NEXT:    call void @use32(i32 [[T2]])
+; CHECK-NEXT:    [[T3:%.*]] = trunc i32 [[T2]] to i16
+; CHECK-NEXT:    call void @use16(i16 [[T3]])
+; CHECK-NEXT:    [[T4:%.*]] = add i16 [[Y]], -2
+; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[T3]], [[T4]]
+; CHECK-NEXT:    ret i16 [[T5]]
+;
+  %t0 = sub i16 32, %y
+  %t1 = zext i16 %t0 to i32
+  %t2 = shl i32 %x, %t1
+  call void @use32(i32 %t2)
+  %t3 = trunc i32 %t2 to i16
+  call void @use16(i16 %t3)
+  %t4 = add i16 %y, -2
+  %t5 = shl i16 %t3, %t4
+  ret i16 %t5
+}
+
+; Special test
+
+; New shift amount is less than bitwidth after truncation, so we could pre-truncate.
+; The shift amounts are (32 - %y) and (%y - 18), which sum to 14 - below the
+; 16-bit width of the truncated type.
+; The function must return %t5 (the outer shift). Returning %t3 leaves %t4/%t5
+; dead, so the shl-trunc-shl pattern never reaches the output and the test
+; checks nothing.
+; NOTE(review): the assertions were extended by hand on the assumption that
+; instcombine currently leaves this pattern untouched, as in @t0; regenerate
+; with utils/update_test_checks.py to confirm.
+define i16 @t9_pretrunc(i32 %x, i16 %y) {
+; CHECK-LABEL: @t9_pretrunc(
+; CHECK-NEXT:    [[T0:%.*]] = sub i16 32, [[Y:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = zext i16 [[T0]] to i32
+; CHECK-NEXT:    [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]]
+; CHECK-NEXT:    [[T3:%.*]] = trunc i32 [[T2]] to i16
+; CHECK-NEXT:    [[T4:%.*]] = add i16 [[Y]], -18
+; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[T3]], [[T4]]
+; CHECK-NEXT:    ret i16 [[T5]]
+;
+  %t0 = sub i16 32, %y
+  %t1 = zext i16 %t0 to i32
+  %t2 = shl i32 %x, %t1
+  %t3 = trunc i32 %t2 to i16
+  %t4 = add i16 %y, -18
+  %t5 = shl i16 %t3, %t4
+  ret i16 %t5
+}
+
+; No 'nuw'/'nsw' flags are to be propagated!
+; Both shifts carry 'nuw nsw' here.
+; The function must return %t5 (the outer shift). Returning %t3 leaves %t4/%t5
+; dead, so the flagged outer shl never reaches the output and the test checks
+; nothing about flag handling.
+; NOTE(review): the assertions were extended by hand on the assumption that
+; instcombine currently leaves this pattern untouched, as in @t0; regenerate
+; with utils/update_test_checks.py to confirm.
+define i16 @t10_no_flags(i32 %x, i16 %y) {
+; CHECK-LABEL: @t10_no_flags(
+; CHECK-NEXT:    [[T0:%.*]] = sub i16 32, [[Y:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = zext i16 [[T0]] to i32
+; CHECK-NEXT:    [[T2:%.*]] = shl nuw nsw i32 [[X:%.*]], [[T1]]
+; CHECK-NEXT:    [[T3:%.*]] = trunc i32 [[T2]] to i16
+; CHECK-NEXT:    [[T4:%.*]] = add i16 [[Y]], -2
+; CHECK-NEXT:    [[T5:%.*]] = shl nuw nsw i16 [[T3]], [[T4]]
+; CHECK-NEXT:    ret i16 [[T5]]
+;
+  %t0 = sub i16 32, %y
+  %t1 = zext i16 %t0 to i32
+  %t2 = shl nuw nsw i32 %x, %t1
+  %t3 = trunc i32 %t2 to i16
+  %t4 = add i16 %y, -2
+  %t5 = shl nuw nsw i16 %t3, %t4
+  ret i16 %t5
+}
+
+; Negative tests
+
+; As-is this is not correct for other shift opcodes.
+; Despite the `_shl` suffix in its name, this function uses lshr for both
+; shifts; the name is kept as-is.
+; The function must return %t5 (the outer shift). Returning %t3 leaves %t4/%t5
+; dead, so the lshr-trunc-lshr pattern never reaches the output and the
+; negative test checks nothing.
+; NOTE(review): the assertions were extended by hand on the assumption that
+; instcombine leaves this pattern untouched; regenerate with
+; utils/update_test_checks.py to confirm.
+define i16 @t11_shl(i32 %x, i16 %y) {
+; CHECK-LABEL: @t11_shl(
+; CHECK-NEXT:    [[T0:%.*]] = sub i16 32, [[Y:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = zext i16 [[T0]] to i32
+; CHECK-NEXT:    [[T2:%.*]] = lshr i32 [[X:%.*]], [[T1]]
+; CHECK-NEXT:    [[T3:%.*]] = trunc i32 [[T2]] to i16
+; CHECK-NEXT:    [[T4:%.*]] = add i16 [[Y]], -2
+; CHECK-NEXT:    [[T5:%.*]] = lshr i16 [[T3]], [[T4]]
+; CHECK-NEXT:    ret i16 [[T5]]
+;
+  %t0 = sub i16 32, %y
+  %t1 = zext i16 %t0 to i32
+  %t2 = lshr i32 %x, %t1
+  %t3 = trunc i32 %t2 to i16
+  %t4 = add i16 %y, -2
+  %t5 = lshr i16 %t3, %t4
+  ret i16 %t5
+}
+; Negative test using ashr for both shifts.
+; The function must return %t5 (the outer shift). Returning %t3 leaves %t4/%t5
+; dead, so the ashr-trunc-ashr pattern never reaches the output and the
+; negative test checks nothing.
+; NOTE(review): the assertions were extended by hand on the assumption that
+; instcombine leaves this pattern untouched; regenerate with
+; utils/update_test_checks.py to confirm.
+define i16 @t12_ashr(i32 %x, i16 %y) {
+; CHECK-LABEL: @t12_ashr(
+; CHECK-NEXT:    [[T0:%.*]] = sub i16 32, [[Y:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = zext i16 [[T0]] to i32
+; CHECK-NEXT:    [[T2:%.*]] = ashr i32 [[X:%.*]], [[T1]]
+; CHECK-NEXT:    [[T3:%.*]] = trunc i32 [[T2]] to i16
+; CHECK-NEXT:    [[T4:%.*]] = add i16 [[Y]], -2
+; CHECK-NEXT:    [[T5:%.*]] = ashr i16 [[T3]], [[T4]]
+; CHECK-NEXT:    ret i16 [[T5]]
+;
+  %t0 = sub i16 32, %y
+  %t1 = zext i16 %t0 to i32
+  %t2 = ashr i32 %x, %t1
+  %t3 = trunc i32 %t2 to i16
+  %t4 = add i16 %y, -2
+  %t5 = ashr i16 %t3, %t4
+  ret i16 %t5
+}
+
+; Can't fold, total shift would be 32
+; The shift amounts are (30 - %y) and (%y + 2), which sum to 32 - the full
+; width of the wide type. The outer amount previously used -2, which gave a
+; sum of 28 rather than the intended 32.
+; The function must return %t5 (the outer shift). Returning %t3 leaves %t4/%t5
+; dead, so the pattern never reaches the output and the negative test checks
+; nothing.
+; NOTE(review): the assertions were extended by hand on the assumption that
+; instcombine leaves this pattern untouched; regenerate with
+; utils/update_test_checks.py to confirm.
+define i16 @n13(i32 %x, i16 %y) {
+; CHECK-LABEL: @n13(
+; CHECK-NEXT:    [[T0:%.*]] = sub i16 30, [[Y:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = zext i16 [[T0]] to i32
+; CHECK-NEXT:    [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]]
+; CHECK-NEXT:    [[T3:%.*]] = trunc i32 [[T2]] to i16
+; CHECK-NEXT:    [[T4:%.*]] = add i16 [[Y]], 2
+; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[T3]], [[T4]]
+; CHECK-NEXT:    ret i16 [[T5]]
+;
+  %t0 = sub i16 30, %y
+  %t1 = zext i16 %t0 to i32
+  %t2 = shl i32 %x, %t1
+  %t3 = trunc i32 %t2 to i16
+  %t4 = add i16 %y, 2
+  %t5 = shl i16 %t3, %t4
+  ret i16 %t5
+}