[llvm] r358552 - Revert "Temporarily Revert "Add basic loop fusion pass.""

Eric Christopher via llvm-commits <llvm-commits@lists.llvm.org>
Tue Apr 16 21:53:01 PDT 2019


Added: llvm/trunk/test/Transforms/InstCombine/X86/x86-sse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-sse.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/X86/x86-sse.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/X86/x86-sse.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,611 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define float @test_rcp_ss_0(float %a) {
+; CHECK-LABEL: @test_rcp_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
+; CHECK-NEXT:    ret float [[TMP3]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
+  %6 = extractelement <4 x float> %5, i32 0
+  ret float %6
+}
+
+define float @test_rcp_ss_1(float %a) {
+; CHECK-LABEL: @test_rcp_ss_1(
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
+  %6 = extractelement <4 x float> %5, i32 1
+  ret float %6
+}
+
+define float @test_sqrt_ss_0(float %a) {
+; CHECK-LABEL: @test_sqrt_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @llvm.sqrt.f32(float %a)
+; CHECK-NEXT:    ret float [[TMP1]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
+  %6 = extractelement <4 x float> %5, i32 0
+  ret float %6
+}
+
+define float @test_sqrt_ss_2(float %a) {
+; CHECK-LABEL: @test_sqrt_ss_2(
+; CHECK-NEXT:    ret float 2.000000e+00
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
+  %6 = extractelement <4 x float> %5, i32 2
+  ret float %6
+}
+
+define float @test_rsqrt_ss_0(float %a) {
+; CHECK-LABEL: @test_rsqrt_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
+; CHECK-NEXT:    ret float [[TMP3]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
+  %6 = extractelement <4 x float> %5, i32 0
+  ret float %6
+}
+
+define float @test_rsqrt_ss_3(float %a) {
+; CHECK-LABEL: @test_rsqrt_ss_3(
+; CHECK-NEXT:    ret float 3.000000e+00
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
+  %6 = extractelement <4 x float> %5, i32 3
+  ret float %6
+}
+
+define float @test_add_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_add_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd float %a, %b
+; CHECK-NEXT:    ret float [[TMP1]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+  %9 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %8)
+  %r = extractelement <4 x float> %9, i32 0
+  ret float %r
+}
+
+define float @test_add_ss_1(float %a, float %b) {
+; CHECK-LABEL: @test_add_ss_1(
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %5)
+  %7 = extractelement <4 x float> %6, i32 1
+  ret float %7
+}
+
+define float @test_sub_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_sub_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub float %a, %b
+; CHECK-NEXT:    ret float [[TMP1]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+  %9 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %8)
+  %r = extractelement <4 x float> %9, i32 0
+  ret float %r
+}
+
+define float @test_sub_ss_2(float %a, float %b) {
+; CHECK-LABEL: @test_sub_ss_2(
+; CHECK-NEXT:    ret float 2.000000e+00
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %5)
+  %7 = extractelement <4 x float> %6, i32 2
+  ret float %7
+}
+
+define float @test_mul_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_mul_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul float %a, %b
+; CHECK-NEXT:    ret float [[TMP1]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+  %9 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %8)
+  %r = extractelement <4 x float> %9, i32 0
+  ret float %r
+}
+
+define float @test_mul_ss_3(float %a, float %b) {
+; CHECK-LABEL: @test_mul_ss_3(
+; CHECK-NEXT:    ret float 3.000000e+00
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %5)
+  %7 = extractelement <4 x float> %6, i32 3
+  ret float %7
+}
+
+define float @test_div_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_div_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv float %a, %b
+; CHECK-NEXT:    ret float [[TMP1]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+  %9 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %8)
+  %r = extractelement <4 x float> %9, i32 0
+  ret float %r
+}
+
+define float @test_div_ss_1(float %a, float %b) {
+; CHECK-LABEL: @test_div_ss_1(
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %5)
+  %7 = extractelement <4 x float> %6, i32 1
+  ret float %7
+}
+
+define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_min_ss(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %b)
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+  %4 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %3)
+  ret <4 x float> %4
+}
+
+define float @test_min_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_min_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
+; CHECK-NEXT:    ret float [[TMP4]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+  %9 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %8)
+  %10 = extractelement <4 x float> %9, i32 0
+  ret float %10
+}
+
+define float @test_min_ss_2(float %a, float %b) {
+; CHECK-LABEL: @test_min_ss_2(
+; CHECK-NEXT:    ret float 2.000000e+00
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %5)
+  %7 = extractelement <4 x float> %6, i32 2
+  ret float %7
+}
+
+define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_max_ss(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %b)
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+  %4 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %3)
+  ret <4 x float> %4
+}
+
+define float @test_max_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_max_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
+; CHECK-NEXT:    ret float [[TMP4]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+  %9 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %8)
+  %10 = extractelement <4 x float> %9, i32 0
+  ret float %10
+}
+
+define float @test_max_ss_3(float %a, float %b) {
+; CHECK-LABEL: @test_max_ss_3(
+; CHECK-NEXT:    ret float 3.000000e+00
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %5)
+  %7 = extractelement <4 x float> %6, i32 3
+  ret float %7
+}
+
+define <4 x float> @test_cmp_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_cmp_ss(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %b, i8 0)
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+  %4 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %3, i8 0)
+  ret <4 x float> %4
+}
+
+define float @test_cmp_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_cmp_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]], i8 0)
+; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
+; CHECK-NEXT:    ret float [[R]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+  %9 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %8, i8 0)
+  %r = extractelement <4 x float> %9, i32 0
+  ret float %r
+}
+
+define float @test_cmp_ss_1(float %a, float %b) {
+; CHECK-LABEL: @test_cmp_ss_1(
+; CHECK-NEXT:    ret float 1.000000e+00
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %5, i8 0)
+  %7 = extractelement <4 x float> %6, i32 1
+  ret float %7
+}
+
+define i32 @test_comieq_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_comieq_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+  %9 = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> %4, <4 x float> %8)
+  ret i32 %9
+}
+
+define i32 @test_comige_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_comige_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+  %9 = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> %4, <4 x float> %8)
+  ret i32 %9
+}
+
+define i32 @test_comigt_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_comigt_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+  %9 = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> %4, <4 x float> %8)
+  ret i32 %9
+}
+
+define i32 @test_comile_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_comile_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+  %9 = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> %4, <4 x float> %8)
+  ret i32 %9
+}
+
+define i32 @test_comilt_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_comilt_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+  %9 = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> %4, <4 x float> %8)
+  ret i32 %9
+}
+
+define i32 @test_comineq_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_comineq_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+  %9 = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> %4, <4 x float> %8)
+  ret i32 %9
+}
+
+define i32 @test_ucomieq_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_ucomieq_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+  %9 = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %4, <4 x float> %8)
+  ret i32 %9
+}
+
+define i32 @test_ucomige_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_ucomige_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+  %9 = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %4, <4 x float> %8)
+  ret i32 %9
+}
+
+define i32 @test_ucomigt_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_ucomigt_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+  %9 = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %4, <4 x float> %8)
+  ret i32 %9
+}
+
+define i32 @test_ucomile_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_ucomile_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+  %9 = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %4, <4 x float> %8)
+  ret i32 %9
+}
+
+define i32 @test_ucomilt_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_ucomilt_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+  %9 = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %4, <4 x float> %8)
+  ret i32 %9
+}
+
+define i32 @test_ucomineq_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_ucomineq_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+  %9 = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %4, <4 x float> %8)
+  ret i32 %9
+}
+
+declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>)
+declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>)
+declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>)
+
+declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8)
+
+declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>)
+
+declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>)

Added: llvm/trunk/test/Transforms/InstCombine/X86/x86-sse2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-sse2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/X86/x86-sse2.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/X86/x86-sse2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,458 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define double @test_sqrt_sd_0(double %a) {
+; CHECK-LABEL: @test_sqrt_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = call double @llvm.sqrt.f64(double %a)
+; CHECK-NEXT:    ret double [[TMP1]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
+  %4 = extractelement <2 x double> %3, i32 0
+  ret double %4
+}
+
+define double @test_sqrt_sd_1(double %a) {
+; CHECK-LABEL: @test_sqrt_sd_1(
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
+  %4 = extractelement <2 x double> %3, i32 1
+  ret double %4
+}
+
+define double @test_add_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_add_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd double %a, %b
+; CHECK-NEXT:    ret double [[TMP1]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4)
+  %6 = extractelement <2 x double> %5, i32 0
+  ret double %6
+}
+
+define double @test_add_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_add_sd_1(
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4)
+  %6 = extractelement <2 x double> %5, i32 1
+  ret double %6
+}
+
+define double @test_sub_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_sub_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub double %a, %b
+; CHECK-NEXT:    ret double [[TMP1]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %2, <2 x double> %4)
+  %6 = extractelement <2 x double> %5, i32 0
+  ret double %6
+}
+
+define double @test_sub_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_sub_sd_1(
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %2, <2 x double> %4)
+  %6 = extractelement <2 x double> %5, i32 1
+  ret double %6
+}
+
+define double @test_mul_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_mul_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul double %a, %b
+; CHECK-NEXT:    ret double [[TMP1]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %2, <2 x double> %4)
+  %6 = extractelement <2 x double> %5, i32 0
+  ret double %6
+}
+
+define double @test_mul_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_mul_sd_1(
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %2, <2 x double> %4)
+  %6 = extractelement <2 x double> %5, i32 1
+  ret double %6
+}
+
+define double @test_div_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_div_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv double %a, %b
+; CHECK-NEXT:    ret double [[TMP1]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %2, <2 x double> %4)
+  %6 = extractelement <2 x double> %5, i32 0
+  ret double %6
+}
+
+define double @test_div_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_div_sd_1(
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %2, <2 x double> %4)
+  %6 = extractelement <2 x double> %5, i32 1
+  ret double %6
+}
+
+define <2 x double> @test_min_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_min_sd(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a, <2 x double> %b)
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+  %2 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a, <2 x double> %1)
+  ret <2 x double> %2
+}
+
+define double @test_min_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_min_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
+; CHECK-NEXT:    ret double [[TMP4]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %2, <2 x double> %4)
+  %6 = extractelement <2 x double> %5, i32 0
+  ret double %6
+}
+
+define double @test_min_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_min_sd_1(
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %2, <2 x double> %4)
+  %6 = extractelement <2 x double> %5, i32 1
+  ret double %6
+}
+
+define <2 x double> @test_max_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_max_sd(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a, <2 x double> %b)
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+  %2 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a, <2 x double> %1)
+  ret <2 x double> %2
+}
+
+define double @test_max_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_max_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
+; CHECK-NEXT:    ret double [[TMP4]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %2, <2 x double> %4)
+  %6 = extractelement <2 x double> %5, i32 0
+  ret double %6
+}
+
+define double @test_max_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_max_sd_1(
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %2, <2 x double> %4)
+  %6 = extractelement <2 x double> %5, i32 1
+  ret double %6
+}
+
+define <2 x double> @test_cmp_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_cmp_sd(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a, <2 x double> %b, i8 0)
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+  %2 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a, <2 x double> %1, i8 0)
+  ret <2 x double> %2
+}
+
+define double @test_cmp_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_cmp_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]], i8 0)
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
+; CHECK-NEXT:    ret double [[TMP4]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %2, <2 x double> %4, i8 0)
+  %6 = extractelement <2 x double> %5, i32 0
+  ret double %6
+}
+
+define double @test_cmp_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_cmp_sd_1(
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %2, <2 x double> %4, i8 0)
+  %6 = extractelement <2 x double> %5, i32 1
+  ret double %6
+}
+
+define i32 @test_comieq_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_comieq_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comieq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %2, <2 x double> %4)
+  ret i32 %5
+}
+
+define i32 @test_comige_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_comige_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comige.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call i32 @llvm.x86.sse2.comige.sd(<2 x double> %2, <2 x double> %4)
+  ret i32 %5
+}
+
+define i32 @test_comigt_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_comigt_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comigt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %2, <2 x double> %4)
+  ret i32 %5
+}
+
+define i32 @test_comile_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_comile_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comile.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call i32 @llvm.x86.sse2.comile.sd(<2 x double> %2, <2 x double> %4)
+  ret i32 %5
+}
+
+define i32 @test_comilt_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_comilt_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comilt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %2, <2 x double> %4)
+  ret i32 %5
+}
+
+define i32 @test_comineq_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_comineq_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comineq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %2, <2 x double> %4)
+  ret i32 %5
+}
+
+define i32 @test_ucomieq_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_ucomieq_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %2, <2 x double> %4)
+  ret i32 %5
+}
+
+define i32 @test_ucomige_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_ucomige_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %2, <2 x double> %4)
+  ret i32 %5
+}
+
+define i32 @test_ucomigt_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_ucomigt_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %2, <2 x double> %4)
+  ret i32 %5
+}
+
+define i32 @test_ucomile_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_ucomile_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %2, <2 x double> %4)
+  ret i32 %5
+}
+
+define i32 @test_ucomilt_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_ucomilt_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %2, <2 x double> %4)
+  ret i32 %5
+}
+
+define i32 @test_ucomineq_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_ucomineq_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %2, <2 x double> %4)
+  ret i32 %5
+}
+
+declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
+
+declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8)
+
+declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>)
+
+declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>)

Added: llvm/trunk/test/Transforms/InstCombine/X86/x86-sse41.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-sse41.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/X86/x86-sse41.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/X86/x86-sse41.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,142 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define <2 x double> @test_round_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_round_sd(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 10)
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 0
+  %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+  %3 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %1, <2 x double> %2, i32 10)
+  ret <2 x double> %3
+}
+
+define <2 x double> @test_round_sd_floor(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_round_sd_floor(
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = call double @llvm.floor.f64(double [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[A:%.*]], double [[TMP2]], i64 0
+; CHECK-NEXT:    ret <2 x double> [[TMP3]]
+;
+  %1 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a, <2 x double> %b, i32 1)
+  ret <2 x double> %1
+}
+
+define <2 x double> @test_round_sd_ceil(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_round_sd_ceil(
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = call double @llvm.ceil.f64(double [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[A:%.*]], double [[TMP2]], i64 0
+; CHECK-NEXT:    ret <2 x double> [[TMP3]]
+;
+  %1 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a, <2 x double> %b, i32 2)
+  ret <2 x double> %1
+}
+
+define double @test_round_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_round_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> undef, <2 x double> [[TMP1]], i32 10)
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
+; CHECK-NEXT:    ret double [[TMP3]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %2, <2 x double> %4, i32 10)
+  %6 = extractelement <2 x double> %5, i32 0
+  ret double %6
+}
+
+define double @test_round_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_round_sd_1(
+; CHECK-NEXT:    ret double 1.000000e+00
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = insertelement <2 x double> undef, double %b, i32 0
+  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+  %5 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %2, <2 x double> %4, i32 10)
+  %6 = extractelement <2 x double> %5, i32 1
+  ret double %6
+}
+
+define <4 x float> @test_round_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_round_ss(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x float> [[B:%.*]], i32 10)
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
+  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+  %4 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+  %5 = insertelement <4 x float> %4, float 2.000000e+00, i32 2
+  %6 = insertelement <4 x float> %5, float 3.000000e+00, i32 3
+  %7 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %3, <4 x float> %6, i32 10)
+  ret <4 x float> %7
+}
+
+define <4 x float> @test_round_ss_floor(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_round_ss_floor(
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = call float @llvm.floor.f32(float [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[A:%.*]], float [[TMP2]], i64 0
+; CHECK-NEXT:    ret <4 x float> [[TMP3]]
+;
+  %1 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a, <4 x float> %b, i32 1)
+  ret <4 x float> %1
+}
+
+define <4 x float> @test_round_ss_ceil(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_round_ss_ceil(
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = call float @llvm.ceil.f32(float [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[A:%.*]], float [[TMP2]], i64 0
+; CHECK-NEXT:    ret <4 x float> [[TMP3]]
+;
+  %1 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a, <4 x float> %b, i32 2)
+  ret <4 x float> %1
+}
+
+define float @test_round_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_round_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> undef, <4 x float> [[TMP1]], i32 10)
+; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
+; CHECK-NEXT:    ret float [[R]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+  %9 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %4, <4 x float> %8, i32 10)
+  %r = extractelement <4 x float> %9, i32 0
+  ret float %r
+}
+
+define float @test_round_ss_2(float %a, float %b) {
+; CHECK-LABEL: @test_round_ss_2(
+; CHECK-NEXT:    ret float 2.000000e+00
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = insertelement <4 x float> undef, float %b, i32 0
+  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+  %9 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %4, <4 x float> %8, i32 10)
+  %r = extractelement <4 x float> %9, i32 2
+  ret float %r
+}
+
+declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
+declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone

Added: llvm/trunk/test/Transforms/InstCombine/X86/x86-sse4a.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-sse4a.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/X86/x86-sse4a.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/X86/x86-sse4a.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,408 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+;
+; EXTRQ
+;
+
+define <2 x i64> @test_extrq_call(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_call(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> [[X:%.*]], <16 x i8> [[Y:%.*]]) #1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrq_zero_arg0(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_zero_arg0(
+; CHECK-NEXT:    ret <2 x i64> <i64 0, i64 undef>
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> zeroinitializer, <16 x i8> %y) nounwind
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrq_zero_arg1(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_zero_arg1(
+; CHECK-NEXT:    ret <2 x i64> [[X:%.*]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> zeroinitializer) nounwind
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrq_to_extqi(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_to_extqi(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> [[X:%.*]], i8 8, i8 15)
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrq_constant(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_constant(
+; CHECK-NEXT:    ret <2 x i64> <i64 255, i64 undef>
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 55>, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrq_constant_undef(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_constant_undef(
+; CHECK-NEXT:    ret <2 x i64> <i64 65535, i64 undef>
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 undef>, <16 x i8> <i8 16, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrq_call_constexpr(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrq_call_constexpr(
+; CHECK-NEXT:    ret <2 x i64> [[X:%.*]]
+;
+  %1 = call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> bitcast (<2 x i64> <i64 0, i64 undef> to <16 x i8>))
+  ret <2 x i64> %1
+}
+
+;
+; EXTRQI
+;
+
+define <2 x i64> @test_extrqi_call(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_call(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> [[X:%.*]], i8 8, i8 23)
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrqi_shuffle_1zuu(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_shuffle_1zuu(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 32, i8 32)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> <i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 2, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 16)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrqi_undef(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_undef(
+; CHECK-NEXT:    ret <2 x i64> undef
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 32, i8 33)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrqi_zero(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_zero(
+; CHECK-NEXT:    ret <2 x i64> <i64 0, i64 undef>
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 3, i8 18)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrqi_constant(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_constant(
+; CHECK-NEXT:    ret <2 x i64> <i64 7, i64 undef>
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 55>, i8 3, i8 18)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrqi_constant_undef(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_constant_undef(
+; CHECK-NEXT:    ret <2 x i64> <i64 15, i64 undef>
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 undef>, i8 4, i8 18)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_extrqi_call_constexpr() {
+; CHECK-LABEL: @test_extrqi_call_constexpr(
+; CHECK-NEXT:    ret <2 x i64> zeroinitializer
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 8, i8 16)
+  ret <2 x i64> %1
+}
+
+;
+; INSERTQ
+;
+
+define <2 x i64> @test_insertq_call(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertq_call(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]]) #1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_insertq_to_insertqi(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertq_to_insertqi(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> <i64 8, i64 undef>, i8 18, i8 2)
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> <i64 8, i64 658>) nounwind
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_insertq_constant(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertq_constant(
+; CHECK-NEXT:    ret <2 x i64> <i64 32, i64 undef>
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 8, i64 658>) nounwind
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_insertq_constant_undef(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertq_constant_undef(
+; CHECK-NEXT:    ret <2 x i64> <i64 33, i64 undef>
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 1, i64 undef>, <2 x i64> <i64 8, i64 658>) nounwind
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_insertq_call_constexpr(<2 x i64> %x) {
+; CHECK-LABEL: @test_insertq_call_constexpr(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> <i64 0, i64 undef>, i8 2, i8 0)
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>))
+  ret <2 x i64> %1
+}
+
+;
+; INSERTQI
+;
+
+define <16 x i8> @test_insertqi_shuffle_04uu(<16 x i8> %v, <16 x i8> %i) {
+; CHECK-LABEL: @test_insertqi_shuffle_04uu(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[V:%.*]], <16 x i8> [[I:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
+;
+  %1 = bitcast <16 x i8> %v to <2 x i64>
+  %2 = bitcast <16 x i8> %i to <2 x i64>
+  %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 32, i8 32)
+  %4 = bitcast <2 x i64> %3 to <16 x i8>
+  ret <16 x i8> %4
+}
+
+define <16 x i8> @test_insertqi_shuffle_8123uuuu(<16 x i8> %v, <16 x i8> %i) {
+; CHECK-LABEL: @test_insertqi_shuffle_8123uuuu(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[I:%.*]], <16 x i8> [[V:%.*]], <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
+;
+  %1 = bitcast <16 x i8> %v to <2 x i64>
+  %2 = bitcast <16 x i8> %i to <2 x i64>
+  %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 16, i8 0)
+  %4 = bitcast <2 x i64> %3 to <16 x i8>
+  ret <16 x i8> %4
+}
+
+define <2 x i64> @test_insertqi_constant(<2 x i64> %v, <2 x i64> %i) {
+; CHECK-LABEL: @test_insertqi_constant(
+; CHECK-NEXT:    ret <2 x i64> <i64 -131055, i64 undef>
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 8, i64 0>, i8 16, i8 1)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_insertqi_call_constexpr(<2 x i64> %x) {
+; CHECK-LABEL: @test_insertqi_call_constexpr(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> <i64 0, i64 undef>, i8 48, i8 3)
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 48, i8 3)
+  ret <2 x i64> %1
+}
+
+; The result of this insert is the second arg, since the top 64 bits of
+; the result are undefined and we copy the bottom 64 bits from the
+; second arg.
+define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) {
+; CHECK-LABEL: @testInsert64Bits(
+; CHECK-NEXT:    ret <2 x i64> [[I:%.*]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) {
+; CHECK-LABEL: @testZeroLength(
+; CHECK-NEXT:    ret <2 x i64> [[I:%.*]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) {
+; CHECK-LABEL: @testUndefinedInsertq_1(
+; CHECK-NEXT:    ret <2 x i64> undef
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) {
+; CHECK-LABEL: @testUndefinedInsertq_2(
+; CHECK-NEXT:    ret <2 x i64> undef
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) {
+; CHECK-LABEL: @testUndefinedInsertq_3(
+; CHECK-NEXT:    ret <2 x i64> undef
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16)
+  ret <2 x i64> %1
+}
+
+;
+; Vector Demanded Bits
+;
+
+define <2 x i64> @test_extrq_arg0(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_arg0(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> [[X:%.*]], <16 x i8> [[Y:%.*]]) #1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %y) nounwind
+  ret <2 x i64> %2
+}
+
+define <2 x i64> @test_extrq_arg1(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_arg1(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> [[X:%.*]], <16 x i8> [[Y:%.*]]) #1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %1) nounwind
+  ret <2 x i64> %2
+}
+
+define <2 x i64> @test_extrq_args01(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_args01(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> [[X:%.*]], <16 x i8> [[Y:%.*]]) #1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %2 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  %3 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %2) nounwind
+  ret <2 x i64> %3
+}
+
+define <2 x i64> @test_extrq_ret(<2 x i64> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_extrq_ret(
+; CHECK-NEXT:    ret <2 x i64> undef
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
+  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+  ret <2 x i64> %2
+}
+
+define <2 x i64> @test_extrqi_arg0(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_arg0(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> [[X:%.*]], i8 3, i8 2)
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %2 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %1, i8 3, i8 2)
+  ret <2 x i64> %2
+}
+
+define <2 x i64> @test_extrqi_ret(<2 x i64> %x) {
+; CHECK-LABEL: @test_extrqi_ret(
+; CHECK-NEXT:    ret <2 x i64> undef
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2) nounwind
+  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+  ret <2 x i64> %2
+}
+
+define <2 x i64> @test_insertq_arg0(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertq_arg0(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]]) #1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %1, <2 x i64> %y) nounwind
+  ret <2 x i64> %2
+}
+
+define <2 x i64> @test_insertq_ret(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertq_ret(
+; CHECK-NEXT:    ret <2 x i64> undef
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
+  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+  ret <2 x i64> %2
+}
+
+define <2 x i64> @test_insertqi_arg0(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertqi_arg0(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], i8 3, i8 2) #1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %y, i8 3, i8 2) nounwind
+  ret <2 x i64> %2
+}
+
+define <2 x i64> @test_insertqi_arg1(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertqi_arg1(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], i8 3, i8 2) #1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %1, i8 3, i8 2) nounwind
+  ret <2 x i64> %2
+}
+
+define <2 x i64> @test_insertqi_args01(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertqi_args01(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], i8 3, i8 2) #1
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %2 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 3, i8 2) nounwind
+  ret <2 x i64> %3
+}
+
+define <2 x i64> @test_insertqi_ret(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: @test_insertqi_ret(
+; CHECK-NEXT:    ret <2 x i64> undef
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) nounwind
+  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+  ret <2 x i64> %2
+}
+
+; CHECK: declare <2 x i64> @llvm.x86.sse4a.extrq
+declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>) nounwind
+
+; CHECK: declare <2 x i64> @llvm.x86.sse4a.extrqi
+declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind
+
+; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertq
+declare <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64>, <2 x i64>) nounwind
+
+; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi
+declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind

Added: llvm/trunk/test/Transforms/InstCombine/X86/x86-vec_demanded_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-vec_demanded_elts.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/X86/x86-vec_demanded_elts.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/X86/x86-vec_demanded_elts.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,110 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define i16 @test1(float %f) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[TMP281:%.*]] = fadd float %f, -1.000000e+00
+; CHECK-NEXT:    [[TMP373:%.*]] = fmul float [[TMP281]], 5.000000e-01
+; CHECK-NEXT:    [[TMP374:%.*]] = insertelement <4 x float> undef, float [[TMP373]], i32 0
+; CHECK-NEXT:    [[TMP48:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP374]], <4 x float> <float 6.553500e+04, float undef, float undef, float undef>)
+; CHECK-NEXT:    [[TMP59:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP48]], <4 x float> <float 0.000000e+00, float undef, float undef, float undef>)
+; CHECK-NEXT:    [[TMP_UPGRD_1:%.*]] = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> [[TMP59]])
+; CHECK-NEXT:    [[TMP69:%.*]] = trunc i32 [[TMP_UPGRD_1]] to i16
+; CHECK-NEXT:    ret i16 [[TMP69]]
+;
+  %tmp = insertelement <4 x float> undef, float %f, i32 0
+  %tmp10 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 1
+  %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2
+  %tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3
+  %tmp28 = tail call <4 x float> @llvm.x86.sse.sub.ss( <4 x float> %tmp12, <4 x float> < float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )
+  %tmp37 = tail call <4 x float> @llvm.x86.sse.mul.ss( <4 x float> %tmp28, <4 x float> < float 5.000000e-01, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )
+  %tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp37, <4 x float> < float 6.553500e+04, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )
+  %tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> zeroinitializer )
+  %tmp.upgrd.1 = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 )
+  %tmp69 = trunc i32 %tmp.upgrd.1 to i16
+  ret i16 %tmp69
+}
+
+define i64 @test3(float %f, double %d) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[V00:%.*]] = insertelement <4 x float> undef, float %f, i32 0
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> [[V00]])
+; CHECK-NEXT:    [[V10:%.*]] = insertelement <4 x float> undef, float %f, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> [[V10]])
+; CHECK-NEXT:    [[V20:%.*]] = insertelement <4 x float> undef, float %f, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> [[V20]])
+; CHECK-NEXT:    [[V30:%.*]] = insertelement <4 x float> undef, float %f, i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> [[V30]])
+; CHECK-NEXT:    [[V40:%.*]] = insertelement <2 x double> undef, double %d, i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> [[V40]])
+; CHECK-NEXT:    [[V50:%.*]] = insertelement <2 x double> undef, double %d, i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> [[V50]])
+; CHECK-NEXT:    [[V60:%.*]] = insertelement <2 x double> undef, double %d, i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> [[V60]])
+; CHECK-NEXT:    [[V70:%.*]] = insertelement <2 x double> undef, double %d, i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> [[V70]])
+; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP0]], [[TMP2]]
+; CHECK-NEXT:    [[TMP9:%.*]] = add i32 [[TMP4]], [[TMP6]]
+; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
+; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]]
+; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]]
+; CHECK-NEXT:    ret i64 [[TMP15]]
+;
+  %v00 = insertelement <4 x float> undef, float %f, i32 0
+  %v01 = insertelement <4 x float> %v00, float 0.000000e+00, i32 1
+  %v02 = insertelement <4 x float> %v01, float 0.000000e+00, i32 2
+  %v03 = insertelement <4 x float> %v02, float 0.000000e+00, i32 3
+  %tmp0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %v03)
+  %v10 = insertelement <4 x float> undef, float %f, i32 0
+  %v11 = insertelement <4 x float> %v10, float 0.000000e+00, i32 1
+  %v12 = insertelement <4 x float> %v11, float 0.000000e+00, i32 2
+  %v13 = insertelement <4 x float> %v12, float 0.000000e+00, i32 3
+  %tmp1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %v13)
+  %v20 = insertelement <4 x float> undef, float %f, i32 0
+  %v21 = insertelement <4 x float> %v20, float 0.000000e+00, i32 1
+  %v22 = insertelement <4 x float> %v21, float 0.000000e+00, i32 2
+  %v23 = insertelement <4 x float> %v22, float 0.000000e+00, i32 3
+  %tmp2 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %v23)
+  %v30 = insertelement <4 x float> undef, float %f, i32 0
+  %v31 = insertelement <4 x float> %v30, float 0.000000e+00, i32 1
+  %v32 = insertelement <4 x float> %v31, float 0.000000e+00, i32 2
+  %v33 = insertelement <4 x float> %v32, float 0.000000e+00, i32 3
+  %tmp3 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %v33)
+  %v40 = insertelement <2 x double> undef, double %d, i32 0
+  %v41 = insertelement <2 x double> %v40, double 0.000000e+00, i32 1
+  %tmp4 = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %v41)
+  %v50 = insertelement <2 x double> undef, double %d, i32 0
+  %v51 = insertelement <2 x double> %v50, double 0.000000e+00, i32 1
+  %tmp5 = tail call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %v51)
+  %v60 = insertelement <2 x double> undef, double %d, i32 0
+  %v61 = insertelement <2 x double> %v60, double 0.000000e+00, i32 1
+  %tmp6 = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %v61)
+  %v70 = insertelement <2 x double> undef, double %d, i32 0
+  %v71 = insertelement <2 x double> %v70, double 0.000000e+00, i32 1
+  %tmp7 = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %v71)
+  %tmp8 = add i32 %tmp0, %tmp2
+  %tmp9 = add i32 %tmp4, %tmp6
+  %tmp10 = add i32 %tmp8, %tmp9
+  %tmp11 = sext i32 %tmp10 to i64
+  %tmp12 = add i64 %tmp1, %tmp3
+  %tmp13 = add i64 %tmp5, %tmp7
+  %tmp14 = add i64 %tmp12, %tmp13
+  %tmp15 = add i64 %tmp11, %tmp14
+  ret i64 %tmp15
+}
+
+declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.cvtss2si(<4 x float>)
+declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>)
+declare i32 @llvm.x86.sse.cvttss2si(<4 x float>)
+declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>)
+declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>)
+declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>)
+declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>)
+declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>)

Added: llvm/trunk/test/Transforms/InstCombine/X86/x86-vector-shifts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-vector-shifts.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/X86/x86-vector-shifts.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/X86/x86-vector-shifts.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,3436 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+;
+; ASHR - Immediate
+;
+
+define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrai_w_0(
+; CHECK-NEXT:    ret <8 x i16> %v
+;
+  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 0)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrai_w_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+;
+  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrai_w_64(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+;
+  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64)
+  ret <8 x i16> %1
+}
+
+define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrai_d_0(
+; CHECK-NEXT:    ret <4 x i32> %v
+;
+  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 0)
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrai_d_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15)
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrai_d_64(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64)
+  ret <4 x i32> %1
+}
+
+define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrai_w_0(
+; CHECK-NEXT:    ret <16 x i16> %v
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 0)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrai_w_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrai_w_64(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64)
+  ret <16 x i16> %1
+}
+
+define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrai_d_0(
+; CHECK-NEXT:    ret <8 x i32> %v
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 0)
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrai_d_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15)
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrai_d_64(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64)
+  ret <8 x i32> %1
+}
+
+define <2 x i64> @avx512_psrai_q_128_0(<2 x i64> %v) {
+; CHECK-LABEL: @avx512_psrai_q_128_0(
+; CHECK-NEXT:    ret <2 x i64> %v
+;
+  %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 0)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @avx512_psrai_q_128_15(<2 x i64> %v) {
+; CHECK-LABEL: @avx512_psrai_q_128_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 15)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @avx512_psrai_q_128_64(<2 x i64> %v) {
+; CHECK-LABEL: @avx512_psrai_q_128_64(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> %v, <i64 63, i64 63>
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 64)
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @avx512_psrai_q_256_0(<4 x i64> %v) {
+; CHECK-LABEL: @avx512_psrai_q_256_0(
+; CHECK-NEXT:    ret <4 x i64> %v
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 0)
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @avx512_psrai_q_256_15(<4 x i64> %v) {
+; CHECK-LABEL: @avx512_psrai_q_256_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 15)
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @avx512_psrai_q_256_64(<4 x i64> %v) {
+; CHECK-LABEL: @avx512_psrai_q_256_64(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> %v, <i64 63, i64 63, i64 63, i64 63>
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 64)
+  ret <4 x i64> %1
+}
+
+define <32 x i16> @avx512_psrai_w_512_0(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrai_w_512_0(
+; CHECK-NEXT:    ret <32 x i16> %v
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 0)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrai_w_512_15(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrai_w_512_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 15)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrai_w_512_64(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrai_w_512_64(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 64)
+  ret <32 x i16> %1
+}
+
+define <16 x i32> @avx512_psrai_d_512_0(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrai_d_512_0(
+; CHECK-NEXT:    ret <16 x i32> %v
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 0)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psrai_d_512_15(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrai_d_512_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 15)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psrai_d_512_64(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrai_d_512_64(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 64)
+  ret <16 x i32> %1
+}
+
+define <8 x i64> @avx512_psrai_q_512_0(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrai_q_512_0(
+; CHECK-NEXT:    ret <8 x i64> %v
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 0)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrai_q_512_15(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrai_q_512_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> %v, <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 15)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrai_q_512_64(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrai_q_512_64(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> %v, <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 64)
+  ret <8 x i64> %1
+}
+
+;
+; LSHR - Immediate
+;
+
+define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrli_w_0(
+; CHECK-NEXT:    ret <8 x i16> %v
+;
+  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrli_w_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+;
+  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrli_w_64(
+; CHECK-NEXT:    ret <8 x i16> zeroinitializer
+;
+  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64)
+  ret <8 x i16> %1
+}
+
+define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrli_d_0(
+; CHECK-NEXT:    ret <4 x i32> %v
+;
+  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0)
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrli_d_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrli_d_64(
+; CHECK-NEXT:    ret <4 x i32> zeroinitializer
+;
+  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64)
+  ret <4 x i32> %1
+}
+
+define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psrli_q_0(
+; CHECK-NEXT:    ret <2 x i64> %v
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psrli_q_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psrli_q_64(
+; CHECK-NEXT:    ret <2 x i64> zeroinitializer
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64)
+  ret <2 x i64> %1
+}
+
+define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrli_w_0(
+; CHECK-NEXT:    ret <16 x i16> %v
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrli_w_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrli_w_64(
+; CHECK-NEXT:    ret <16 x i16> zeroinitializer
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64)
+  ret <16 x i16> %1
+}
+
+define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrli_d_0(
+; CHECK-NEXT:    ret <8 x i32> %v
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0)
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrli_d_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrli_d_64(
+; CHECK-NEXT:    ret <8 x i32> zeroinitializer
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64)
+  ret <8 x i32> %1
+}
+
+define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrli_q_0(
+; CHECK-NEXT:    ret <4 x i64> %v
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0)
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrli_q_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrli_q_64(
+; CHECK-NEXT:    ret <4 x i64> zeroinitializer
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64)
+  ret <4 x i64> %1
+}
+
+define <32 x i16> @avx512_psrli_w_512_0(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrli_w_512_0(
+; CHECK-NEXT:    ret <32 x i16> %v
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 0)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrli_w_512_15(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrli_w_512_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 15)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrli_w_512_64(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrli_w_512_64(
+; CHECK-NEXT:    ret <32 x i16> zeroinitializer
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 64)
+  ret <32 x i16> %1
+}
+
+define <16 x i32> @avx512_psrli_d_512_0(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrli_d_512_0(
+; CHECK-NEXT:    ret <16 x i32> %v
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 0)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psrli_d_512_15(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrli_d_512_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 15)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psrli_d_512_64(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrli_d_512_64(
+; CHECK-NEXT:    ret <16 x i32> zeroinitializer
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 64)
+  ret <16 x i32> %1
+}
+
+define <8 x i64> @avx512_psrli_q_512_0(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrli_q_512_0(
+; CHECK-NEXT:    ret <8 x i64> %v
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 0)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrli_q_512_15(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrli_q_512_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> %v, <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 15)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrli_q_512_64(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrli_q_512_64(
+; CHECK-NEXT:    ret <8 x i64> zeroinitializer
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 64)
+  ret <8 x i64> %1
+}
+
+;
+; SHL - Immediate
+;
+
+define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_pslli_w_0(
+; CHECK-NEXT:    ret <8 x i16> %v
+;
+  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_pslli_w_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+;
+  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_pslli_w_64(
+; CHECK-NEXT:    ret <8 x i16> zeroinitializer
+;
+  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64)
+  ret <8 x i16> %1
+}
+
+define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_pslli_d_0(
+; CHECK-NEXT:    ret <4 x i32> %v
+;
+  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0)
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_pslli_d_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_pslli_d_64(
+; CHECK-NEXT:    ret <4 x i32> zeroinitializer
+;
+  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64)
+  ret <4 x i32> %1
+}
+
+define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_pslli_q_0(
+; CHECK-NEXT:    ret <2 x i64> %v
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_pslli_q_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_pslli_q_64(
+; CHECK-NEXT:    ret <2 x i64> zeroinitializer
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64)
+  ret <2 x i64> %1
+}
+
+define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_pslli_w_0(
+; CHECK-NEXT:    ret <16 x i16> %v
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_pslli_w_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_pslli_w_64(
+; CHECK-NEXT:    ret <16 x i16> zeroinitializer
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64)
+  ret <16 x i16> %1
+}
+
+define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_pslli_d_0(
+; CHECK-NEXT:    ret <8 x i32> %v
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0)
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_pslli_d_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_pslli_d_64(
+; CHECK-NEXT:    ret <8 x i32> zeroinitializer
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64)
+  ret <8 x i32> %1
+}
+
+define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_pslli_q_0(
+; CHECK-NEXT:    ret <4 x i64> %v
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0)
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_pslli_q_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_pslli_q_64(
+; CHECK-NEXT:    ret <4 x i64> zeroinitializer
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64)
+  ret <4 x i64> %1
+}
+
+define <32 x i16> @avx512_pslli_w_512_0(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_pslli_w_512_0(
+; CHECK-NEXT:    ret <32 x i16> %v
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 0)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_pslli_w_512_15(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_pslli_w_512_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 15)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_pslli_w_512_64(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_pslli_w_512_64(
+; CHECK-NEXT:    ret <32 x i16> zeroinitializer
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 64)
+  ret <32 x i16> %1
+}
+
+define <16 x i32> @avx512_pslli_d_512_0(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_pslli_d_512_0(
+; CHECK-NEXT:    ret <16 x i32> %v
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 0)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_pslli_d_512_15(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_pslli_d_512_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 15)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_pslli_d_512_64(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_pslli_d_512_64(
+; CHECK-NEXT:    ret <16 x i32> zeroinitializer
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 64)
+  ret <16 x i32> %1
+}
+
+define <8 x i64> @avx512_pslli_q_512_0(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_pslli_q_512_0(
+; CHECK-NEXT:    ret <8 x i64> %v
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 0)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_pslli_q_512_15(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_pslli_q_512_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> %v, <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 15)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_pslli_q_512_64(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_pslli_q_512_64(
+; CHECK-NEXT:    ret <8 x i64> zeroinitializer
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 64)
+  ret <8 x i64> %1
+}
+
+;
+; ASHR - Constant Vector
+;
+
+define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psra_w_0(
+; CHECK-NEXT:    ret <8 x i16> %v
+;
+  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> zeroinitializer)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psra_w_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+;
+  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psra_w_15_splat(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+;
+  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psra_w_64(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+;
+  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+  ret <8 x i16> %1
+}
+
+define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psra_d_0(
+; CHECK-NEXT:    ret <4 x i32> %v
+;
+  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> zeroinitializer)
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psra_d_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psra_d_15_splat(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psra_d_64(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+  ret <4 x i32> %1
+}
+
+define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psra_w_0(
+; CHECK-NEXT:    ret <16 x i16> %v
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> zeroinitializer)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psra_w_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psra_w_15_splat(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psra_w_64(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+  ret <16 x i16> %1
+}
+
+define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psra_d_0(
+; CHECK-NEXT:    ret <8 x i32> %v
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> zeroinitializer)
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psra_d_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psra_d_15_splat(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psra_d_64(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+  ret <8 x i32> %1
+}
+
+define <2 x i64> @avx512_psra_q_128_0(<2 x i64> %v) {
+; CHECK-LABEL: @avx512_psra_q_128_0(
+; CHECK-NEXT:    ret <2 x i64> %v
+;
+  %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> zeroinitializer)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @avx512_psra_q_128_15(<2 x i64> %v) {
+; CHECK-LABEL: @avx512_psra_q_128_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @avx512_psra_q_128_64(<2 x i64> %v) {
+; CHECK-LABEL: @avx512_psra_q_128_64(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> %v, <i64 63, i64 63>
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @avx512_psra_q_256_0(<4 x i64> %v) {
+; CHECK-LABEL: @avx512_psra_q_256_0(
+; CHECK-NEXT:    ret <4 x i64> %v
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> zeroinitializer)
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @avx512_psra_q_256_15(<4 x i64> %v) {
+; CHECK-LABEL: @avx512_psra_q_256_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @avx512_psra_q_256_64(<4 x i64> %v) {
+; CHECK-LABEL: @avx512_psra_q_256_64(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> %v, <i64 63, i64 63, i64 63, i64 63>
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
+  ret <4 x i64> %1
+}
+
+define <32 x i16> @avx512_psra_w_512_0(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psra_w_512_0(
+; CHECK-NEXT:    ret <32 x i16> %v
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> zeroinitializer)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psra_w_512_15(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psra_w_512_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psra_w_512_15_splat(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psra_w_512_15_splat(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psra_w_512_64(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psra_w_512_64(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+  ret <32 x i16> %1
+}
+
+define <16 x i32> @avx512_psra_d_512_0(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psra_d_512_0(
+; CHECK-NEXT:    ret <16 x i32> %v
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> zeroinitializer)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psra_d_512_15(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psra_d_512_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psra_d_512_15_splat(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psra_d_512_15_splat(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psra_d_512_64(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psra_d_512_64(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+  ret <16 x i32> %1
+}
+
+define <8 x i64> @avx512_psra_q_512_0(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psra_q_512_0(
+; CHECK-NEXT:    ret <8 x i64> %v
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> zeroinitializer)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psra_q_512_15(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psra_q_512_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> %v, <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psra_q_512_64(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psra_q_512_64(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> %v, <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> <i64 64, i64 9999>)
+  ret <8 x i64> %1
+}
+
+;
+; LSHR - Constant Vector
+;
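+; The logical right shifts fold the same way, except that a constant count of at least
+; the element width zeroes every lane, so those cases fold to zeroinitializer instead of
+; a clamped 'lshr'.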
+
+define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrl_w_0(
+; CHECK-NEXT:    ret <8 x i16> %v
+;
+  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrl_w_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+;
+  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrl_w_15_splat(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrl_w_15_splat(
+; CHECK-NEXT:    ret <8 x i16> zeroinitializer
+;
+  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psrl_w_64(
+; CHECK-NEXT:    ret <8 x i16> zeroinitializer
+;
+  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+  ret <8 x i16> %1
+}
+
+define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrl_d_0(
+; CHECK-NEXT:    ret <4 x i32> %v
+;
+  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer)
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrl_d_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrl_d_15_splat(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrl_d_15_splat(
+; CHECK-NEXT:    ret <4 x i32> zeroinitializer
+;
+  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psrl_d_64(
+; CHECK-NEXT:    ret <4 x i32> zeroinitializer
+;
+  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+  ret <4 x i32> %1
+}
+
+define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psrl_q_0(
+; CHECK-NEXT:    ret <2 x i64> %v
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psrl_q_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psrl_q_64(
+; CHECK-NEXT:    ret <2 x i64> zeroinitializer
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
+  ret <2 x i64> %1
+}
+
+define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrl_w_0(
+; CHECK-NEXT:    ret <16 x i16> %v
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrl_w_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrl_w_15_splat(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrl_w_15_splat(
+; CHECK-NEXT:    ret <16 x i16> zeroinitializer
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psrl_w_64(
+; CHECK-NEXT:    ret <16 x i16> zeroinitializer
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+  ret <16 x i16> %1
+}
+
+define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrl_d_0(
+; CHECK-NEXT:    ret <8 x i32> %v
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer)
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrl_d_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrl_d_15_splat(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrl_d_15_splat(
+; CHECK-NEXT:    ret <8 x i32> zeroinitializer
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrl_d_64(
+; CHECK-NEXT:    ret <8 x i32> zeroinitializer
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+  ret <8 x i32> %1
+}
+
+define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrl_q_0(
+; CHECK-NEXT:    ret <4 x i64> %v
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer)
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrl_q_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrl_q_64(
+; CHECK-NEXT:    ret <4 x i64> zeroinitializer
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
+  ret <4 x i64> %1
+}
+
+define <32 x i16> @avx512_psrl_w_512_0(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrl_w_512_0(
+; CHECK-NEXT:    ret <32 x i16> %v
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> zeroinitializer)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrl_w_512_15(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrl_w_512_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrl_w_512_15_splat(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrl_w_512_15_splat(
+; CHECK-NEXT:    ret <32 x i16> zeroinitializer
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrl_w_512_64(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrl_w_512_64(
+; CHECK-NEXT:    ret <32 x i16> zeroinitializer
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+  ret <32 x i16> %1
+}
+
+define <16 x i32> @avx512_psrl_d_512_0(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrl_d_512_0(
+; CHECK-NEXT:    ret <16 x i32> %v
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> zeroinitializer)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psrl_d_512_15(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrl_d_512_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psrl_d_512_15_splat(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrl_d_512_15_splat(
+; CHECK-NEXT:    ret <16 x i32> zeroinitializer
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psrl_d_512_64(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrl_d_512_64(
+; CHECK-NEXT:    ret <16 x i32> zeroinitializer
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+  ret <16 x i32> %1
+}
+
+define <8 x i64> @avx512_psrl_q_512_0(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrl_q_512_0(
+; CHECK-NEXT:    ret <8 x i64> %v
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> zeroinitializer)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrl_q_512_15(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrl_q_512_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> %v, <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrl_q_512_64(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrl_q_512_64(
+; CHECK-NEXT:    ret <8 x i64> zeroinitializer
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> <i64 64, i64 9999>)
+  ret <8 x i64> %1
+}
+
+;
+; SHL - Constant Vector
+;
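+; The left shifts mirror the logical right shifts: in-range constant counts become a
+; plain 'shl', and counts of at least the element width fold the result to
+; zeroinitializer.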
+
+define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psll_w_0(
+; CHECK-NEXT:    ret <8 x i16> %v
+;
+  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> zeroinitializer)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psll_w_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+;
+  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psll_w_15_splat(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psll_w_15_splat(
+; CHECK-NEXT:    ret <8 x i16> zeroinitializer
+;
+  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) {
+; CHECK-LABEL: @sse2_psll_w_64(
+; CHECK-NEXT:    ret <8 x i16> zeroinitializer
+;
+  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+  ret <8 x i16> %1
+}
+
+define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psll_d_0(
+; CHECK-NEXT:    ret <4 x i32> %v
+;
+  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> zeroinitializer)
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psll_d_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psll_d_15_splat(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psll_d_15_splat(
+; CHECK-NEXT:    ret <4 x i32> zeroinitializer
+;
+  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) {
+; CHECK-LABEL: @sse2_psll_d_64(
+; CHECK-NEXT:    ret <4 x i32> zeroinitializer
+;
+  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+  ret <4 x i32> %1
+}
+
+define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psll_q_0(
+; CHECK-NEXT:    ret <2 x i64> %v
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> zeroinitializer)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psll_q_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) {
+; CHECK-LABEL: @sse2_psll_q_64(
+; CHECK-NEXT:    ret <2 x i64> zeroinitializer
+;
+  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
+  ret <2 x i64> %1
+}
+
+define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psll_w_0(
+; CHECK-NEXT:    ret <16 x i16> %v
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> zeroinitializer)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psll_w_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psll_w_15_splat(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psll_w_15_splat(
+; CHECK-NEXT:    ret <16 x i16> zeroinitializer
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) {
+; CHECK-LABEL: @avx2_psll_w_64(
+; CHECK-NEXT:    ret <16 x i16> zeroinitializer
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+  ret <16 x i16> %1
+}
+
+define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psll_d_0(
+; CHECK-NEXT:    ret <8 x i32> %v
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> zeroinitializer)
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psll_d_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psll_d_15_splat(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psll_d_15_splat(
+; CHECK-NEXT:    ret <8 x i32> zeroinitializer
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+  ret <8 x i32> %1
+}
+
+define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psll_d_64(
+; CHECK-NEXT:    ret <8 x i32> zeroinitializer
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+  ret <8 x i32> %1
+}
+
+define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psll_q_0(
+; CHECK-NEXT:    ret <4 x i64> %v
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> zeroinitializer)
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psll_q_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psll_q_64(
+; CHECK-NEXT:    ret <4 x i64> zeroinitializer
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
+  ret <4 x i64> %1
+}
+
+define <32 x i16> @avx512_psll_w_512_0(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psll_w_512_0(
+; CHECK-NEXT:    ret <32 x i16> %v
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> zeroinitializer)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psll_w_512_15(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psll_w_512_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psll_w_512_15_splat(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psll_w_512_15_splat(
+; CHECK-NEXT:    ret <32 x i16> zeroinitializer
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psll_w_512_64(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psll_w_512_64(
+; CHECK-NEXT:    ret <32 x i16> zeroinitializer
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
+  ret <32 x i16> %1
+}
+
+define <16 x i32> @avx512_psll_d_512_0(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psll_d_512_0(
+; CHECK-NEXT:    ret <16 x i32> %v
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> zeroinitializer)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psll_d_512_15(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psll_d_512_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psll_d_512_15_splat(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psll_d_512_15_splat(
+; CHECK-NEXT:    ret <16 x i32> zeroinitializer
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psll_d_512_64(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psll_d_512_64(
+; CHECK-NEXT:    ret <16 x i32> zeroinitializer
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
+  ret <16 x i32> %1
+}
+
+define <8 x i64> @avx512_psll_q_512_0(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psll_q_512_0(
+; CHECK-NEXT:    ret <8 x i64> %v
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> zeroinitializer)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psll_q_512_15(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psll_q_512_15(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> %v, <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psll_q_512_64(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psll_q_512_64(
+; CHECK-NEXT:    ret <8 x i64> zeroinitializer
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> <i64 64, i64 9999>)
+  ret <8 x i64> %1
+}
+
+;
+; ASHR - Constant Per-Element Vector
+;
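+; For the per-element arithmetic shifts each lane carries its own count: an all-zero
+; count vector returns the input, in-range lanes fold to a per-element 'ashr', lanes
+; with a count of at least the element width are clamped to (element width - 1), and
+; undef count lanes stay undef in the folded shift.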
+
+define <4 x i32> @avx2_psrav_d_128_0(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrav_d_128_0(
+; CHECK-NEXT:    ret <4 x i32> %v
+;
+  %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> zeroinitializer)
+  ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psrav_d_256_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrav_d_256_0(
+; CHECK-NEXT:    ret <8 x i32> %v
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
+  ret <8 x i32> %1
+}
+
+define <16 x i32> @avx512_psrav_d_512_0(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrav_d_512_0(
+; CHECK-NEXT:    ret <16 x i32> %v
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> zeroinitializer)
+  ret <16 x i32> %1
+}
+
+define <4 x i32> @avx2_psrav_d_128_var(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrav_d_128_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 0, i32 8, i32 16, i32 31>
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+  %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
+  ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psrav_d_256_var(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrav_d_256_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>)
+  ret <8 x i32> %1
+}
+
+define <16 x i32> @avx512_psrav_d_512_var(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrav_d_512_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>)
+  ret <16 x i32> %1
+}
+
+define <4 x i32> @avx2_psrav_d_128_allbig(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrav_d_128_allbig(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 undef>
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+  %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
+  ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psrav_d_256_allbig(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrav_d_256_allbig(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
+  ret <8 x i32> %1
+}
+
+define <16 x i32> @avx512_psrav_d_512_allbig(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrav_d_512_allbig(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555, i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
+  ret <16 x i32> %1
+}
+
+define <4 x i32> @avx2_psrav_d_128_undef(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrav_d_128_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 undef, i32 8, i32 16, i32 31>
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+  %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 64>, i32 undef, i32 0
+  %2 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> %1)
+  ret <4 x i32> %2
+}
+
+define <8 x i32> @avx2_psrav_d_256_undef(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrav_d_256_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 0, i32 undef, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+  %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>, i32 undef, i32 1
+  %2 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> %1)
+  ret <8 x i32> %2
+}
+
+define <16 x i32> @avx512_psrav_d_512_undef(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrav_d_512_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 0, i32 undef, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = insertelement <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>, i32 undef, i32 1
+  %2 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> %1)
+  ret <16 x i32> %2
+}
+
+define <2 x i64> @avx512_psrav_q_128_0(<2 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_128_0(
+; CHECK-NEXT:    ret <2 x i64> %v
+;
+  %1 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> zeroinitializer)
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @avx512_psrav_q_256_0(<4 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_256_0(
+; CHECK-NEXT:    ret <4 x i64> %v
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
+  ret <4 x i64> %1
+}
+
+define <2 x i64> @avx512_psrav_q_128_var(<2 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_128_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> %v, <i64 0, i64 8>
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @avx512_psrav_q_256_var(<4 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_256_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> %v, <i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
+  ret <4 x i64> %1
+}
+
+define <2 x i64> @avx512_psrav_q_128_allbig(<2 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_128_allbig(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> %v, <i64 63, i64 undef>
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> <i64 64, i64 undef>)
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @avx512_psrav_q_256_allbig(<4 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_256_allbig(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> %v, <i64 63, i64 undef, i64 63, i64 63>
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
+  ret <4 x i64> %1
+}
+
+define <2 x i64> @avx512_psrav_q_128_undef(<2 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_128_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> %v, <i64 undef, i64 8>
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 undef, i64 0
+  %2 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> %1)
+  ret <2 x i64> %2
+}
+
+define <4 x i64> @avx512_psrav_q_256_undef(<4 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_256_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> %v, <i64 undef, i64 8, i64 16, i64 31>
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
+  %2 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> %1)
+  ret <4 x i64> %2
+}
+
+define <8 x i64> @avx512_psrav_q_512_0(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_512_0(
+; CHECK-NEXT:    ret <8 x i64> %v
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> zeroinitializer)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrav_q_512_var(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_512_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> %v, <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrav_q_512_allbig(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_512_allbig(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> %v, <i64 63, i64 undef, i64 63, i64 63, i64 63, i64 undef, i64 63, i64 63>
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> <i64 64, i64 undef, i64 -128, i64 -60, i64 64, i64 undef, i64 -128, i64 -60>)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrav_q_512_undef(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrav_q_512_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> %v, <i64 undef, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = insertelement <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
+  %2 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> %1)
+  ret <8 x i64> %2
+}
+
+define <8 x i16> @avx512_psrav_w_128_0(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psrav_w_128_0(
+; CHECK-NEXT:    ret <8 x i16> %v
+;
+  %1 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> zeroinitializer)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @avx512_psrav_w_128_var(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psrav_w_128_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+;
+  %1 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @avx512_psrav_w_128_allbig(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psrav_w_128_allbig(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef>
+; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+;
+  %1 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 undef>)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @avx512_psrav_w_128_undef(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psrav_w_128_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+;
+  %1 = insertelement <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 undef, i64 0
+  %2 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> %1)
+  ret <8 x i16> %2
+}
+
+define <16 x i16> @avx512_psrav_w_256_0(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psrav_w_256_0(
+; CHECK-NEXT:    ret <16 x i16> %v
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> zeroinitializer)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx512_psrav_w_256_var(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psrav_w_256_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
+; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx512_psrav_w_256_allbig(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psrav_w_256_allbig(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 256, i16 16, i16 28, i16 65535, i16 32767>)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx512_psrav_w_256_undef(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psrav_w_256_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
+; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
+;
+  %1 = insertelement <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, i16 undef, i64 0
+  %2 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> %1)
+  ret <16 x i16> %2
+}
+
+define <32 x i16> @avx512_psrav_w_512_0(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrav_w_512_0(
+; CHECK-NEXT:    ret <32 x i16> %v
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> zeroinitializer)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrav_w_512_var(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrav_w_512_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
+; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrav_w_512_allbig(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrav_w_512_allbig(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 undef, i16 15, i16 15>
+; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 128, i16 16, i16 28, i16 65535, i16 32767, i16 56, i16 -14, i16 undef, i16 16, i16 67, i16 567, i16 -32768, i16 4096, i16 8192, i16 -12345, i16 undef, i16 345, i16 123, i16 undef, i16 1024, i16 54321>)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrav_w_512_undef(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrav_w_512_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
+; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
+;
+  %1 = insertelement <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 undef, i64 0
+  %2 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> %1)
+  ret <32 x i16> %2
+}
+
+;
+; LSHR - Constant Per-Element Vector
+;
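+; The per-element logical shifts fold to 'lshr' only when every lane's count is known
+; to be in range. If every lane is out of range the result folds to a constant (zero,
+; or undef for undef count lanes), but a mix of in-range and out-of-range lanes is left
+; as the intrinsic call, since an IR 'lshr' by an out-of-range amount is not defined to
+; produce the zero the intrinsic guarantees.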
+
+define <4 x i32> @avx2_psrlv_d_128_0(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_128_0(
+; CHECK-NEXT:    ret <4 x i32> %v
+;
+  %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> zeroinitializer)
+  ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psrlv_d_256_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_256_0(
+; CHECK-NEXT:    ret <8 x i32> %v
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
+  ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psrlv_d_128_var(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_128_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 0, i32 8, i32 16, i32 31>
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+  %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 31>)
+  ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psrlv_d_256_var(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_256_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
+  ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psrlv_d_128_big(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_128_big(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+  %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
+  ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psrlv_d_256_big(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_256_big(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
+  ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psrlv_d_128_allbig(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_128_allbig(
+; CHECK-NEXT:    ret <4 x i32> <i32 0, i32 0, i32 0, i32 undef>
+;
+  %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
+  ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psrlv_d_256_allbig(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_256_allbig(
+; CHECK-NEXT:    ret <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
+  ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psrlv_d_128_undef(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_128_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 undef, i32 8, i32 16, i32 31>
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+  %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 31>, i32 undef, i32 0
+  %2 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> %1)
+  ret <4 x i32> %2
+}
+
+define <8 x i32> @avx2_psrlv_d_256_undef(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_256_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+  %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
+  %2 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> %1)
+  ret <8 x i32> %2
+}
+
+define <2 x i64> @avx2_psrlv_q_128_0(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_128_0(
+; CHECK-NEXT:    ret <2 x i64> %v
+;
+  %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> zeroinitializer)
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @avx2_psrlv_q_256_0(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_256_0(
+; CHECK-NEXT:    ret <4 x i64> %v
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
+  ret <4 x i64> %1
+}
+
+define <2 x i64> @avx2_psrlv_q_128_var(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_128_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 0, i64 8>
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @avx2_psrlv_q_256_var(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_256_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
+  ret <4 x i64> %1
+}
+
+define <2 x i64> @avx2_psrlv_q_128_big(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_128_big(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @avx2_psrlv_q_256_big(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_256_big(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
+  ret <4 x i64> %1
+}
+
+define <2 x i64> @avx2_psrlv_q_128_allbig(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_128_allbig(
+; CHECK-NEXT:    ret <2 x i64> zeroinitializer
+;
+  %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 128, i64 -64>)
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @avx2_psrlv_q_256_allbig(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_256_allbig(
+; CHECK-NEXT:    ret <4 x i64> <i64 0, i64 undef, i64 0, i64 0>
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
+  ret <4 x i64> %1
+}
+
+; The effective shift amount is 0 (the undef lane can be chosen to be 0), so we return the unshifted input.
+
+define <2 x i64> @avx2_psrlv_q_128_undef(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_128_undef(
+; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
+;
+  %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 undef, i64 1
+  %2 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> %1)
+  ret <2 x i64> %2
+}
+
+define <4 x i64> @avx2_psrlv_q_256_undef(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_256_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 undef, i64 8, i64 16, i64 31>
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
+  %2 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> %1)
+  ret <4 x i64> %2
+}
+
+define <16 x i32> @avx512_psrlv_d_512_0(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrlv_d_512_0(
+; CHECK-NEXT:    ret <16 x i32> %v
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> zeroinitializer)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psrlv_d_512_var(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrlv_d_512_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psrlv_d_512_big(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrlv_d_512_big(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psrlv_d_512_allbig(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrlv_d_512_allbig(
+; CHECK-NEXT:    ret <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555, i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psrlv_d_512_undef(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psrlv_d_512_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> %v, <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = insertelement <16 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
+  %2 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> %1)
+  ret <16 x i32> %2
+}
+
+define <8 x i64> @avx512_psrlv_q_512_0(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrlv_q_512_0(
+; CHECK-NEXT:    ret <8 x i64> %v
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> zeroinitializer)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrlv_q_512_var(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrlv_q_512_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> %v, <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrlv_q_512_big(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrlv_q_512_big(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrlv_q_512_allbig(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrlv_q_512_allbig(
+; CHECK-NEXT:    ret <8 x i64> <i64 0, i64 undef, i64 0, i64 0, i64 0, i64 undef, i64 0, i64 0>
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 64, i64 undef, i64 -128, i64 -60, i64 64, i64 undef, i64 -128, i64 -60>)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psrlv_q_512_undef(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psrlv_q_512_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> %v, <i64 undef, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = insertelement <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
+  %2 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> %1)
+  ret <8 x i64> %2
+}
+
+define <8 x i16> @avx512_psrlv_w_128_0(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_128_0(
+; CHECK-NEXT:    ret <8 x i16> %v
+;
+  %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> zeroinitializer)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @avx512_psrlv_w_128_var(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_128_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+;
+  %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @avx512_psrlv_w_128_big(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_128_big(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>)
+; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+;
+  %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @avx512_psrlv_w_128_allbig(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_128_allbig(
+; CHECK-NEXT:    ret <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef>
+;
+  %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 undef>)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @avx512_psrlv_w_128_undef(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_128_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+;
+  %1 = insertelement <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 undef, i64 0
+  %2 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> %1)
+  ret <8 x i16> %2
+}
+
+define <16 x i16> @avx512_psrlv_w_256_0(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_256_0(
+; CHECK-NEXT:    ret <16 x i16> %v
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> zeroinitializer)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx512_psrlv_w_256_var(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_256_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
+; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx512_psrlv_w_256_big(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_256_big(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>)
+; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx512_psrlv_w_256_allbig(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_256_allbig(
+; CHECK-NEXT:    ret <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 256, i16 16, i16 28, i16 65535, i16 32767>)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx512_psrlv_w_256_undef(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_256_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
+; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
+;
+  %1 = insertelement <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, i16 undef, i64 0
+  %2 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> %1)
+  ret <16 x i16> %2
+}
+
+define <32 x i16> @avx512_psrlv_w_512_0(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_512_0(
+; CHECK-NEXT:    ret <32 x i16> %v
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> zeroinitializer)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrlv_w_512_var(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_512_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
+; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrlv_w_512_big(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_512_big(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrlv_w_512_allbig(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_512_allbig(
+; CHECK-NEXT:    ret <32 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 undef, i16 0, i16 0>
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 128, i16 16, i16 28, i16 65535, i16 32767, i16 56, i16 -14, i16 undef, i16 16, i16 67, i16 567, i16 -32768, i16 4096, i16 8192, i16 -12345, i16 undef, i16 345, i16 123, i16 undef, i16 1024, i16 54321>)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psrlv_w_512_undef(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psrlv_w_512_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
+; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
+;
+  %1 = insertelement <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 undef, i64 0
+  %2 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> %1)
+  ret <32 x i16> %2
+}
+
+;
+; SHL - Constant Per-Element Vector
+;
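+; These tests mirror the LSHR group above for the shift-left intrinsics:
+; when every lane of the constant shift vector is smaller than the element
+; width, the call folds to a plain IR shl; if any lane is out of range the
+; intrinsic is kept (the hardware yields 0 for such lanes, whereas IR shl
+; would be poison); if every lane is out of range the result constant-folds
+; to zero.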
+
+define <4 x i32> @avx2_psllv_d_128_0(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_128_0(
+; CHECK-NEXT:    ret <4 x i32> %v
+;
+  %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> zeroinitializer)
+  ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psllv_d_256_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_256_0(
+; CHECK-NEXT:    ret <8 x i32> %v
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
+  ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psllv_d_128_var(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_128_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> %v, <i32 0, i32 8, i32 16, i32 31>
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+  %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 31>)
+  ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psllv_d_256_var(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_256_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
+  ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psllv_d_128_big(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_128_big(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+  %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
+  ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psllv_d_256_big(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_256_big(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
+  ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psllv_d_128_allbig(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_128_allbig(
+; CHECK-NEXT:    ret <4 x i32> <i32 0, i32 0, i32 0, i32 undef>
+;
+  %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
+  ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psllv_d_256_allbig(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_256_allbig(
+; CHECK-NEXT:    ret <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
+  ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psllv_d_128_undef(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_128_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> %v, <i32 undef, i32 8, i32 16, i32 31>
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+  %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 31>, i32 undef, i32 0
+  %2 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> %1)
+  ret <4 x i32> %2
+}
+
+define <8 x i32> @avx2_psllv_d_256_undef(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_256_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> %v, <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+  %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
+  %2 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> %1)
+  ret <8 x i32> %2
+}
+
+define <2 x i64> @avx2_psllv_q_128_0(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_128_0(
+; CHECK-NEXT:    ret <2 x i64> %v
+;
+  %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> zeroinitializer)
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @avx2_psllv_q_256_0(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_256_0(
+; CHECK-NEXT:    ret <4 x i64> %v
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
+  ret <4 x i64> %1
+}
+
+define <2 x i64> @avx2_psllv_q_128_var(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_128_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> %v, <i64 0, i64 8>
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @avx2_psllv_q_256_var(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_256_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> %v, <i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
+  ret <4 x i64> %1
+}
+
+define <2 x i64> @avx2_psllv_q_128_big(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_128_big(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @avx2_psllv_q_256_big(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_256_big(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
+  ret <4 x i64> %1
+}
+
+define <2 x i64> @avx2_psllv_q_128_allbig(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_128_allbig(
+; CHECK-NEXT:    ret <2 x i64> zeroinitializer
+;
+  %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 128, i64 -64>)
+  ret <2 x i64> %1
+}
+
+define <4 x i64> @avx2_psllv_q_256_allbig(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_256_allbig(
+; CHECK-NEXT:    ret <4 x i64> <i64 0, i64 undef, i64 0, i64 0>
+;
+  %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
+  ret <4 x i64> %1
+}
+
+; If the undef lane is assumed to be 0, every shift amount is 0, so the
+; intrinsic folds to the unshifted input.
+
+define <2 x i64> @avx2_psllv_q_128_undef(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_128_undef(
+; CHECK-NEXT:    ret <2 x i64> [[V:%.*]]
+;
+  %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 undef, i64 1
+  %2 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> %1)
+  ret <2 x i64> %2
+}
+
+define <4 x i64> @avx2_psllv_q_256_undef(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_256_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> %v, <i64 undef, i64 8, i64 16, i64 31>
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
+  %2 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> %1)
+  ret <4 x i64> %2
+}
+
+define <16 x i32> @avx512_psllv_d_512_0(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psllv_d_512_0(
+; CHECK-NEXT:    ret <16 x i32> %v
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> zeroinitializer)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psllv_d_512_var(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psllv_d_512_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psllv_d_512_big(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psllv_d_512_big(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psllv_d_512_allbig(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psllv_d_512_allbig(
+; CHECK-NEXT:    ret <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555, i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
+  ret <16 x i32> %1
+}
+
+define <16 x i32> @avx512_psllv_d_512_undef(<16 x i32> %v) {
+; CHECK-LABEL: @avx512_psllv_d_512_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> %v, <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = insertelement <16 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
+  %2 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> %1)
+  ret <16 x i32> %2
+}
+
+define <8 x i64> @avx512_psllv_q_512_0(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psllv_q_512_0(
+; CHECK-NEXT:    ret <8 x i64> %v
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> zeroinitializer)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psllv_q_512_var(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psllv_q_512_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> %v, <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psllv_q_512_big(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psllv_q_512_big(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psllv_q_512_allbig(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psllv_q_512_allbig(
+; CHECK-NEXT:    ret <8 x i64> <i64 0, i64 undef, i64 0, i64 0, i64 0, i64 undef, i64 0, i64 0>
+;
+  %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 64, i64 undef, i64 -128, i64 -60, i64 64, i64 undef, i64 -128, i64 -60>)
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @avx512_psllv_q_512_undef(<8 x i64> %v) {
+; CHECK-LABEL: @avx512_psllv_q_512_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> %v, <i64 undef, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = insertelement <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
+  %2 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> %1)
+  ret <8 x i64> %2
+}
+
+define <8 x i16> @avx512_psllv_w_128_0(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_128_0(
+; CHECK-NEXT:    ret <8 x i16> %v
+;
+  %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> zeroinitializer)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @avx512_psllv_w_128_var(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_128_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+;
+  %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @avx512_psllv_w_128_big(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_128_big(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>)
+; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+;
+  %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @avx512_psllv_w_128_allbig(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_128_allbig(
+; CHECK-NEXT:    ret <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef>
+;
+  %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 undef>)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @avx512_psllv_w_128_undef(<8 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_128_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+;
+  %1 = insertelement <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 undef, i64 0
+  %2 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> %1)
+  ret <8 x i16> %2
+}
+
+define <16 x i16> @avx512_psllv_w_256_0(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_256_0(
+; CHECK-NEXT:    ret <16 x i16> %v
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> zeroinitializer)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx512_psllv_w_256_var(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_256_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
+; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx512_psllv_w_256_big(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_256_big(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>)
+; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx512_psllv_w_256_allbig(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_256_allbig(
+; CHECK-NEXT:    ret <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 256, i16 16, i16 28, i16 65535, i16 32767>)
+  ret <16 x i16> %1
+}
+
+define <16 x i16> @avx512_psllv_w_256_undef(<16 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_256_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
+; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
+;
+  %1 = insertelement <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, i16 undef, i64 0
+  %2 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> %1)
+  ret <16 x i16> %2
+}
+
+define <32 x i16> @avx512_psllv_w_512_0(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_512_0(
+; CHECK-NEXT:    ret <32 x i16> %v
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> zeroinitializer)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psllv_w_512_var(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_512_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
+; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psllv_w_512_big(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_512_big(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psllv_w_512_allbig(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_512_allbig(
+; CHECK-NEXT:    ret <32 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 undef, i16 0, i16 0>
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 128, i16 16, i16 28, i16 65535, i16 32767, i16 56, i16 -14, i16 undef, i16 16, i16 67, i16 567, i16 -32768, i16 4096, i16 8192, i16 -12345, i16 undef, i16 345, i16 123, i16 undef, i16 1024, i16 54321>)
+  ret <32 x i16> %1
+}
+
+define <32 x i16> @avx512_psllv_w_512_undef(<32 x i16> %v) {
+; CHECK-LABEL: @avx512_psllv_w_512_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
+; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
+;
+  %1 = insertelement <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 undef, i64 0
+  %2 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> %1)
+  ret <32 x i16> %2
+}
+
+;
+; Vector Demanded Bits
+;
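+; The legacy psll/psrl/psra intrinsics only read the low 64 bits of their
+; count operand, so shuffles that merely rewrite the upper lanes of the
+; count vector are dead and should be stripped, leaving the intrinsic
+; applied to the original (possibly bitcast) count.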
+
+define <8 x i16> @sse2_psra_w_var(<8 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @sse2_psra_w_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+;
+  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1)
+  ret <8 x i16> %2
+}
+
+define <8 x i16> @sse2_psra_w_var_bc(<8 x i16> %v, <2 x i64> %a) {
+; CHECK-LABEL: @sse2_psra_w_var_bc(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %a to <8 x i16>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> [[TMP1]])
+; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+;
+  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %2 = bitcast <2 x i64> %1 to <8 x i16>
+  %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %2)
+  ret <8 x i16> %3
+}
+
+define <4 x i32> @sse2_psra_d_var(<4 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @sse2_psra_d_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1)
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @sse2_psra_d_var_bc(<4 x i32> %v, <8 x i16> %a) {
+; CHECK-LABEL: @sse2_psra_d_var_bc(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> %a to <4 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> [[TMP1]])
+; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+;
+  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  %2 = bitcast <8 x i16> %1 to <4 x i32>
+  %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %2)
+  ret <4 x i32> %3
+}
+
+define <16 x i16> @avx2_psra_w_var(<16 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @avx2_psra_w_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
+;
+  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %1)
+  ret <16 x i16> %2
+}
+
+define <8 x i32> @avx2_psra_d_var(<8 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @avx2_psra_d_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1)
+  ret <8 x i32> %2
+}
+
+define <2 x i64> @avx512_psra_q_128_var(<2 x i64> %v, <2 x i64> %a) {
+; CHECK-LABEL: @avx512_psra_q_128_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %2 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> %1)
+  ret <2 x i64> %2
+}
+
+define <4 x i64> @avx512_psra_q_256_var(<4 x i64> %v, <2 x i64> %a) {
+; CHECK-LABEL: @avx512_psra_q_256_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %2 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> %1)
+  ret <4 x i64> %2
+}
+
+define <32 x i16> @avx512_psra_w_512_var(<32 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @avx512_psra_w_512_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
+;
+  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  %2 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> %1)
+  ret <32 x i16> %2
+}
+
+define <16 x i32> @avx512_psra_d_512_var(<16 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @avx512_psra_d_512_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  %2 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> %1)
+  ret <16 x i32> %2
+}
+
+define <8 x i64> @avx512_psra_q_512_var(<8 x i64> %v, <2 x i64> %a) {
+; CHECK-LABEL: @avx512_psra_q_512_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %2 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> %1)
+  ret <8 x i64> %2
+}
+
+define <8 x i16> @sse2_psrl_w_var(<8 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @sse2_psrl_w_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+;
+  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  %2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %1)
+  ret <8 x i16> %2
+}
+
+define <4 x i32> @sse2_psrl_d_var(<4 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @sse2_psrl_d_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1)
+  ret <4 x i32> %2
+}
+
+define <2 x i64> @sse2_psrl_q_var(<2 x i64> %v, <2 x i64> %a) {
+; CHECK-LABEL: @sse2_psrl_q_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %2 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %1)
+  ret <2 x i64> %2
+}
+
+define <16 x i16> @avx2_psrl_w_var(<16 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @avx2_psrl_w_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
+;
+  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1)
+  ret <16 x i16> %2
+}
+
+define <16 x i16> @avx2_psrl_w_var_bc(<16 x i16> %v, <16 x i8> %a) {
+; CHECK-LABEL: @avx2_psrl_w_var_bc(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> %a to <8 x i16>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> [[TMP1]])
+; CHECK-NEXT:    ret <16 x i16> [[TMP2]]
+;
+  %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %2 = bitcast <16 x i8> %1 to <8 x i16>
+  %3 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %2)
+  ret <16 x i16> %3
+}
+
+define <8 x i32> @avx2_psrl_d_var(<8 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @avx2_psrl_d_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1)
+  ret <8 x i32> %2
+}
+
+define <8 x i32> @avx2_psrl_d_var_bc(<8 x i32> %v, <2 x i64> %a) {
+; CHECK-LABEL: @avx2_psrl_d_var_bc(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %a to <4 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> [[TMP1]])
+; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
+;
+  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %2 = bitcast <2 x i64> %1 to <4 x i32>
+  %3 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %2)
+  ret <8 x i32> %3
+}
+
+define <4 x i64> @avx2_psrl_q_var(<4 x i64> %v, <2 x i64> %a) {
+; CHECK-LABEL: @avx2_psrl_q_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1)
+  ret <4 x i64> %2
+}
+
+define <32 x i16> @avx512_psrl_w_512_var(<32 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @avx512_psrl_w_512_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
+;
+  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  %2 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %1)
+  ret <32 x i16> %2
+}
+
+define <32 x i16> @avx512_psrl_w_512_var_bc(<32 x i16> %v, <16 x i8> %a) {
+; CHECK-LABEL: @avx512_psrl_w_512_var_bc(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> %a to <8 x i16>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> [[TMP1]])
+; CHECK-NEXT:    ret <32 x i16> [[TMP2]]
+;
+  %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %2 = bitcast <16 x i8> %1 to <8 x i16>
+  %3 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %2)
+  ret <32 x i16> %3
+}
+
+define <16 x i32> @avx512_psrl_d_512_var(<16 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @avx512_psrl_d_512_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  %2 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> %1)
+  ret <16 x i32> %2
+}
+
+define <16 x i32> @avx512_psrl_d_512_var_bc(<16 x i32> %v, <2 x i64> %a) {
+; CHECK-LABEL: @avx512_psrl_d_512_var_bc(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %a to <4 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> [[TMP1]])
+; CHECK-NEXT:    ret <16 x i32> [[TMP2]]
+;
+  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %2 = bitcast <2 x i64> %1 to <4 x i32>
+  %3 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> %2)
+  ret <16 x i32> %3
+}
+
+define <8 x i64> @avx512_psrl_q_512_var(<8 x i64> %v, <2 x i64> %a) {
+; CHECK-LABEL: @avx512_psrl_q_512_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %2 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> %1)
+  ret <8 x i64> %2
+}
+
+define <8 x i16> @sse2_psll_w_var(<8 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @sse2_psll_w_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+;
+  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  %2 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %1)
+  ret <8 x i16> %2
+}
+
+define <4 x i32> @sse2_psll_d_var(<4 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @sse2_psll_d_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  %2 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %1)
+  ret <4 x i32> %2
+}
+
+define <2 x i64> @sse2_psll_q_var(<2 x i64> %v, <2 x i64> %a) {
+; CHECK-LABEL: @sse2_psll_q_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+;
+  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1)
+  ret <2 x i64> %2
+}
+
+define <16 x i16> @avx2_psll_w_var(<16 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @avx2_psll_w_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
+;
+  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1)
+  ret <16 x i16> %2
+}
+
+define <8 x i32> @avx2_psll_d_var(<8 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @avx2_psll_d_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
+;
+  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  %2 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %1)
+  ret <8 x i32> %2
+}
+
+define <4 x i64> @avx2_psll_q_var(<4 x i64> %v, <2 x i64> %a) {
+; CHECK-LABEL: @avx2_psll_q_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
+;
+  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %2 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %1)
+  ret <4 x i64> %2
+}
+
+define <32 x i16> @avx512_psll_w_512_var(<32 x i16> %v, <8 x i16> %a) {
+; CHECK-LABEL: @avx512_psll_w_512_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
+;
+  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  %2 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> %1)
+  ret <32 x i16> %2
+}
+
+define <16 x i32> @avx512_psll_d_512_var(<16 x i32> %v, <4 x i32> %a) {
+; CHECK-LABEL: @avx512_psll_d_512_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
+;
+  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  %2 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> %1)
+  ret <16 x i32> %2
+}
+
+define <8 x i64> @avx512_psll_q_512_var(<8 x i64> %v, <2 x i64> %a) {
+; CHECK-LABEL: @avx512_psll_q_512_var(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
+;
+  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
+  %2 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> %1)
+  ret <8 x i64> %2
+}
+
+;
+; Constant Folding
+;
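+; Chains of shift-by-immediate and shift-by-vector intrinsics with constant
+; counts should fold completely. Only the low 64 bits of the count vector
+; matter, so e.g. <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>
+; is an effective shift of 0.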
+
+define <8 x i16> @test_sse2_psra_w_0(<8 x i16> %A) {
+; CHECK-LABEL: @test_sse2_psra_w_0(
+; CHECK-NEXT:    ret <8 x i16> %A
+;
+  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %A, i32 0)
+  %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
+  %3 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %2, i32 0)
+  ret <8 x i16> %3
+}
+
+define <8 x i16> @test_sse2_psra_w_8() {
+; CHECK-LABEL: @test_sse2_psra_w_8(
+; CHECK-NEXT:    ret <8 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
+;
+  %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <8 x i16>
+  %2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %1, i32 3)
+  %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
+  %4 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, i32 2)
+  ret <8 x i16> %4
+}
+
+define <4 x i32> @test_sse2_psra_d_0(<4 x i32> %A) {
+; CHECK-LABEL: @test_sse2_psra_d_0(
+; CHECK-NEXT:    ret <4 x i32> %A
+;
+  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %A, i32 0)
+  %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
+  %3 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 0)
+  ret <4 x i32> %3
+}
+
+define <4 x i32> @sse2_psra_d_8() {
+; CHECK-LABEL: @sse2_psra_d_8(
+; CHECK-NEXT:    ret <4 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608>
+;
+  %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <4 x i32>
+  %2 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 3)
+  %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
+  %4 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2)
+  ret <4 x i32> %4
+}
+
+define <16 x i16> @test_avx2_psra_w_0(<16 x i16> %A) {
+; CHECK-LABEL: @test_avx2_psra_w_0(
+; CHECK-NEXT:    ret <16 x i16> %A
+;
+  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0)
+  %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
+  %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0)
+  ret <16 x i16> %3
+}
+
+define <16 x i16> @test_avx2_psra_w_8(<16 x i16> %A) {
+; CHECK-LABEL: @test_avx2_psra_w_8(
+; CHECK-NEXT:    ret <16 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
+;
+  %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i16>
+  %2 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %1, i32 3)
+  %3 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
+  %4 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %3, i32 2)
+  ret <16 x i16> %4
+}
+
+define <8 x i32> @test_avx2_psra_d_0(<8 x i32> %A) {
+; CHECK-LABEL: @test_avx2_psra_d_0(
+; CHECK-NEXT:    ret <8 x i32> %A
+;
+  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0)
+  %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
+  %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0)
+  ret <8 x i32> %3
+}
+
+define <8 x i32> @test_avx2_psra_d_8() {
+; CHECK-LABEL: @test_avx2_psra_d_8(
+; CHECK-NEXT:    ret <8 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608>
+;
+  %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <8 x i32>
+  %2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %1, i32 3)
+  %3 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
+  %4 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %3, i32 2)
+  ret <8 x i32> %4
+}
+
+define <32 x i16> @test_avx512_psra_w_512_0(<32 x i16> %A) {
+; CHECK-LABEL: @test_avx512_psra_w_512_0(
+; CHECK-NEXT:    ret <32 x i16> %A
+;
+  %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %A, i32 0)
+  %2 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
+  %3 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %2, i32 0)
+  ret <32 x i16> %3
+}
+
+define <32 x i16> @test_avx512_psra_w_512_8(<32 x i16> %A) {
+; CHECK-LABEL: @test_avx512_psra_w_512_8(
+; CHECK-NEXT:    ret <32 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
+;
+  %1 = bitcast <8 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <32 x i16>
+  %2 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %1, i32 3)
+  %3 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
+  %4 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %3, i32 2)
+  ret <32 x i16> %4
+}
+
+define <16 x i32> @test_avx512_psra_d_512_0(<16 x i32> %A) {
+; CHECK-LABEL: @test_avx512_psra_d_512_0(
+; CHECK-NEXT:    ret <16 x i32> %A
+;
+  %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %A, i32 0)
+  %2 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
+  %3 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %2, i32 0)
+  ret <16 x i32> %3
+}
+
+define <16 x i32> @test_avx512_psra_d_512_8() {
+; CHECK-LABEL: @test_avx512_psra_d_512_8(
+; CHECK-NEXT:    ret <16 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608>
+;
+  %1 = bitcast <8 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i32>
+  %2 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %1, i32 3)
+  %3 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
+  %4 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %3, i32 2)
+  ret <16 x i32> %4
+}
+
+;
+; Old Tests
+;
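+; Older end-to-end tests that thread a single scalar count through every
+; shift intrinsic via bitcasts. With a count of 1 the whole chain folds to
+; a constant vector; with a count of 128 every element is shifted out and
+; the result folds to zero.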
+
+define <2 x i64> @test_sse2_1() {
+; CHECK-LABEL: @test_sse2_1(
+; CHECK-NEXT:    ret <2 x i64> <i64 72058418680037440, i64 144117112246370624>
+;
+  %S = bitcast i32 1 to i32
+  %1 = zext i32 %S to i64
+  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+  %3 = insertelement <2 x i64> %2, i64 0, i32 1
+  %4 = bitcast <2 x i64> %3 to <8 x i16>
+  %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
+  %6 = bitcast <8 x i16> %5 to <4 x i32>
+  %7 = bitcast <2 x i64> %3 to <4 x i32>
+  %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
+  %9 = bitcast <4 x i32> %8 to <2 x i64>
+  %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
+  %11 = bitcast <2 x i64> %10 to <8 x i16>
+  %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
+  %13 = bitcast <8 x i16> %12 to <4 x i32>
+  %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
+  %15 = bitcast <4 x i32> %14 to <2 x i64>
+  %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
+  ret <2 x i64> %16
+}
+
+define <4 x i64> @test_avx2_1() {
+; CHECK-LABEL: @test_avx2_1(
+; CHECK-NEXT:    ret <4 x i64> <i64 64, i64 128, i64 192, i64 256>
+;
+  %S = bitcast i32 1 to i32
+  %1 = zext i32 %S to i64
+  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+  %3 = insertelement <2 x i64> %2, i64 0, i32 1
+  %4 = bitcast <2 x i64> %3 to <8 x i16>
+  %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
+  %6 = bitcast <16 x i16> %5 to <8 x i32>
+  %7 = bitcast <2 x i64> %3 to <4 x i32>
+  %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
+  %9 = bitcast <8 x i32> %8 to <4 x i64>
+  %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
+  %11 = bitcast <4 x i64> %10 to <16 x i16>
+  %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
+  %13 = bitcast <16 x i16> %12 to <8 x i32>
+  %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
+  %15 = bitcast <8 x i32> %14 to <4 x i64>
+  %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
+  ret <4 x i64> %16
+}
+
+define <2 x i64> @test_sse2_0() {
+; CHECK-LABEL: @test_sse2_0(
+; CHECK-NEXT:    ret <2 x i64> zeroinitializer
+;
+  %S = bitcast i32 128 to i32
+  %1 = zext i32 %S to i64
+  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+  %3 = insertelement <2 x i64> %2, i64 0, i32 1
+  %4 = bitcast <2 x i64> %3 to <8 x i16>
+  %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
+  %6 = bitcast <8 x i16> %5 to <4 x i32>
+  %7 = bitcast <2 x i64> %3 to <4 x i32>
+  %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
+  %9 = bitcast <4 x i32> %8 to <2 x i64>
+  %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
+  %11 = bitcast <2 x i64> %10 to <8 x i16>
+  %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
+  %13 = bitcast <8 x i16> %12 to <4 x i32>
+  %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
+  %15 = bitcast <4 x i32> %14 to <2 x i64>
+  %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
+  ret <2 x i64> %16
+}
+
+define <4 x i64> @test_avx2_0() {
+; CHECK-LABEL: @test_avx2_0(
+; CHECK-NEXT:    ret <4 x i64> zeroinitializer
+;
+  %S = bitcast i32 128 to i32
+  %1 = zext i32 %S to i64
+  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+  %3 = insertelement <2 x i64> %2, i64 0, i32 1
+  %4 = bitcast <2 x i64> %3 to <8 x i16>
+  %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
+  %6 = bitcast <16 x i16> %5 to <8 x i32>
+  %7 = bitcast <2 x i64> %3 to <4 x i32>
+  %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
+  %9 = bitcast <8 x i32> %8 to <4 x i64>
+  %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
+  %11 = bitcast <4 x i64> %10 to <16 x i16>
+  %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
+  %13 = bitcast <16 x i16> %12 to <8 x i32>
+  %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
+  %15 = bitcast <8 x i32> %14 to <4 x i64>
+  %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
+  ret <4 x i64> %16
+}
+
+define <2 x i64> @test_sse2_psrl_1() {
+; CHECK-LABEL: @test_sse2_psrl_1(
+; CHECK-NEXT:    ret <2 x i64> <i64 562954248421376, i64 9007267974742020>
+;
+  %S = bitcast i32 1 to i32
+  %1 = zext i32 %S to i64
+  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+  %3 = insertelement <2 x i64> %2, i64 0, i32 1
+  %4 = bitcast <2 x i64> %3 to <8 x i16>
+  %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 16, i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048>, <8 x i16> %4)
+  %6 = bitcast <8 x i16> %5 to <4 x i32>
+  %7 = bitcast <2 x i64> %3 to <4 x i32>
+  %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
+  %9 = bitcast <4 x i32> %8 to <2 x i64>
+  %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
+  %11 = bitcast <2 x i64> %10 to <8 x i16>
+  %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
+  %13 = bitcast <8 x i16> %12 to <4 x i32>
+  %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
+  %15 = bitcast <4 x i32> %14 to <2 x i64>
+  %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
+  ret <2 x i64> %16
+}
+
+define <4 x i64> @test_avx2_psrl_1() {
+; CHECK-LABEL: @test_avx2_psrl_1(
+; CHECK-NEXT:    ret <4 x i64> <i64 16, i64 32, i64 64, i64 128>
+;
+  %S = bitcast i32 1 to i32
+  %1 = zext i32 %S to i64
+  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+  %3 = insertelement <2 x i64> %2, i64 0, i32 1
+  %4 = bitcast <2 x i64> %3 to <8 x i16>
+  %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
+  %6 = bitcast <16 x i16> %5 to <8 x i32>
+  %7 = bitcast <2 x i64> %3 to <4 x i32>
+  %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
+  %9 = bitcast <8 x i32> %8 to <4 x i64>
+  %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
+  %11 = bitcast <4 x i64> %10 to <16 x i16>
+  %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
+  %13 = bitcast <16 x i16> %12 to <8 x i32>
+  %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
+  %15 = bitcast <8 x i32> %14 to <4 x i64>
+  %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
+  ret <4 x i64> %16
+}
+
+define <2 x i64> @test_sse2_psrl_0() {
+; CHECK-LABEL: @test_sse2_psrl_0(
+; CHECK-NEXT:    ret <2 x i64> zeroinitializer
+;
+  %S = bitcast i32 128 to i32
+  %1 = zext i32 %S to i64
+  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+  %3 = insertelement <2 x i64> %2, i64 0, i32 1
+  %4 = bitcast <2 x i64> %3 to <8 x i16>
+  %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096>, <8 x i16> %4)
+  %6 = bitcast <8 x i16> %5 to <4 x i32>
+  %7 = bitcast <2 x i64> %3 to <4 x i32>
+  %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
+  %9 = bitcast <4 x i32> %8 to <2 x i64>
+  %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
+  %11 = bitcast <2 x i64> %10 to <8 x i16>
+  %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
+  %13 = bitcast <8 x i16> %12 to <4 x i32>
+  %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
+  %15 = bitcast <4 x i32> %14 to <2 x i64>
+  %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
+  ret <2 x i64> %16
+}
+
+define <4 x i64> @test_avx2_psrl_0() {
+; CHECK-LABEL: @test_avx2_psrl_0(
+; CHECK-NEXT:    ret <4 x i64> zeroinitializer
+;
+  %S = bitcast i32 128 to i32
+  %1 = zext i32 %S to i64
+  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
+  %3 = insertelement <2 x i64> %2, i64 0, i32 1
+  %4 = bitcast <2 x i64> %3 to <8 x i16>
+  %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
+  %6 = bitcast <16 x i16> %5 to <8 x i32>
+  %7 = bitcast <2 x i64> %3 to <4 x i32>
+  %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
+  %9 = bitcast <8 x i32> %8 to <4 x i64>
+  %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
+  %11 = bitcast <4 x i64> %10 to <16 x i16>
+  %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
+  %13 = bitcast <16 x i16> %12 to <8 x i32>
+  %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
+  %15 = bitcast <8 x i32> %14 to <4 x i64>
+  %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
+  ret <4 x i64> %16
+}
+
+declare <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64>, i32) #1
+declare <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32>, i32) #1
+declare <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16>, i32) #1
+declare <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64>, <2 x i64>) #1
+declare <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32>, <4 x i32>) #1
+declare <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16>, <8 x i16>) #1
+declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1
+declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1
+declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1
+declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1
+declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1
+declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1
+declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1
+declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1
+declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1
+declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
+declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
+declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1
+
+declare <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64>, i32) #1
+declare <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32>, i32) #1
+declare <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16>, i32) #1
+declare <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64>, <2 x i64>) #1
+declare <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32>, <4 x i32>) #1
+declare <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16>, <8 x i16>) #1
+declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
+declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
+declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1
+declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) #1
+declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) #1
+declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) #1
+declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) #1
+declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) #1
+declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) #1
+declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) #1
+declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) #1
+declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) #1
+
+declare <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64>, i32) #1
+declare <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32>, i32) #1
+declare <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16>, i32) #1
+declare <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64>, <2 x i64>) #1
+declare <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32>, <4 x i32>) #1
+declare <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16>, <8 x i16>) #1
+declare <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64>, i32) #1
+declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) #1
+declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) #1
+declare <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64>, <2 x i64>) #1
+declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) #1
+declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) #1
+declare <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64>, i32) #1
+declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) #1
+declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) #1
+declare <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64>, <2 x i64>) #1
+declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) #1
+declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) #1
+
+declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) #1
+declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) #1
+declare <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32>, <16 x i32>) #1
+declare <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64>, <2 x i64>) #1
+declare <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64>, <4 x i64>) #1
+declare <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64>, <8 x i64>) #1
+
+declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) #1
+declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) #1
+declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) #1
+declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) #1
+declare <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32>, <16 x i32>) #1
+declare <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64>, <8 x i64>) #1
+
+declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) #1
+declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) #1
+declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) #1
+declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) #1
+declare <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32>, <16 x i32>) #1
+declare <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64>, <8 x i64>) #1
+
+declare <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16>, <8 x i16>) #1
+declare <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16>, <16 x i16>) #1
+declare <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16>, <32 x i16>) #1
+declare <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16>, <8 x i16>) #1
+declare <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16>, <16 x i16>) #1
+declare <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16>, <32 x i16>) #1
+declare <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16>, <8 x i16>) #1
+declare <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16>, <16 x i16>) #1
+declare <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16>, <32 x i16>) #1
+
+attributes #1 = { nounwind readnone }

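A note on why the shift chains above fold the way they do: for the x86 logical shift intrinsics (psll*/psrl*, with either an immediate or a register count), a count greater than or equal to the element bit width does not wrap, it zeroes every lane, so the chains driven by a count of 128 collapse to zeroinitializer while the count-0 and count-1 variants fold to the expected shifted constants. A minimal C sketch of that semantics using the SSE2 intrinsics (illustration only, not part of this commit):

  #include <emmintrin.h>
  #include <stdio.h>
  #include <stdint.h>

  int main(void) {
    __m128i v = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
    /* psllw with a register count of 128: the count is >= 16, so every i16
       lane is zeroed, which is why the count-128 chains fold to zero. */
    __m128i by128 = _mm_sll_epi16(v, _mm_cvtsi32_si128(128));
    /* psllw with an immediate count of 0 leaves the vector untouched. */
    __m128i by0 = _mm_slli_epi16(v, 0);
    uint16_t a[8], b[8];
    _mm_storeu_si128((__m128i *)a, by128);
    _mm_storeu_si128((__m128i *)b, by0);
    for (int i = 0; i < 8; ++i)
      printf("%u/%u ", (unsigned)a[i], (unsigned)b[i]);  /* prints 0/1 0/2 ... 0/8 */
    printf("\n");
    return 0;
  }
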
Added: llvm/trunk/test/Transforms/InstCombine/X86/x86-vpermil.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-vpermil.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/X86/x86-vpermil.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/X86/x86-vpermil.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,298 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Verify that instcombine is able to fold identity shuffles.
+
+define <4 x float> @identity_test_vpermilvar_ps(<4 x float> %v) {
+; CHECK-LABEL: @identity_test_vpermilvar_ps(
+; CHECK-NEXT:    ret <4 x float> %v
+;
+  %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 0, i32 1, i32 2, i32 3>)
+  ret <4 x float> %a
+}
+
+define <8 x float> @identity_test_vpermilvar_ps_256(<8 x float> %v) {
+; CHECK-LABEL: @identity_test_vpermilvar_ps_256(
+; CHECK-NEXT:    ret <8 x float> %v
+;
+  %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
+  ret <8 x float> %a
+}
+
+define <16 x float> @identity_test_vpermilvar_ps_512(<16 x float> %v) {
+; CHECK-LABEL: @identity_test_vpermilvar_ps_512(
+; CHECK-NEXT:    ret <16 x float> %v
+;
+  %a = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %v, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>)
+  ret <16 x float> %a
+}
+
+define <2 x double> @identity_test_vpermilvar_pd(<2 x double> %v) {
+; CHECK-LABEL: @identity_test_vpermilvar_pd(
+; CHECK-NEXT:    ret <2 x double> %v
+;
+  %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 0, i64 2>)
+  ret <2 x double> %a
+}
+
+define <4 x double> @identity_test_vpermilvar_pd_256(<4 x double> %v) {
+; CHECK-LABEL: @identity_test_vpermilvar_pd_256(
+; CHECK-NEXT:    ret <4 x double> %v
+;
+  %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 0, i64 2, i64 0, i64 2>)
+  ret <4 x double> %a
+}
+
+define <8 x double> @identity_test_vpermilvar_pd_512(<8 x double> %v) {
+; CHECK-LABEL: @identity_test_vpermilvar_pd_512(
+; CHECK-NEXT:    ret <8 x double> %v
+;
+  %a = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %v, <8 x i64> <i64 0, i64 2, i64 0, i64 2, i64 0, i64 2, i64 0, i64 2>)
+  ret <8 x double> %a
+}
+
+; Instcombine should be able to fold the following shuffles (index operand of all
+; zeroes) into shufflevector instructions that splat element 0 of each 128-bit lane.
+
+define <4 x float> @zero_test_vpermilvar_ps_zero(<4 x float> %v) {
+; CHECK-LABEL: @zero_test_vpermilvar_ps_zero(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> zeroinitializer)
+  ret <4 x float> %a
+}
+
+define <8 x float> @zero_test_vpermilvar_ps_256_zero(<8 x float> %v) {
+; CHECK-LABEL: @zero_test_vpermilvar_ps_256_zero(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    ret <8 x float> [[TMP1]]
+;
+  %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> zeroinitializer)
+  ret <8 x float> %a
+}
+
+define <16 x float> @zero_test_vpermilvar_ps_512_zero(<16 x float> %v) {
+; CHECK-LABEL: @zero_test_vpermilvar_ps_512_zero(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> %v, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
+; CHECK-NEXT:    ret <16 x float> [[TMP1]]
+;
+  %a = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %v, <16 x i32> zeroinitializer)
+  ret <16 x float> %a
+}
+
+define <2 x double> @zero_test_vpermilvar_pd_zero(<2 x double> %v) {
+; CHECK-LABEL: @zero_test_vpermilvar_pd_zero(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> zeroinitializer)
+  ret <2 x double> %a
+}
+
+define <4 x double> @zero_test_vpermilvar_pd_256_zero(<4 x double> %v) {
+; CHECK-LABEL: @zero_test_vpermilvar_pd_256_zero(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+; CHECK-NEXT:    ret <4 x double> [[TMP1]]
+;
+  %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> zeroinitializer)
+  ret <4 x double> %a
+}
+
+define <8 x double> @zero_test_vpermilvar_pd_512_zero(<8 x double> %v) {
+; CHECK-LABEL: @zero_test_vpermilvar_pd_512_zero(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+; CHECK-NEXT:    ret <8 x double> [[TMP1]]
+;
+  %a = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %v, <8 x i64> zeroinitializer)
+  ret <8 x double> %a
+}
+
+; Verify that instcombine is able to fold constant shuffles.
+
+define <4 x float> @test_vpermilvar_ps(<4 x float> %v) {
+; CHECK-LABEL: @test_vpermilvar_ps(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
+  ret <4 x float> %a
+}
+
+define <8 x float> @test_vpermilvar_ps_256(<8 x float> %v) {
+; CHECK-LABEL: @test_vpermilvar_ps_256(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; CHECK-NEXT:    ret <8 x float> [[TMP1]]
+;
+  %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+  ret <8 x float> %a
+}
+
+define <16 x float> @test_vpermilvar_ps_512(<16 x float> %v) {
+; CHECK-LABEL: @test_vpermilvar_ps_512(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> %v, <16 x float> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+; CHECK-NEXT:    ret <16 x float> [[TMP1]]
+;
+  %a = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %v, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+  ret <16 x float> %a
+}
+
+define <2 x double> @test_vpermilvar_pd(<2 x double> %v) {
+; CHECK-LABEL: @test_vpermilvar_pd(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 2, i64 0>)
+  ret <2 x double> %a
+}
+
+define <4 x double> @test_vpermilvar_pd_256(<4 x double> %v) {
+; CHECK-LABEL: @test_vpermilvar_pd_256(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEXT:    ret <4 x double> [[TMP1]]
+;
+  %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 3, i64 1, i64 2, i64 0>)
+  ret <4 x double> %a
+}
+
+define <8 x double> @test_vpermilvar_pd_512(<8 x double> %v) {
+; CHECK-LABEL: @test_vpermilvar_pd_512(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+; CHECK-NEXT:    ret <8 x double> [[TMP1]]
+;
+  %a = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %v, <8 x i64> <i64 3, i64 1, i64 2, i64 0, i64 7, i64 5, i64 6, i64 4>)
+  ret <8 x double> %a
+}
+
+; Verify that instcombine is able to fold constant shuffles with undef mask elements.
+
+define <4 x float> @undef_test_vpermilvar_ps(<4 x float> %v) {
+; CHECK-LABEL: @undef_test_vpermilvar_ps(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>)
+  ret <4 x float> %a
+}
+
+define <8 x float> @undef_test_vpermilvar_ps_256(<8 x float> %v) {
+; CHECK-LABEL: @undef_test_vpermilvar_ps_256(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 7, i32 6, i32 5, i32 4>
+; CHECK-NEXT:    ret <8 x float> [[TMP1]]
+;
+  %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 undef, i32 6, i32 5, i32 undef, i32 3, i32 2, i32 1, i32 0>)
+  ret <8 x float> %a
+}
+
+define <16 x float> @undef_test_vpermilvar_ps_512(<16 x float> %v) {
+; CHECK-LABEL: @undef_test_vpermilvar_ps_512(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> %v, <16 x float> undef, <16 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 7, i32 6, i32 5, i32 4, i32 undef, i32 10, i32 9, i32 undef, i32 15, i32 14, i32 13, i32 12>
+; CHECK-NEXT:    ret <16 x float> [[TMP1]]
+;
+  %a = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %v, <16 x i32> <i32 undef, i32 6, i32 5, i32 undef, i32 3, i32 2, i32 1, i32 0, i32 undef, i32 6, i32 5, i32 undef, i32 3, i32 2, i32 1, i32 0>)
+  ret <16 x float> %a
+}
+
+define <2 x double> @undef_test_vpermilvar_pd(<2 x double> %v) {
+; CHECK-LABEL: @undef_test_vpermilvar_pd(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 undef, i32 0>
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 undef, i64 0>)
+  ret <2 x double> %a
+}
+
+define <4 x double> @undef_test_vpermilvar_pd_256(<4 x double> %v) {
+; CHECK-LABEL: @undef_test_vpermilvar_pd_256(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 undef, i32 0, i32 3, i32 undef>
+; CHECK-NEXT:    ret <4 x double> [[TMP1]]
+;
+  %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 undef, i64 1, i64 2, i64 undef>)
+  ret <4 x double> %a
+}
+
+define <8 x double> @undef_test_vpermilvar_pd_512(<8 x double> %v) {
+; CHECK-LABEL: @undef_test_vpermilvar_pd_512(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> <i32 undef, i32 0, i32 3, i32 undef, i32 undef, i32 4, i32 7, i32 undef>
+; CHECK-NEXT:    ret <8 x double> [[TMP1]]
+;
+  %a = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %v, <8 x i64> <i64 undef, i64 1, i64 2, i64 undef, i64 undef, i64 1, i64 2, i64 undef>)
+  ret <8 x double> %a
+}
+
+; Simplify demanded elts
+
+define <4 x float> @elts_test_vpermilvar_ps(<4 x float> %a0, i32 %a1) {
+; CHECK-LABEL: @elts_test_vpermilvar_ps(
+; CHECK-NEXT:    ret <4 x float> %a0
+;
+  %1 = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %a1, i32 3
+  %2 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %1)
+  %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  ret <4 x float> %3
+}
+
+define <8 x float> @elts_test_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: @elts_test_vpermilvar_ps_256(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 6, i32 undef, i32 7>
+; CHECK-NEXT:    ret <8 x float> [[TMP1]]
+;
+  %1 = shufflevector <8 x i32> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 3, i32 2, i32 1, i32 0>, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  %2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %1)
+  %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> <i32 undef, i32 1, i32 undef, i32 3, i32 undef, i32 5, i32 undef, i32 7>
+  ret <8 x float> %3
+}
+
+define <16 x float> @elts_test_vpermilvar_ps_512(<16 x float> %a0, <16 x i32> %a1, i32 %a2) {
+; CHECK-LABEL: @elts_test_vpermilvar_ps_512(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %a0, <16 x i32> %a1)
+; CHECK-NEXT:    ret <16 x float> [[TMP1]]
+;
+  %1 = insertelement <16 x i32> %a1, i32 %a2, i32 0
+  %2 = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %a0, <16 x i32> %1)
+  %3 = shufflevector <16 x float> %2, <16 x float> undef, <16 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x float> %3
+}
+
+define <2 x double> @elts_test_vpermilvar_pd(<2 x double> %a0, i64 %a1) {
+; CHECK-LABEL: @elts_test_vpermilvar_pd(
+; CHECK-NEXT:    ret <2 x double> %a0
+;
+  %1 = insertelement <2 x i64> <i64 0, i64 2>, i64 %a1, i32 1
+  %2 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %1)
+  %3 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
+  ret <2 x double> %3
+}
+
+define <4 x double> @elts_test_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: @elts_test_vpermilvar_pd_256(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 undef>
+; CHECK-NEXT:    ret <4 x double> [[TMP1]]
+;
+  %1 = shufflevector <4 x i64> <i64 0, i64 2, i64 0, i64 2>, <4 x i64> %a1, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+  %2 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %1)
+  %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  ret <4 x double> %3
+}
+
+define <8 x double> @elts_test_vpermilvar_pd_512(<8 x double> %a0, <8 x i64> %a1, i64 %a2) {
+; CHECK-LABEL: @elts_test_vpermilvar_pd_512(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x i64> undef, i64 %a2, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %a0, <8 x i64> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT:    ret <8 x double> [[TMP3]]
+;
+  %1 = insertelement <8 x i64> %a1, i64 %a2, i32 0
+  %2 = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %a0, <8 x i64> %1)
+  %3 = shufflevector <8 x double> %2, <8 x double> undef, <8 x i32> zeroinitializer
+  ret <8 x double> %3
+}
+
+declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>)
+declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>)
+declare <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double>, <8 x i64>)
+
+declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>)
+declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>)
+declare <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float>, <16 x i32>)

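The vpermilvar folds above follow directly from the per-lane indexing rule of the instruction: each result element is taken from the 128-bit lane it lives in, using only the low two bits (ps) or bit 1 (pd) of the matching mask element, which is why a constant mask can be rewritten as a plain shufflevector. A small scalar model in C (illustration only, not from the commit; the helper name is made up) reproduces the 256-bit ps case and shows why the constant mask <7,6,5,4,3,2,1,0> becomes the shufflevector mask <3,2,1,0,7,6,5,4> in @test_vpermilvar_ps_256:

  #include <stdio.h>

  /* Scalar model of the 256-bit vpermilvar.ps fold: element i of the result
     comes from the same 128-bit lane as position i, indexed by the low two
     bits of mask[i]. */
  static void permilvar_ps_256(const float src[8], const int mask[8], float dst[8]) {
    for (int i = 0; i < 8; ++i)
      dst[i] = src[(i & ~3) | (mask[i] & 3)];
  }

  int main(void) {
    float v[8] = {0, 1, 2, 3, 4, 5, 6, 7};
    int rev[8] = {7, 6, 5, 4, 3, 2, 1, 0};  /* the constant mask used above */
    float out[8];
    permilvar_ps_256(v, rev, out);
    for (int i = 0; i < 8; ++i)
      printf("%g ", out[i]);                /* prints 3 2 1 0 7 6 5 4 */
    printf("\n");
    return 0;
  }
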
Added: llvm/trunk/test/Transforms/InstCombine/X86/x86-xop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-xop.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/X86/x86-xop.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/X86/x86-xop.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,305 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define <2 x double> @test_vfrcz_sd(<2 x double> %a) {
+; CHECK-LABEL: @test_vfrcz_sd(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a)
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
+  %2 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %1)
+  ret <2 x double> %2
+}
+
+define double @test_vfrcz_sd_0(double %a) {
+; CHECK-LABEL: @test_vfrcz_sd_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
+; CHECK-NEXT:    ret double [[TMP3]]
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2)
+  %4 = extractelement <2 x double> %3, i32 0
+  ret double %4
+}
+
+define double @test_vfrcz_sd_1(double %a) {
+; CHECK-LABEL: @test_vfrcz_sd_1(
+; CHECK-NEXT:    ret double 0.000000e+00
+;
+  %1 = insertelement <2 x double> undef, double %a, i32 0
+  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+  %3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2)
+  %4 = extractelement <2 x double> %3, i32 1
+  ret double %4
+}
+
+define <4 x float> @test_vfrcz_ss(<4 x float> %a) {
+; CHECK-LABEL: @test_vfrcz_ss(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a)
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
+  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+  %4 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %3)
+  ret <4 x float> %4
+}
+
+define float @test_vfrcz_ss_0(float %a) {
+; CHECK-LABEL: @test_vfrcz_ss_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
+; CHECK-NEXT:    ret float [[TMP3]]
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4)
+  %6 = extractelement <4 x float> %5, i32 0
+  ret float %6
+}
+
+define float @test_vfrcz_ss_3(float %a) {
+; CHECK-LABEL: @test_vfrcz_ss_3(
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %1 = insertelement <4 x float> undef, float %a, i32 0
+  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+  %5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4)
+  %6 = extractelement <4 x float> %5, i32 3
+  ret float %6
+}
+
+define <2 x i64> @cmp_slt_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: @cmp_slt_v2i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <2 x i64> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @cmp_ult_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: @cmp_ult_v2i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i64> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @cmp_sle_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: @cmp_sle_v2i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sle <2 x i64> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @cmp_ule_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: @cmp_ule_v2i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <2 x i64> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
+;
+  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %1
+}
+
+define <4 x i32> @cmp_sgt_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @cmp_sgt_v4i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+;
+  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @cmp_ugt_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @cmp_ugt_v4i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i32> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+;
+  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @cmp_sge_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @cmp_sge_v4i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sge <4 x i32> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+;
+  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %1
+}
+
+define <4 x i32> @cmp_uge_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @cmp_uge_v4i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge <4 x i32> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+;
+  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %1
+}
+
+define <8 x i16> @cmp_seq_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: @cmp_seq_v8i16(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i16> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
+; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+;
+  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @cmp_ueq_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: @cmp_ueq_v8i16(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i16> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
+; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+;
+  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @cmp_sne_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: @cmp_sne_v8i16(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <8 x i16> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
+; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+;
+  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %1
+}
+
+define <8 x i16> @cmp_une_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: @cmp_une_v8i16(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <8 x i16> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
+; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+;
+  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %1
+}
+
+define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: @cmp_strue_v16i8(
+; CHECK-NEXT:    ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+;
+  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %1
+}
+
+define <16 x i8> @cmp_utrue_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: @cmp_utrue_v16i8(
+; CHECK-NEXT:    ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+;
+  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %1
+}
+
+define <16 x i8> @cmp_sfalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: @cmp_sfalse_v16i8(
+; CHECK-NEXT:    ret <16 x i8> zeroinitializer
+;
+  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %1
+}
+
+define <16 x i8> @cmp_ufalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: @cmp_ufalse_v16i8(
+; CHECK-NEXT:    ret <16 x i8> zeroinitializer
+;
+  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %1
+}
+
+declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone
+declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8>, <16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64>, <2 x i64>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
+declare <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64>, <2 x i64>) nounwind readnone

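The vpcom tests above all reduce to the same shape: each XOP compare writes an all-ones lane where the predicate holds and an all-zeros lane where it does not, which is exactly sext(icmp) in IR, and the "true"/"false" predicates fold to constant all-ones/zero regardless of the operands. The vfrcz.ss/sd tests at the top encode that the scalar forms read only lane 0 of the source and zero the upper result lanes, so inserts into the upper source lanes are dead and extracts from the upper result lanes fold to 0.0. A one-lane C model of the compare folds (illustrative helper names, not an LLVM or intrinsic API):

  #include <stdio.h>
  #include <stdint.h>

  /* One lane of the vpcom folds: all-ones when the predicate holds,
     all-zeros otherwise, i.e. "sext(icmp ...)" in IR terms. */
  static int64_t vpcom_lt_s64(int64_t a, int64_t b)   { return a < b ? -1 : 0; }
  static int64_t vpcom_lt_u64(uint64_t a, uint64_t b) { return a < b ? -1 : 0; }

  int main(void) {
    printf("%lld %lld\n", (long long)vpcom_lt_s64(-1, 0),
                          (long long)vpcom_lt_u64((uint64_t)-1, 0));
    /* prints "-1 0": signed -1 < 0 holds (all-ones lane), but the same bit
       pattern is the maximum unsigned value, so the ult compare is false. */
    return 0;
  }
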
Added: llvm/trunk/test/Transforms/InstCombine/abs-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/abs-1.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/abs-1.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/abs-1.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,571 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i32 @abs(i32)
+declare i64 @labs(i64)
+declare i64 @llabs(i64)
+
+; Test that the abs library call simplifier works correctly.
+; abs(x) -> x <s 0 ? -x : x.
+
+define i32 @test_abs(i32 %x) {
+; CHECK-LABEL: @test_abs(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[NEG:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[NEG]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+  %ret = call i32 @abs(i32 %x)
+  ret i32 %ret
+}
+
+define i64 @test_labs(i64 %x) {
+; CHECK-LABEL: @test_labs(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i64 [[X:%.*]], 0
+; CHECK-NEXT:    [[NEG:%.*]] = sub nsw i64 0, [[X]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[NEG]], i64 [[X]]
+; CHECK-NEXT:    ret i64 [[TMP2]]
+;
+  %ret = call i64 @labs(i64 %x)
+  ret i64 %ret
+}
+
+define i64 @test_llabs(i64 %x) {
+; CHECK-LABEL: @test_llabs(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i64 [[X:%.*]], 0
+; CHECK-NEXT:    [[NEG:%.*]] = sub nsw i64 0, [[X]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[NEG]], i64 [[X]]
+; CHECK-NEXT:    ret i64 [[TMP2]]
+;
+  %ret = call i64 @llabs(i64 %x)
+  ret i64 %ret
+}
+
+; We have a canonical form of abs to make CSE easier.
+
+define i8 @abs_canonical_1(i8 %x) {
+; CHECK-LABEL: @abs_canonical_1(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT:    [[NEG:%.*]] = sub i8 0, [[X]]
+; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[CMP]], i8 [[NEG]], i8 [[X]]
+; CHECK-NEXT:    ret i8 [[ABS]]
+;
+  %cmp = icmp sgt i8 %x, 0
+  %neg = sub i8 0, %x
+  %abs = select i1 %cmp, i8 %x, i8 %neg
+  ret i8 %abs
+}
+
+; Vectors should work too.
+
+define <2 x i8> @abs_canonical_2(<2 x i8> %x) {
+; CHECK-LABEL: @abs_canonical_2(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer
+; CHECK-NEXT:    [[NEG:%.*]] = sub <2 x i8> zeroinitializer, [[X]]
+; CHECK-NEXT:    [[ABS:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[NEG]], <2 x i8> [[X]]
+; CHECK-NEXT:    ret <2 x i8> [[ABS]]
+;
+  %cmp = icmp sgt <2 x i8> %x, <i8 -1, i8 -1>
+  %neg = sub <2 x i8> zeroinitializer, %x
+  %abs = select <2 x i1> %cmp, <2 x i8> %x, <2 x i8> %neg
+  ret <2 x i8> %abs
+}
+
+; Even if a constant has undef elements.
+
+define <2 x i8> @abs_canonical_2_vec_undef_elts(<2 x i8> %x) {
+; CHECK-LABEL: @abs_canonical_2_vec_undef_elts(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer
+; CHECK-NEXT:    [[NEG:%.*]] = sub <2 x i8> zeroinitializer, [[X]]
+; CHECK-NEXT:    [[ABS:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[NEG]], <2 x i8> [[X]]
+; CHECK-NEXT:    ret <2 x i8> [[ABS]]
+;
+  %cmp = icmp sgt <2 x i8> %x, <i8 undef, i8 -1>
+  %neg = sub <2 x i8> zeroinitializer, %x
+  %abs = select <2 x i1> %cmp, <2 x i8> %x, <2 x i8> %neg
+  ret <2 x i8> %abs
+}
+
+; NSW should not change.
+
+define i8 @abs_canonical_3(i8 %x) {
+; CHECK-LABEL: @abs_canonical_3(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT:    [[NEG:%.*]] = sub nsw i8 0, [[X]]
+; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[CMP]], i8 [[NEG]], i8 [[X]]
+; CHECK-NEXT:    ret i8 [[ABS]]
+;
+  %cmp = icmp slt i8 %x, 0
+  %neg = sub nsw i8 0, %x
+  %abs = select i1 %cmp, i8 %neg, i8 %x
+  ret i8 %abs
+}
+
+define i8 @abs_canonical_4(i8 %x) {
+; CHECK-LABEL: @abs_canonical_4(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT:    [[NEG:%.*]] = sub i8 0, [[X]]
+; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[CMP]], i8 [[NEG]], i8 [[X]]
+; CHECK-NEXT:    ret i8 [[ABS]]
+;
+  %cmp = icmp slt i8 %x, 1
+  %neg = sub i8 0, %x
+  %abs = select i1 %cmp, i8 %neg, i8 %x
+  ret i8 %abs
+}
+
+define i32 @abs_canonical_5(i8 %x) {
+; CHECK-LABEL: @abs_canonical_5(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[X]] to i32
+; CHECK-NEXT:    [[NEG:%.*]] = sub nsw i32 0, [[CONV]]
+; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[CMP]], i32 [[NEG]], i32 [[CONV]]
+; CHECK-NEXT:    ret i32 [[ABS]]
+;
+  %cmp = icmp sgt i8 %x, 0
+  %conv = sext i8 %x to i32
+  %neg = sub i32 0, %conv
+  %abs = select i1 %cmp, i32 %conv, i32 %neg
+  ret i32 %abs
+}
+
+define i32 @abs_canonical_6(i32 %a, i32 %b) {
+; CHECK-LABEL: @abs_canonical_6(
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 0, [[TMP1]]
+; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[CMP]], i32 [[TMP2]], i32 [[TMP1]]
+; CHECK-NEXT:    ret i32 [[ABS]]
+;
+  %tmp1 = sub i32 %a, %b
+  %cmp = icmp sgt i32 %tmp1, -1
+  %tmp2 = sub i32 %b, %a
+  %abs = select i1 %cmp, i32 %tmp1, i32 %tmp2
+  ret i32 %abs
+}
+
+define <2 x i8> @abs_canonical_7(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @abs_canonical_7(
+; CHECK-NEXT:    [[TMP1:%.*]] = sub <2 x i8> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = sub <2 x i8> zeroinitializer, [[TMP1]]
+; CHECK-NEXT:    [[ABS:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[TMP2]], <2 x i8> [[TMP1]]
+; CHECK-NEXT:    ret <2 x i8> [[ABS]]
+;
+
+  %tmp1 = sub <2 x i8> %a, %b
+  %cmp = icmp sgt <2 x i8> %tmp1, <i8 -1, i8 -1>
+  %tmp2 = sub <2 x i8> %b, %a
+  %abs = select <2 x i1> %cmp, <2 x i8> %tmp1, <2 x i8> %tmp2
+  ret <2 x i8> %abs
+}
+
+define i32 @abs_canonical_8(i32 %a) {
+; CHECK-LABEL: @abs_canonical_8(
+; CHECK-NEXT:    [[TMP:%.*]] = sub i32 0, [[A:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[A]], 0
+; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[CMP]], i32 [[TMP]], i32 [[A]]
+; CHECK-NEXT:    ret i32 [[ABS]]
+;
+  %tmp = sub i32 0, %a
+  %cmp = icmp slt i32 %tmp, 0
+  %abs = select i1 %cmp, i32 %a, i32 %tmp
+  ret i32 %abs
+}
+
+define i32 @abs_canonical_9(i32 %a, i32 %b) {
+; CHECK-LABEL: @abs_canonical_9(
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP1]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[B]], [[A]]
+; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[TMP2]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[ABS]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %tmp1 = sub i32 %a, %b
+  %cmp = icmp sgt i32 %tmp1, -1
+  %tmp2 = sub i32 %b, %a
+  %abs = select i1 %cmp, i32 %tmp1, i32 %tmp2
+  %add = add i32 %abs, %tmp2 ; increase use count for %tmp2.
+  ret i32 %add
+}
+
+define i32 @abs_canonical_10(i32 %a, i32 %b) {
+; CHECK-LABEL: @abs_canonical_10(
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP1]], 0
+; CHECK-NEXT:    [[NEGTMP:%.*]] = sub i32 0, [[TMP1]]
+; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[CMP]], i32 [[NEGTMP]], i32 [[TMP1]]
+; CHECK-NEXT:    ret i32 [[ABS]]
+;
+  %tmp2 = sub i32 %b, %a
+  %tmp1 = sub i32 %a, %b
+  %cmp = icmp sgt i32 %tmp1, -1
+  %abs = select i1 %cmp, i32 %tmp1, i32 %tmp2
+  ret i32 %abs
+}
+
+; We have a canonical form of nabs to make CSE easier.
+
+define i8 @nabs_canonical_1(i8 %x) {
+; CHECK-LABEL: @nabs_canonical_1(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT:    [[NEG:%.*]] = sub i8 0, [[X]]
+; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[CMP]], i8 [[X]], i8 [[NEG]]
+; CHECK-NEXT:    ret i8 [[ABS]]
+;
+  %cmp = icmp sgt i8 %x, 0
+  %neg = sub i8 0, %x
+  %abs = select i1 %cmp, i8 %neg, i8 %x
+  ret i8 %abs
+}
+
+; Vectors should work too.
+
+define <2 x i8> @nabs_canonical_2(<2 x i8> %x) {
+; CHECK-LABEL: @nabs_canonical_2(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer
+; CHECK-NEXT:    [[NEG:%.*]] = sub <2 x i8> zeroinitializer, [[X]]
+; CHECK-NEXT:    [[ABS:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[X]], <2 x i8> [[NEG]]
+; CHECK-NEXT:    ret <2 x i8> [[ABS]]
+;
+  %cmp = icmp sgt <2 x i8> %x, <i8 -1, i8 -1>
+  %neg = sub <2 x i8> zeroinitializer, %x
+  %abs = select <2 x i1> %cmp, <2 x i8> %neg, <2 x i8> %x
+  ret <2 x i8> %abs
+}
+
+; Even if a constant has undef elements.
+
+define <2 x i8> @nabs_canonical_2_vec_undef_elts(<2 x i8> %x) {
+; CHECK-LABEL: @nabs_canonical_2_vec_undef_elts(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer
+; CHECK-NEXT:    [[NEG:%.*]] = sub <2 x i8> zeroinitializer, [[X]]
+; CHECK-NEXT:    [[ABS:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[X]], <2 x i8> [[NEG]]
+; CHECK-NEXT:    ret <2 x i8> [[ABS]]
+;
+  %cmp = icmp sgt <2 x i8> %x, <i8 -1, i8 undef>
+  %neg = sub <2 x i8> zeroinitializer, %x
+  %abs = select <2 x i1> %cmp, <2 x i8> %neg, <2 x i8> %x
+  ret <2 x i8> %abs
+}
+
+; NSW should not change.
+
+define i8 @nabs_canonical_3(i8 %x) {
+; CHECK-LABEL: @nabs_canonical_3(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT:    [[NEG:%.*]] = sub nsw i8 0, [[X]]
+; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[CMP]], i8 [[X]], i8 [[NEG]]
+; CHECK-NEXT:    ret i8 [[ABS]]
+;
+  %cmp = icmp slt i8 %x, 0
+  %neg = sub nsw i8 0, %x
+  %abs = select i1 %cmp, i8 %x, i8 %neg
+  ret i8 %abs
+}
+
+define i8 @nabs_canonical_4(i8 %x) {
+; CHECK-LABEL: @nabs_canonical_4(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT:    [[NEG:%.*]] = sub i8 0, [[X]]
+; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[CMP]], i8 [[X]], i8 [[NEG]]
+; CHECK-NEXT:    ret i8 [[ABS]]
+;
+  %cmp = icmp slt i8 %x, 1
+  %neg = sub i8 0, %x
+  %abs = select i1 %cmp, i8 %x, i8 %neg
+  ret i8 %abs
+}
+
+define i32 @nabs_canonical_5(i8 %x) {
+; CHECK-LABEL: @nabs_canonical_5(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[X]] to i32
+; CHECK-NEXT:    [[NEG:%.*]] = sub nsw i32 0, [[CONV]]
+; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[CMP]], i32 [[CONV]], i32 [[NEG]]
+; CHECK-NEXT:    ret i32 [[ABS]]
+;
+  %cmp = icmp sgt i8 %x, 0
+  %conv = sext i8 %x to i32
+  %neg = sub i32 0, %conv
+  %abs = select i1 %cmp, i32 %neg, i32 %conv
+  ret i32 %abs
+}
+
+define i32 @nabs_canonical_6(i32 %a, i32 %b) {
+; CHECK-LABEL: @nabs_canonical_6(
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 0, [[TMP1]]
+; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[TMP2]]
+; CHECK-NEXT:    ret i32 [[ABS]]
+;
+  %tmp1 = sub i32 %a, %b
+  %cmp = icmp sgt i32 %tmp1, -1
+  %tmp2 = sub i32 %b, %a
+  %abs = select i1 %cmp, i32 %tmp2, i32 %tmp1
+  ret i32 %abs
+}
+
+define <2 x i8> @nabs_canonical_7(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @nabs_canonical_7(
+; CHECK-NEXT:    [[TMP1:%.*]] = sub <2 x i8> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = sub <2 x i8> zeroinitializer, [[TMP1]]
+; CHECK-NEXT:    [[ABS:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[TMP1]], <2 x i8> [[TMP2]]
+; CHECK-NEXT:    ret <2 x i8> [[ABS]]
+;
+  %tmp1 = sub <2 x i8> %a, %b
+  %cmp = icmp sgt <2 x i8> %tmp1, <i8 -1, i8 -1>
+  %tmp2 = sub <2 x i8> %b, %a
+  %abs = select <2 x i1> %cmp, <2 x i8> %tmp2, <2 x i8> %tmp1
+  ret <2 x i8> %abs
+}
+
+define i32 @nabs_canonical_8(i32 %a) {
+; CHECK-LABEL: @nabs_canonical_8(
+; CHECK-NEXT:    [[TMP:%.*]] = sub i32 0, [[A:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[A]], 0
+; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[CMP]], i32 [[A]], i32 [[TMP]]
+; CHECK-NEXT:    ret i32 [[ABS]]
+;
+  %tmp = sub i32 0, %a
+  %cmp = icmp slt i32 %tmp, 0
+  %abs = select i1 %cmp, i32 %tmp, i32 %a
+  ret i32 %abs
+}
+
+define i32 @nabs_canonical_9(i32 %a, i32 %b) {
+; CHECK-LABEL: @nabs_canonical_9(
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP1]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[B]], [[A]]
+; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[CMP]], i32 [[TMP2]], i32 [[TMP1]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[TMP2]], [[ABS]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %tmp1 = sub i32 %a, %b
+  %cmp = icmp sgt i32 %tmp1, -1
+  %tmp2 = sub i32 %b, %a
+  %abs = select i1 %cmp, i32 %tmp2, i32 %tmp1
+  %add = add i32 %tmp2, %abs ; increase use count for %tmp2
+  ret i32 %add
+}
+
+define i32 @nabs_canonical_10(i32 %a, i32 %b) {
+; CHECK-LABEL: @nabs_canonical_10(
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP1]], 0
+; CHECK-NEXT:    [[NEGTMP:%.*]] = sub i32 0, [[TMP1]]
+; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[NEGTMP]]
+; CHECK-NEXT:    ret i32 [[ABS]]
+;
+  %tmp2 = sub i32 %b, %a
+  %tmp1 = sub i32 %a, %b
+  %cmp = icmp slt i32 %tmp1, 1
+  %abs = select i1 %cmp, i32 %tmp1, i32 %tmp2
+  ret i32 %abs
+}
+
+; The following 5 tests use a shift+add+xor to implement abs():
+; B = ashr i8 A, 7  -- smear the sign bit.
+; xor (add A, B), B -- add -1 and flip bits if negative
+
+define i8 @shifty_abs_commute0(i8 %x) {
+; CHECK-LABEL: @shifty_abs_commute0(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i8 0, [[X]]
+; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 [[X]]
+; CHECK-NEXT:    ret i8 [[ABS]]
+;
+  %signbit = ashr i8 %x, 7
+  %add = add i8 %signbit, %x
+  %abs = xor i8 %add, %signbit
+  ret i8 %abs
+}
+
+define i8 @shifty_abs_commute0_nsw(i8 %x) {
+; CHECK-LABEL: @shifty_abs_commute0_nsw(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw i8 0, [[X]]
+; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 [[X]]
+; CHECK-NEXT:    ret i8 [[ABS]]
+;
+  %signbit = ashr i8 %x, 7
+  %add = add nsw i8 %signbit, %x
+  %abs = xor i8 %add, %signbit
+  ret i8 %abs
+}
+
+; The nuw flag creates a contradiction. If the shift produces all 1s, the only
+; way for the add to not wrap is for %x to be 0, but then the shift couldn't
+; have produced all 1s. We partially optimize this.
+define i8 @shifty_abs_commute0_nuw(i8 %x) {
+; CHECK-LABEL: @shifty_abs_commute0_nuw(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i8 [[X:%.*]], 0
+; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 0
+; CHECK-NEXT:    ret i8 [[ABS]]
+;
+  %signbit = ashr i8 %x, 7
+  %add = add nuw i8 %signbit, %x
+  %abs = xor i8 %add, %signbit
+  ret i8 %abs
+}
+
+define <2 x i8> @shifty_abs_commute1(<2 x i8> %x) {
+; CHECK-LABEL: @shifty_abs_commute1(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = sub <2 x i8> zeroinitializer, [[X]]
+; CHECK-NEXT:    [[ABS:%.*]] = select <2 x i1> [[TMP1]], <2 x i8> [[TMP2]], <2 x i8> [[X]]
+; CHECK-NEXT:    ret <2 x i8> [[ABS]]
+;
+  %signbit = ashr <2 x i8> %x, <i8 7, i8 7>
+  %add = add <2 x i8> %signbit, %x
+  %abs = xor <2 x i8> %signbit, %add
+  ret <2 x i8> %abs
+}
+
+define <2 x i8> @shifty_abs_commute2(<2 x i8> %x) {
+; CHECK-LABEL: @shifty_abs_commute2(
+; CHECK-NEXT:    [[Y:%.*]] = mul <2 x i8> [[X:%.*]], <i8 3, i8 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <2 x i8> [[Y]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = sub <2 x i8> zeroinitializer, [[Y]]
+; CHECK-NEXT:    [[ABS:%.*]] = select <2 x i1> [[TMP1]], <2 x i8> [[TMP2]], <2 x i8> [[Y]]
+; CHECK-NEXT:    ret <2 x i8> [[ABS]]
+;
+  %y = mul <2 x i8> %x, <i8 3, i8 3>   ; extra op to thwart complexity-based canonicalization
+  %signbit = ashr <2 x i8> %y, <i8 7, i8 7>
+  %add = add <2 x i8> %y, %signbit
+  %abs = xor <2 x i8> %signbit, %add
+  ret <2 x i8> %abs
+}
+
+define i8 @shifty_abs_commute3(i8 %x) {
+; CHECK-LABEL: @shifty_abs_commute3(
+; CHECK-NEXT:    [[Y:%.*]] = mul i8 [[X:%.*]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i8 [[Y]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i8 0, [[Y]]
+; CHECK-NEXT:    [[ABS:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 [[Y]]
+; CHECK-NEXT:    ret i8 [[ABS]]
+;
+  %y = mul i8 %x, 3                    ; extra op to thwart complexity-based canonicalization
+  %signbit = ashr i8 %y, 7
+  %add = add i8 %y, %signbit
+  %abs = xor i8 %add, %signbit
+  ret i8 %abs
+}
+
+; Negative test - don't transform if it would increase instruction count.
+
+declare void @extra_use(i8)
+
+define i8 @shifty_abs_too_many_uses(i8 %x) {
+; CHECK-LABEL: @shifty_abs_too_many_uses(
+; CHECK-NEXT:    [[SIGNBIT:%.*]] = ashr i8 [[X:%.*]], 7
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[SIGNBIT]], [[X]]
+; CHECK-NEXT:    [[ABS:%.*]] = xor i8 [[ADD]], [[SIGNBIT]]
+; CHECK-NEXT:    call void @extra_use(i8 [[SIGNBIT]])
+; CHECK-NEXT:    ret i8 [[ABS]]
+;
+  %signbit = ashr i8 %x, 7
+  %add = add i8 %x, %signbit
+  %abs = xor i8 %add, %signbit
+  call void @extra_use(i8 %signbit)
+  ret i8 %abs
+}
+
+; There's another way to make abs() using shift, xor, and subtract.
+; PR36036 - https://bugs.llvm.org/show_bug.cgi?id=36036
+
+define i8 @shifty_sub(i8 %x) {
+; CHECK-LABEL: @shifty_sub(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i8 0, [[X]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 [[X]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sh = ashr i8 %x, 7
+  %xor = xor i8 %x, %sh
+  %r = sub i8 %xor, %sh
+  ret i8 %r
+}
+
+define i8 @shifty_sub_nsw_commute(i8 %x) {
+; CHECK-LABEL: @shifty_sub_nsw_commute(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i8 [[X:%.*]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw i8 0, [[X]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[TMP1]], i8 [[TMP2]], i8 [[X]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sh = ashr i8 %x, 7
+  %xor = xor i8 %sh, %x
+  %r = sub nsw i8 %xor, %sh
+  ret i8 %r
+}
+
+define <4 x i32> @shifty_sub_nuw_vec_commute(<4 x i32> %x) {
+; CHECK-LABEL: @shifty_sub_nuw_vec_commute(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[X]], <4 x i32> zeroinitializer
+; CHECK-NEXT:    ret <4 x i32> [[R]]
+;
+  %sh = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
+  %xor = xor <4 x i32> %sh, %x
+  %r = sub nuw <4 x i32> %xor, %sh
+  ret <4 x i32> %r
+}
+
+define i12 @shifty_sub_nsw_nuw(i12 %x) {
+; CHECK-LABEL: @shifty_sub_nsw_nuw(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i12 [[X:%.*]], 0
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[TMP1]], i12 [[X]], i12 0
+; CHECK-NEXT:    ret i12 [[R]]
+;
+  %sh = ashr i12 %x, 11
+  %xor = xor i12 %x, %sh
+  %r = sub nsw nuw i12 %xor, %sh
+  ret i12 %r
+}
+
+define i8 @negate_abs(i8 %x) {
+; CHECK-LABEL: @negate_abs(
+; CHECK-NEXT:    [[N:%.*]] = sub i8 0, [[X:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i8 [[X]], 0
+; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], i8 [[X]], i8 [[N]]
+; CHECK-NEXT:    ret i8 [[S]]
+;
+  %n = sub i8 0, %x
+  %c = icmp slt i8 %x, 0
+  %s = select i1 %c, i8 %n, i8 %x
+  %r = sub i8 0, %s
+  ret i8 %r
+}
+
+define <2 x i8> @negate_nabs(<2 x i8> %x) {
+; CHECK-LABEL: @negate_nabs(
+; CHECK-NEXT:    [[N:%.*]] = sub <2 x i8> zeroinitializer, [[X:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = icmp slt <2 x i8> [[X]], zeroinitializer
+; CHECK-NEXT:    [[S:%.*]] = select <2 x i1> [[C]], <2 x i8> [[N]], <2 x i8> [[X]]
+; CHECK-NEXT:    ret <2 x i8> [[S]]
+;
+  %n = sub <2 x i8> zeroinitializer, %x
+  %c = icmp slt <2 x i8> %x, zeroinitializer
+  %s = select <2 x i1> %c, <2 x i8> %x, <2 x i8> %n
+  %r = sub <2 x i8> zeroinitializer, %s
+  ret <2 x i8> %r
+}
+
+define i1 @abs_must_be_positive(i32 %x) {
+; CHECK-LABEL: @abs_must_be_positive(
+; CHECK-NEXT:    ret i1 true
+;
+  %negx = sub nsw i32 0, %x
+  %c = icmp sge i32 %x, 0
+  %sel = select i1 %c, i32 %x, i32 %negx
+  %c2 = icmp sge i32 %sel, 0
+  ret i1 %c2
+}
+

Added: llvm/trunk/test/Transforms/InstCombine/abs_abs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/abs_abs.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/abs_abs.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/abs_abs.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,1346 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @abs_abs_x01(i32 %x) {
+; CHECK-LABEL: @abs_abs_x01(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define <2 x i32> @abs_abs_x01_vec(<2 x i32> %x) {
+; CHECK-LABEL: @abs_abs_x01_vec(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i32> [[X:%.*]], zeroinitializer
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[SUB]], <2 x i32> [[X]]
+; CHECK-NEXT:    ret <2 x i32> [[COND]]
+;
+  %cmp = icmp sgt <2 x i32> %x, <i32 -1, i32 -1>
+  %sub = sub nsw <2 x i32> zeroinitializer, %x
+  %cond = select <2 x i1> %cmp, <2 x i32> %x, <2 x i32> %sub
+  %cmp1 = icmp sgt <2 x i32> %cond, <i32 -1, i32 -1>
+  %sub16 = sub nsw <2 x i32> zeroinitializer, %cond
+  %cond18 = select <2 x i1> %cmp1, <2 x i32> %cond, <2 x i32> %sub16
+  ret <2 x i32> %cond18
+}
+
+define i32 @abs_abs_x02(i32 %x) {
+; CHECK-LABEL: @abs_abs_x02(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @abs_abs_x03(i32 %x) {
+; CHECK-LABEL: @abs_abs_x03(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @abs_abs_x04(i32 %x) {
+; CHECK-LABEL: @abs_abs_x04(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define <2 x i32> @abs_abs_x04_vec(<2 x i32> %x) {
+; CHECK-LABEL: @abs_abs_x04_vec(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i32> [[X:%.*]], zeroinitializer
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[SUB]], <2 x i32> [[X]]
+; CHECK-NEXT:    ret <2 x i32> [[COND]]
+;
+  %cmp = icmp slt <2 x i32> %x, <i32 1, i32 1>
+  %sub = sub nsw <2 x i32> zeroinitializer, %x
+  %cond = select <2 x i1> %cmp, <2 x i32> %sub, <2 x i32> %x
+  %cmp1 = icmp sgt <2 x i32> %cond, <i32 -1, i32 -1>
+  %sub16 = sub nsw <2 x i32> zeroinitializer, %cond
+  %cond18 = select <2 x i1> %cmp1, <2 x i32> %cond, <2 x i32> %sub16
+  ret <2 x i32> %cond18
+}
+
+define i32 @abs_abs_x05(i32 %x) {
+; CHECK-LABEL: @abs_abs_x05(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @abs_abs_x06(i32 %x) {
+; CHECK-LABEL: @abs_abs_x06(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @abs_abs_x07(i32 %x) {
+; CHECK-LABEL: @abs_abs_x07(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @abs_abs_x08(i32 %x) {
+; CHECK-LABEL: @abs_abs_x08(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @abs_abs_x09(i32 %x) {
+; CHECK-LABEL: @abs_abs_x09(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @abs_abs_x10(i32 %x) {
+; CHECK-LABEL: @abs_abs_x10(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @abs_abs_x11(i32 %x) {
+; CHECK-LABEL: @abs_abs_x11(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @abs_abs_x12(i32 %x) {
+; CHECK-LABEL: @abs_abs_x12(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @abs_abs_x13(i32 %x) {
+; CHECK-LABEL: @abs_abs_x13(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @abs_abs_x14(i32 %x) {
+; CHECK-LABEL: @abs_abs_x14(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @abs_abs_x15(i32 %x) {
+; CHECK-LABEL: @abs_abs_x15(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @abs_abs_x16(i32 %x) {
+; CHECK-LABEL: @abs_abs_x16(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+; abs(abs(-x)) -> abs(-x) -> abs(x)
+define i32 @abs_abs_x17(i32 %x) {
+; CHECK-LABEL: @abs_abs_x17(
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X]], 0
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %sub = sub nsw i32 0, %x
+  %cmp = icmp sgt i32 %sub, -1
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+; abs(abs(x - y)) -> abs(x - y)
+define i32 @abs_abs_x18(i32 %x, i32 %y) {
+; CHECK-LABEL: @abs_abs_x18(
+; CHECK-NEXT:    [[A:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[A]], 0
+; CHECK-NEXT:    [[NEGA:%.*]] = sub i32 0, [[A]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[NEGA]], i32 [[A]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %a = sub nsw i32 %x, %y
+  %b = sub nsw i32 %y, %x
+  %cmp = icmp sgt i32 %a, -1
+  %cond = select i1 %cmp, i32 %a, i32 %b
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+; abs(abs(-x)) -> abs(-x) -> abs(x)
+define <2 x i32> @abs_abs_x02_vec(<2 x i32> %x) {
+; CHECK-LABEL: @abs_abs_x02_vec(
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i32> [[X]], zeroinitializer
+; CHECK-NEXT:    [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[SUB]], <2 x i32> [[X]]
+; CHECK-NEXT:    ret <2 x i32> [[COND]]
+;
+  %sub = sub nsw <2 x i32> zeroinitializer, %x
+  %cmp = icmp sgt <2 x i32> %sub, <i32 -1, i32 -1>
+  %cond = select <2 x i1> %cmp, <2 x i32> %sub, <2 x i32> %x
+  %cmp1 = icmp sgt <2 x i32> %cond, <i32 -1, i32 -1>
+  %sub16 = sub nsw <2 x i32> zeroinitializer, %cond
+  %cond18 = select <2 x i1> %cmp1, <2 x i32> %cond, <2 x i32> %sub16
+  ret <2 x i32> %cond18
+}
+
+; abs(abs(x - y)) -> abs(x - y)
+define <2 x i32> @abs_abs_x03_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @abs_abs_x03_vec(
+; CHECK-NEXT:    [[A:%.*]] = sub nsw <2 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i32> [[A]], zeroinitializer
+; CHECK-NEXT:    [[NEGA:%.*]] = sub <2 x i32> zeroinitializer, [[A]]
+; CHECK-NEXT:    [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[NEGA]], <2 x i32> [[A]]
+; CHECK-NEXT:    ret <2 x i32> [[COND]]
+;
+  %a = sub nsw <2 x i32> %x, %y
+  %b = sub nsw <2 x i32> %y, %x
+  %cmp = icmp sgt <2 x i32> %a, <i32 -1, i32 -1>
+  %cond = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> %b
+  %cmp1 = icmp sgt <2 x i32> %cond, <i32 -1, i32 -1>
+  %sub16 = sub nsw <2 x i32> zeroinitializer, %cond
+  %cond18 = select <2 x i1> %cmp1, <2 x i32> %cond, <2 x i32> %sub16
+  ret <2 x i32> %cond18
+}
+
+define i32 @nabs_nabs_x01(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x01(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x02(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x02(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x03(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x03(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x04(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x04(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x05(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x05(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x06(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x06(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x07(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x07(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x08(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x08(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x09(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x09(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x10(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x10(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x11(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x11(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x12(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x12(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x13(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x13(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x14(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x14(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x15(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x15(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @nabs_nabs_x16(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x16(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+; nabs(nabs(-x)) -> nabs(-x) -> nabs(x)
+define i32 @nabs_nabs_x17(i32 %x) {
+; CHECK-LABEL: @nabs_nabs_x17(
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X]], 0
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %sub = sub nsw i32 0, %x
+  %cmp = icmp sgt i32 %sub, -1
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub16, i32 %cond
+  ret i32 %cond18
+}
+
+; nabs(nabs(x - y)) -> nabs(x - y)
+define i32 @nabs_nabs_x18(i32 %x, i32 %y) {
+; CHECK-LABEL: @nabs_nabs_x18(
+; CHECK-NEXT:    [[A:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[A]], 0
+; CHECK-NEXT:    [[NEGA:%.*]] = sub i32 0, [[A]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[A]], i32 [[NEGA]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %a = sub nsw i32 %x, %y
+  %b = sub nsw i32 %y, %x
+  %cmp = icmp sgt i32 %a, -1
+  %cond = select i1 %cmp, i32 %b, i32 %a
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub16, i32 %cond
+  ret i32 %cond18
+}
+
+; nabs(nabs(-x)) -> nabs(-x) -> nabs(x)
+define <2 x i32> @nabs_nabs_x01_vec(<2 x i32> %x) {
+; CHECK-LABEL: @nabs_nabs_x01_vec(
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i32> [[X]], zeroinitializer
+; CHECK-NEXT:    [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[X]], <2 x i32> [[SUB]]
+; CHECK-NEXT:    ret <2 x i32> [[COND]]
+;
+  %sub = sub nsw <2 x i32> zeroinitializer, %x
+  %cmp = icmp sgt <2 x i32> %sub, <i32 -1, i32 -1>
+  %cond = select <2 x i1> %cmp, <2 x i32> %x, <2 x i32> %sub
+  %cmp1 = icmp sgt <2 x i32> %cond, <i32 -1, i32 -1>
+  %sub16 = sub nsw <2 x i32> zeroinitializer, %cond
+  %cond18 = select <2 x i1> %cmp1, <2 x i32> %sub16, <2 x i32> %cond
+  ret <2 x i32> %cond18
+}
+
+; nabs(nabs(x - y)) -> nabs(x - y)
+define <2 x i32> @nabs_nabs_x02_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @nabs_nabs_x02_vec(
+; CHECK-NEXT:    [[A:%.*]] = sub nsw <2 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i32> [[A]], zeroinitializer
+; CHECK-NEXT:    [[NEGA:%.*]] = sub <2 x i32> zeroinitializer, [[A]]
+; CHECK-NEXT:    [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[A]], <2 x i32> [[NEGA]]
+; CHECK-NEXT:    ret <2 x i32> [[COND]]
+;
+  %a = sub nsw <2 x i32> %x, %y
+  %b = sub nsw <2 x i32> %y, %x
+  %cmp = icmp sgt <2 x i32> %a, <i32 -1, i32 -1>
+  %cond = select <2 x i1> %cmp, <2 x i32> %b, <2 x i32> %a
+  %cmp1 = icmp sgt <2 x i32> %cond, <i32 -1, i32 -1>
+  %sub16 = sub nsw <2 x i32> zeroinitializer, %cond
+  %cond18 = select <2 x i1> %cmp1, <2 x i32> %sub16, <2 x i32> %cond
+  ret <2 x i32> %cond18
+}
+
+define i32 @abs_nabs_x01(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x01(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @abs_nabs_x02(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x02(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @abs_nabs_x03(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x03(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @abs_nabs_x04(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x04(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @abs_nabs_x05(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x05(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @abs_nabs_x06(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x06(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @abs_nabs_x07(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x07(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @abs_nabs_x08(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x08(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @abs_nabs_x09(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x09(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @abs_nabs_x10(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x10(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @abs_nabs_x11(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x11(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @abs_nabs_x12(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x12(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @abs_nabs_x13(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x13(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @abs_nabs_x14(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x14(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @abs_nabs_x15(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x15(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @abs_nabs_x16(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x16(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+; abs(nabs(-x)) -> abs(-x) -> abs(x)
+define i32 @abs_nabs_x17(i32 %x) {
+; CHECK-LABEL: @abs_nabs_x17(
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X]], 0
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[SUB]], i32 [[X]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %sub = sub nsw i32 0, %x
+  %cmp = icmp sgt i32 %sub, -1
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+; abs(nabs(x - y)) -> abs(x - y)
+define i32 @abs_nabs_x18(i32 %x, i32 %y) {
+; CHECK-LABEL: @abs_nabs_x18(
+; CHECK-NEXT:    [[A:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[A]], 0
+; CHECK-NEXT:    [[NEGA:%.*]] = sub i32 0, [[A]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[NEGA]], i32 [[A]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %a = sub nsw i32 %x, %y
+  %b = sub nsw i32 %y, %x
+  %cmp = icmp sgt i32 %a, -1
+  %cond = select i1 %cmp, i32 %b, i32 %a
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+; abs(nabs(-x)) -> abs(-x) -> abs(x)
+define <2 x i32> @abs_nabs_x01_vec(<2 x i32> %x) {
+; CHECK-LABEL: @abs_nabs_x01_vec(
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i32> [[X]], zeroinitializer
+; CHECK-NEXT:    [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[SUB]], <2 x i32> [[X]]
+; CHECK-NEXT:    ret <2 x i32> [[COND]]
+;
+  %sub = sub nsw <2 x i32> zeroinitializer, %x
+  %cmp = icmp sgt <2 x i32> %sub, <i32 -1, i32 -1>
+  %cond = select <2 x i1> %cmp, <2 x i32> %x, <2 x i32> %sub
+  %cmp1 = icmp sgt <2 x i32> %cond, <i32 -1, i32 -1>
+  %sub16 = sub nsw <2 x i32> zeroinitializer, %cond
+  %cond18 = select <2 x i1> %cmp1, <2 x i32> %cond, <2 x i32> %sub16
+  ret <2 x i32> %cond18
+}
+
+; abs(nabs(x - y)) -> abs(x - y)
+define <2 x i32> @abs_nabs_x02_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @abs_nabs_x02_vec(
+; CHECK-NEXT:    [[A:%.*]] = sub nsw <2 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i32> [[A]], zeroinitializer
+; CHECK-NEXT:    [[NEGA:%.*]] = sub <2 x i32> zeroinitializer, [[A]]
+; CHECK-NEXT:    [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[NEGA]], <2 x i32> [[A]]
+; CHECK-NEXT:    ret <2 x i32> [[COND]]
+;
+  %a = sub nsw <2 x i32> %x, %y
+  %b = sub nsw <2 x i32> %y, %x
+  %cmp = icmp sgt <2 x i32> %a, <i32 -1, i32 -1>
+  %cond = select <2 x i1> %cmp, <2 x i32> %b, <2 x i32> %a
+  %cmp1 = icmp sgt <2 x i32> %cond, <i32 -1, i32 -1>
+  %sub16 = sub nsw <2 x i32> zeroinitializer, %cond
+  %cond18 = select <2 x i1> %cmp1, <2 x i32> %cond, <2 x i32> %sub16
+  ret <2 x i32> %cond18
+}
+
+define i32 @nabs_abs_x01(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x01(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @nabs_abs_x02(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x02(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @nabs_abs_x03(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x03(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @nabs_abs_x04(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x04(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @nabs_abs_x05(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x05(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @nabs_abs_x06(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x06(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @nabs_abs_x07(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x07(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @nabs_abs_x08(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x08(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, 0
+  %sub9 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub9, i32 %cond
+  ret i32 %cond18
+}
+
+define i32 @nabs_abs_x09(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x09(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @nabs_abs_x10(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x10(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @nabs_abs_x11(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x11(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @nabs_abs_x12(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x12(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 0
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @nabs_abs_x13(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x13(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp sgt i32 %x, -1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @nabs_abs_x14(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x14(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp sgt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %x, i32 %sub
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @nabs_abs_x15(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x15(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp slt i32 %x, 0
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+define i32 @nabs_abs_x16(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x16(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X]]
+; CHECK-NEXT:    [[COND1:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND1]]
+;
+  %cmp = icmp slt i32 %x, 1
+  %sub = sub nsw i32 0, %x
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp slt i32 %cond, 1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %cond, i32 %sub16
+  ret i32 %cond18
+}
+
+; nabs(abs(-x)) -> nabs(-x) -> nabs(x)
+define i32 @nabs_abs_x17(i32 %x) {
+; CHECK-LABEL: @nabs_abs_x17(
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 0, [[X:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X]], 0
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %sub = sub nsw i32 0, %x
+  %cmp = icmp sgt i32 %sub, -1
+  %cond = select i1 %cmp, i32 %sub, i32 %x
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub16, i32 %cond
+  ret i32 %cond18
+}
+
+; nabs(abs(x - y)) -> nabs(x - y)
+define i32 @nabs_abs_x18(i32 %x, i32 %y) {
+; CHECK-LABEL: @nabs_abs_x18(
+; CHECK-NEXT:    [[A:%.*]] = sub nsw i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[A]], 0
+; CHECK-NEXT:    [[NEGA:%.*]] = sub i32 0, [[A]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], i32 [[A]], i32 [[NEGA]]
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+  %a = sub nsw i32 %x, %y
+  %b = sub nsw i32 %y, %x
+  %cmp = icmp sgt i32 %a, -1
+  %cond = select i1 %cmp, i32 %a, i32 %b
+  %cmp1 = icmp sgt i32 %cond, -1
+  %sub16 = sub nsw i32 0, %cond
+  %cond18 = select i1 %cmp1, i32 %sub16, i32 %cond
+  ret i32 %cond18
+}
+
+; nabs(abs(-x)) -> nabs(-x) -> nabs(x)
+define <2 x i32> @nabs_abs_x01_vec(<2 x i32> %x) {
+; CHECK-LABEL: @nabs_abs_x01_vec(
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i32> [[X]], zeroinitializer
+; CHECK-NEXT:    [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[X]], <2 x i32> [[SUB]]
+; CHECK-NEXT:    ret <2 x i32> [[COND]]
+;
+  %sub = sub nsw <2 x i32> zeroinitializer, %x
+  %cmp = icmp sgt <2 x i32> %sub, <i32 -1, i32 -1>
+  %cond = select <2 x i1> %cmp, <2 x i32> %sub, <2 x i32> %x
+  %cmp1 = icmp sgt <2 x i32> %cond, <i32 -1, i32 -1>
+  %sub16 = sub nsw <2 x i32> zeroinitializer, %cond
+  %cond18 = select <2 x i1> %cmp1, <2 x i32> %sub16, <2 x i32> %cond
+  ret <2 x i32> %cond18
+}
+
+; nabs(abs(x - y)) -> nabs(x - y)
+define <2 x i32> @nabs_abs_x02_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @nabs_abs_x02_vec(
+; CHECK-NEXT:    [[A:%.*]] = sub nsw <2 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i32> [[A]], zeroinitializer
+; CHECK-NEXT:    [[NEGA:%.*]] = sub <2 x i32> zeroinitializer, [[A]]
+; CHECK-NEXT:    [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[A]], <2 x i32> [[NEGA]]
+; CHECK-NEXT:    ret <2 x i32> [[COND]]
+;
+  %a = sub nsw <2 x i32> %x, %y
+  %b = sub nsw <2 x i32> %y, %x
+  %cmp = icmp sgt <2 x i32> %a, <i32 -1, i32 -1>
+  %cond = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> %b
+  %cmp1 = icmp sgt <2 x i32> %cond, <i32 -1, i32 -1>
+  %sub16 = sub nsw <2 x i32> zeroinitializer, %cond
+  %cond18 = select <2 x i1> %cmp1, <2 x i32> %sub16, <2 x i32> %cond
+  ret <2 x i32> %cond18
+}

Added: llvm/trunk/test/Transforms/InstCombine/add-sitofp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/add-sitofp.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/add-sitofp.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/add-sitofp.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,141 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define double @x(i32 %a, i32 %b) {
+; CHECK-LABEL: @x(
+; CHECK-NEXT:    [[M:%.*]] = lshr i32 [[A:%.*]], 24
+; CHECK-NEXT:    [[N:%.*]] = and i32 [[M]], [[B:%.*]]
+; CHECK-NEXT:    [[ADDCONV:%.*]] = add nuw nsw i32 [[N]], 1
+; CHECK-NEXT:    [[P:%.*]] = sitofp i32 [[ADDCONV]] to double
+; CHECK-NEXT:    ret double [[P]]
+;
+  %m = lshr i32 %a, 24
+  %n = and i32 %m, %b
+  %o = sitofp i32 %n to double
+  %p = fadd double %o, 1.0
+  ret double %p
+}
+
+define double @test(i32 %a) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823
+; CHECK-NEXT:    [[ADDCONV:%.*]] = add nuw nsw i32 [[A_AND]], 1
+; CHECK-NEXT:    [[RES:%.*]] = sitofp i32 [[ADDCONV]] to double
+; CHECK-NEXT:    ret double [[RES]]
+;
+  ; Drop two highest bits to guarantee that %a + 1 doesn't overflow
+  %a_and = and i32 %a, 1073741823
+  %a_and_fp = sitofp i32 %a_and to double
+  %res = fadd double %a_and_fp, 1.0
+  ret double %res
+}
+
+define float @test_neg(i32 %a) {
+; CHECK-LABEL: @test_neg(
+; CHECK-NEXT:    [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823
+; CHECK-NEXT:    [[A_AND_FP:%.*]] = sitofp i32 [[A_AND]] to float
+; CHECK-NEXT:    [[RES:%.*]] = fadd float [[A_AND_FP]], 1.000000e+00
+; CHECK-NEXT:    ret float [[RES]]
+;
+  ; Drop two highest bits to guarantee that %a + 1 doesn't overflow
+  %a_and = and i32 %a, 1073741823
+  %a_and_fp = sitofp i32 %a_and to float
+  %res = fadd float %a_and_fp, 1.0
+  ret float %res
+}
+
+define double @test_2(i32 %a, i32 %b) {
+; CHECK-LABEL: @test_2(
+; CHECK-NEXT:    [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823
+; CHECK-NEXT:    [[B_AND:%.*]] = and i32 [[B:%.*]], 1073741823
+; CHECK-NEXT:    [[ADDCONV:%.*]] = add nuw nsw i32 [[A_AND]], [[B_AND]]
+; CHECK-NEXT:    [[RES:%.*]] = sitofp i32 [[ADDCONV]] to double
+; CHECK-NEXT:    ret double [[RES]]
+;
+  ; Drop two highest bits to guarantee that %a + %b doesn't overflow
+  %a_and = and i32 %a, 1073741823
+  %b_and = and i32 %b, 1073741823
+
+  %a_and_fp = sitofp i32 %a_and to double
+  %b_and_fp = sitofp i32 %b_and to double
+
+  %res = fadd double %a_and_fp, %b_and_fp
+  ret double %res
+}
+
+define float @test_2_neg(i32 %a, i32 %b) {
+; CHECK-LABEL: @test_2_neg(
+; CHECK-NEXT:    [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823
+; CHECK-NEXT:    [[B_AND:%.*]] = and i32 [[B:%.*]], 1073741823
+; CHECK-NEXT:    [[A_AND_FP:%.*]] = sitofp i32 [[A_AND]] to float
+; CHECK-NEXT:    [[B_AND_FP:%.*]] = sitofp i32 [[B_AND]] to float
+; CHECK-NEXT:    [[RES:%.*]] = fadd float [[A_AND_FP]], [[B_AND_FP]]
+; CHECK-NEXT:    ret float [[RES]]
+;
+  ; Drop two highest bits to guarantee that %a + %b doesn't overflow
+  %a_and = and i32 %a, 1073741823
+  %b_and = and i32 %b, 1073741823
+
+  %a_and_fp = sitofp i32 %a_and to float
+  %b_and_fp = sitofp i32 %b_and to float
+
+  %res = fadd float %a_and_fp, %b_and_fp
+  ret float %res
+}
+
+; This test demonstrates an overly conservative legality check. The float addition
+; could be replaced with an integer addition because the result of the operation
+; can be represented in a float, but we don't do that yet.
+define float @test_3(i32 %a, i32 %b) {
+; CHECK-LABEL: @test_3(
+; CHECK-NEXT:    [[M:%.*]] = lshr i32 [[A:%.*]], 24
+; CHECK-NEXT:    [[N:%.*]] = and i32 [[M]], [[B:%.*]]
+; CHECK-NEXT:    [[O:%.*]] = sitofp i32 [[N]] to float
+; CHECK-NEXT:    [[P:%.*]] = fadd float [[O]], 1.000000e+00
+; CHECK-NEXT:    ret float [[P]]
+;
+  %m = lshr i32 %a, 24
+  %n = and i32 %m, %b
+  %o = sitofp i32 %n to float
+  %p = fadd float %o, 1.0
+  ret float %p
+}
+
+define <4 x double> @test_4(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @test_4(
+; CHECK-NEXT:    [[A_AND:%.*]] = and <4 x i32> [[A:%.*]], <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
+; CHECK-NEXT:    [[B_AND:%.*]] = and <4 x i32> [[B:%.*]], <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
+; CHECK-NEXT:    [[ADDCONV:%.*]] = add nuw nsw <4 x i32> [[A_AND]], [[B_AND]]
+; CHECK-NEXT:    [[RES:%.*]] = sitofp <4 x i32> [[ADDCONV]] to <4 x double>
+; CHECK-NEXT:    ret <4 x double> [[RES]]
+;
+  ; Drop two highest bits to guarantee that %a + %b doesn't overflow
+  %a_and = and <4 x i32> %a, <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
+  %b_and = and <4 x i32> %b, <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
+
+  %a_and_fp = sitofp <4 x i32> %a_and to <4 x double>
+  %b_and_fp = sitofp <4 x i32> %b_and to <4 x double>
+
+  %res = fadd <4 x double> %a_and_fp, %b_and_fp
+  ret <4 x double> %res
+}
+
+define <4 x float> @test_4_neg(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @test_4_neg(
+; CHECK-NEXT:    [[A_AND:%.*]] = and <4 x i32> [[A:%.*]], <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
+; CHECK-NEXT:    [[B_AND:%.*]] = and <4 x i32> [[B:%.*]], <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
+; CHECK-NEXT:    [[A_AND_FP:%.*]] = sitofp <4 x i32> [[A_AND]] to <4 x float>
+; CHECK-NEXT:    [[B_AND_FP:%.*]] = sitofp <4 x i32> [[B_AND]] to <4 x float>
+; CHECK-NEXT:    [[RES:%.*]] = fadd <4 x float> [[A_AND_FP]], [[B_AND_FP]]
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
+  ; Drop two highest bits to guarantee that %a + %b doesn't overflow
+  %a_and = and <4 x i32> %a, <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
+  %b_and = and <4 x i32> %b, <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
+
+  %a_and_fp = sitofp <4 x i32> %a_and to <4 x float>
+  %b_and_fp = sitofp <4 x i32> %b_and to <4 x float>
+
+  %res = fadd <4 x float> %a_and_fp, %b_and_fp
+  ret <4 x float> %res
+}

Added: llvm/trunk/test/Transforms/InstCombine/add.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/add.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/add.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/add.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,980 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @select_0_or_1_from_bool(i1 %x) {
+; CHECK-LABEL: @select_0_or_1_from_bool(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i1 [[X:%.*]], true
+; CHECK-NEXT:    [[ADD:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %ext = sext i1 %x to i32
+  %add = add i32 %ext, 1
+  ret i32 %add
+}
+
+define <2 x i32> @select_0_or_1_from_bool_vec(<2 x i1> %x) {
+; CHECK-LABEL: @select_0_or_1_from_bool_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i1> [[X:%.*]], <i1 true, i1 true>
+; CHECK-NEXT:    [[ADD:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32>
+; CHECK-NEXT:    ret <2 x i32> [[ADD]]
+;
+  %ext = sext <2 x i1> %x to <2 x i32>
+  %add = add <2 x i32> %ext, <i32 1, i32 1>
+  ret <2 x i32> %add
+}
+
+define i32 @select_C_minus_1_or_C_from_bool(i1 %x) {
+; CHECK-LABEL: @select_C_minus_1_or_C_from_bool(
+; CHECK-NEXT:    [[ADD:%.*]] = select i1 [[X:%.*]], i32 41, i32 42
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %ext = sext i1 %x to i32
+  %add = add i32 %ext, 42
+  ret i32 %add
+}
+
+define <2 x i32> @select_C_minus_1_or_C_from_bool_vec(<2 x i1> %x) {
+; CHECK-LABEL: @select_C_minus_1_or_C_from_bool_vec(
+; CHECK-NEXT:    [[ADD:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> <i32 41, i32 42>, <2 x i32> <i32 42, i32 43>
+; CHECK-NEXT:    ret <2 x i32> [[ADD]]
+;
+  %ext = sext <2 x i1> %x to <2 x i32>
+  %add = add <2 x i32> %ext, <i32 42, i32 43>
+  ret <2 x i32> %add
+}
+
+; This is an 'andn' of the low bit.
+
+define i32 @flip_and_mask(i32 %x) {
+; CHECK-LABEL: @flip_and_mask(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 1
+; CHECK-NEXT:    [[INC:%.*]] = xor i32 [[TMP1]], 1
+; CHECK-NEXT:    ret i32 [[INC]]
+;
+  %shl = shl i32 %x, 31
+  %shr = ashr i32 %shl, 31
+  %inc = add i32 %shr, 1
+  ret i32 %inc
+}
+
+define <2 x i8> @flip_and_mask_splat(<2 x i8> %x) {
+; CHECK-LABEL: @flip_and_mask_splat(
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], <i8 1, i8 1>
+; CHECK-NEXT:    [[INC:%.*]] = xor <2 x i8> [[TMP1]], <i8 1, i8 1>
+; CHECK-NEXT:    ret <2 x i8> [[INC]]
+;
+  %shl = shl <2 x i8> %x, <i8 7, i8 7>
+  %shr = ashr <2 x i8> %shl, <i8 7, i8 7>
+  %inc = add <2 x i8> %shr, <i8 1, i8 1>
+  ret <2 x i8> %inc
+}
+
+define i32 @test1(i32 %A) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    ret i32 [[A:%.*]]
+;
+  %B = add i32 %A, 0
+  ret i32 %B
+}
+
+define i32 @test2(i32 %A) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    ret i32 [[A:%.*]]
+;
+  %B = add i32 %A, 5
+  %C = add i32 %B, -5
+  ret i32 %C
+}
+
+define i32 @test3(i32 %A) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    ret i32 [[A:%.*]]
+;
+  %B = add i32 %A, 5
+  %C = sub i32 %B, 5
+  ret i32 %C
+}
+
+; D = B + -A = B - A
+define i32 @test4(i32 %A, i32 %B) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[D:%.*]] = sub i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %C = sub i32 0, %A
+  %D = add i32 %B, %C
+  ret i32 %D
+}
+
+; D = -A + B = B - A
+define i32 @test5(i32 %A, i32 %B) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    [[D:%.*]] = sub i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %C = sub i32 0, %A
+  %D = add i32 %C, %B
+  ret i32 %D
+}
+
+define <2 x i8> @neg_op0_vec_undef_elt(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @neg_op0_vec_undef_elt(
+; CHECK-NEXT:    [[R:%.*]] = sub <2 x i8> [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    ret <2 x i8> [[R]]
+;
+  %nega = sub <2 x i8> <i8 0, i8 undef>, %a
+  %r = add <2 x i8> %nega, %b
+  ret <2 x i8> %r
+}
+
+define <2 x i8> @neg_neg_vec_undef_elt(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @neg_neg_vec_undef_elt(
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = sub <2 x i8> zeroinitializer, [[TMP1]]
+; CHECK-NEXT:    ret <2 x i8> [[R]]
+;
+  %nega = sub <2 x i8> <i8 undef, i8 0>, %a
+  %negb = sub <2 x i8> <i8 undef, i8 0>, %b
+  %r = add <2 x i8> %nega, %negb
+  ret <2 x i8> %r
+}
+
+; C = 7*A+A == 8*A == A << 3
+define i32 @test6(i32 %A) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    [[C:%.*]] = shl i32 [[A:%.*]], 3
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %B = mul i32 7, %A
+  %C = add i32 %B, %A
+  ret i32 %C
+}
+
+; C = A+7*A == 8*A == A << 3
+define i32 @test7(i32 %A) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    [[C:%.*]] = shl i32 [[A:%.*]], 3
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %B = mul i32 7, %A
+  %C = add i32 %A, %B
+  ret i32 %C
+}
+
+; (A & C1)+(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+define i32 @test8(i32 %A, i32 %B) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:    [[A1:%.*]] = and i32 [[A:%.*]], 7
+; CHECK-NEXT:    [[B1:%.*]] = and i32 [[B:%.*]], 128
+; CHECK-NEXT:    [[C:%.*]] = or i32 [[A1]], [[B1]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A1 = and i32 %A, 7
+  %B1 = and i32 %B, 128
+  %C = add i32 %A1, %B1
+  ret i32 %C
+}
+
+define i32 @test9(i32 %A) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:    [[C:%.*]] = shl i32 [[A:%.*]], 5
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %B = shl i32 %A, 4
+  %C = add i32 %B, %B
+  ret i32 %C
+}
+
+; a != -b
+define i1 @test10(i8 %a, i8 %b) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT:    [[ADD:%.*]] = sub i8 0, [[B:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i8 [[ADD]], [[A:%.*]]
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %add = add i8 %a, %b
+  %c = icmp ne i8 %add, 0
+  ret i1 %c
+}
+
+define <2 x i1> @test10vec(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @test10vec(
+; CHECK-NEXT:    [[C:%.*]] = sub <2 x i8> zeroinitializer, [[B:%.*]]
+; CHECK-NEXT:    [[D:%.*]] = icmp ne <2 x i8> [[C]], [[A:%.*]]
+; CHECK-NEXT:    ret <2 x i1> [[D]]
+;
+  %c = add <2 x i8> %a, %b
+  %d = icmp ne <2 x i8> %c, zeroinitializer
+  ret <2 x i1> %d
+}
+
+define i1 @test11(i8 %A) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i8 [[A:%.*]], 1
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %B = add i8 %A, -1
+  %c = icmp ne i8 %B, 0
+  ret i1 %c
+}
+
+define <2 x i1> @test11vec(<2 x i8> %a) {
+; CHECK-LABEL: @test11vec(
+; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], <i8 1, i8 1>
+; CHECK-NEXT:    ret <2 x i1> [[C]]
+;
+  %b = add <2 x i8> %a, <i8 -1, i8 -1>
+  %c = icmp ne <2 x i8> %b, zeroinitializer
+  ret <2 x i1> %c
+}
+
+; Should be transformed into shl A, 1?
+
+define i32 @test12(i32 %A, i32 %B) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:    br label [[X:%.*]]
+; CHECK:       X:
+; CHECK-NEXT:    [[C_OK:%.*]] = add i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    [[D:%.*]] = add i32 [[C_OK]], [[A]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %C_OK = add i32 %B, %A
+  br label %X
+
+X:              ; preds = %0
+  %D = add i32 %C_OK, %A
+  ret i32 %D
+}
+
+;; TODO: shl A, 1?
+define i32 @test13(i32 %A, i32 %B, i32 %C) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT:    [[D_OK:%.*]] = add i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[E_OK:%.*]] = add i32 [[D_OK]], [[C:%.*]]
+; CHECK-NEXT:    [[F:%.*]] = add i32 [[E_OK]], [[A]]
+; CHECK-NEXT:    ret i32 [[F]]
+;
+  %D_OK = add i32 %A, %B
+  %E_OK = add i32 %D_OK, %C
+  %F = add i32 %E_OK, %A
+  ret i32 %F
+}
+
+define i32 @test14(i32 %offset, i32 %difference) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT:    [[TMP_2:%.*]] = and i32 [[DIFFERENCE:%.*]], 3
+; CHECK-NEXT:    [[TMP_3_OK:%.*]] = add i32 [[TMP_2]], [[OFFSET:%.*]]
+; CHECK-NEXT:    [[TMP_5_MASK:%.*]] = and i32 [[DIFFERENCE]], -4
+; CHECK-NEXT:    [[TMP_8:%.*]] = add i32 [[TMP_3_OK]], [[TMP_5_MASK]]
+; CHECK-NEXT:    ret i32 [[TMP_8]]
+;
+  %tmp.2 = and i32 %difference, 3
+  %tmp.3_OK = add i32 %tmp.2, %offset
+  %tmp.5.mask = and i32 %difference, -4
+  ; == add %offset, %difference
+  %tmp.8 = add i32 %tmp.3_OK, %tmp.5.mask
+  ret i32 %tmp.8
+}
+
+; Only one bit set
+define i8 @test15(i8 %A) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT:    [[C:%.*]] = and i8 [[A:%.*]], 16
+; CHECK-NEXT:    ret i8 [[C]]
+;
+  %B = add i8 %A, -64
+  %C = and i8 %B, 16
+  ret i8 %C
+}
+
+; Only one bit set
+define i8 @test16(i8 %A) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT:    [[B:%.*]] = and i8 [[A:%.*]], 16
+; CHECK-NEXT:    [[C:%.*]] = xor i8 [[B]], 16
+; CHECK-NEXT:    ret i8 [[C]]
+;
+  %B = add i8 %A, 16
+  %C = and i8 %B, 16
+  ret i8 %C
+}
+
+define i32 @test17(i32 %A) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT:    [[C:%.*]] = sub i32 0, [[A:%.*]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %B = xor i32 %A, -1
+  %C = add i32 %B, 1
+  ret i32 %C
+}
+
+define i8 @test18(i8 %A) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT:    [[C:%.*]] = sub i8 16, [[A:%.*]]
+; CHECK-NEXT:    ret i8 [[C]]
+;
+  %B = xor i8 %A, -1
+  %C = add i8 %B, 17
+  ret i8 %C
+}
+
+define <2 x i64> @test18vec(<2 x i64> %A) {
+; CHECK-LABEL: @test18vec(
+; CHECK-NEXT:    [[ADD:%.*]] = sub <2 x i64> <i64 1, i64 2>, [[A:%.*]]
+; CHECK-NEXT:    ret <2 x i64> [[ADD]]
+;
+  %xor = xor <2 x i64> %A, <i64 -1, i64 -1>
+  %add = add <2 x i64> %xor, <i64 2, i64 3>
+  ret <2 x i64> %add
+}
+
+define i32 @test19(i1 %C) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], i32 1123, i32 133
+; CHECK-NEXT:    ret i32 [[V]]
+;
+  %A = select i1 %C, i32 1000, i32 10
+  %V = add i32 %A, 123
+  ret i32 %V
+}
+
+define <2 x i32> @test19vec(i1 %C) {
+; CHECK-LABEL: @test19vec(
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 1123, i32 1123>, <2 x i32> <i32 133, i32 133>
+; CHECK-NEXT:    ret <2 x i32> [[V]]
+;
+  %A = select i1 %C, <2 x i32> <i32 1000, i32 1000>, <2 x i32> <i32 10, i32 10>
+  %V = add <2 x i32> %A, <i32 123, i32 123>
+  ret <2 x i32> %V
+}
+
+; This is an InstSimplify fold, but test it here to make sure that
+; InstCombine does not prevent the fold.
+; With NSW, add of sign bit -> or of sign bit.
+
+define i32 @test20(i32 %x) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT:    ret i32 [[X:%.*]]
+;
+  %y = xor i32 %x, -2147483648
+  %z = add nsw i32 %y, -2147483648
+  ret i32 %z
+}
+
+define i32 @xor_sign_bit(i32 %x) {
+; CHECK-LABEL: @xor_sign_bit(
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X:%.*]], -2147483606
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %xor = xor i32 %x, 2147483648
+  %add = add i32 %xor, 42
+  ret i32 %add
+}
+
+; No-wrap info allows converting the add to 'or'.
+
+define i8 @add_nsw_signbit(i8 %x) {
+; CHECK-LABEL: @add_nsw_signbit(
+; CHECK-NEXT:    [[Y:%.*]] = or i8 [[X:%.*]], -128
+; CHECK-NEXT:    ret i8 [[Y]]
+;
+  %y = add nsw i8 %x, -128
+  ret i8 %y
+}
+
+; No-wrap info allows converting the add to 'or'.
+
+define i8 @add_nuw_signbit(i8 %x) {
+; CHECK-LABEL: @add_nuw_signbit(
+; CHECK-NEXT:    [[Y:%.*]] = or i8 [[X:%.*]], -128
+; CHECK-NEXT:    ret i8 [[Y]]
+;
+  %y = add nuw i8 %x, 128
+  ret i8 %y
+}
+
+define i32 @add_nsw_sext_add(i8 %x) {
+; CHECK-LABEL: @add_nsw_sext_add(
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i8 [[X:%.*]] to i32
+; CHECK-NEXT:    [[R:%.*]] = add nsw i32 [[TMP1]], 398
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %add = add nsw i8 %x, 42
+  %ext = sext i8 %add to i32
+  %r = add i32 %ext, 356
+  ret i32 %r
+}
+
+; Negative test - extra use of the sext means increase of instructions.
+
+define i32 @add_nsw_sext_add_extra_use_1(i8 %x, i32* %p) {
+; CHECK-LABEL: @add_nsw_sext_add_extra_use_1(
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i8 [[X:%.*]], 42
+; CHECK-NEXT:    [[EXT:%.*]] = sext i8 [[ADD]] to i32
+; CHECK-NEXT:    store i32 [[EXT]], i32* [[P:%.*]], align 4
+; CHECK-NEXT:    [[R:%.*]] = add nsw i32 [[EXT]], 356
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %add = add nsw i8 %x, 42
+  %ext = sext i8 %add to i32
+  store i32 %ext, i32* %p
+  %r = add i32 %ext, 356
+  ret i32 %r
+}
+
+define <2 x i32> @add_nsw_sext_add_vec_extra_use_2(<2 x i8> %x, <2 x i8>* %p) {
+; CHECK-LABEL: @add_nsw_sext_add_vec_extra_use_2(
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw <2 x i8> [[X:%.*]], <i8 42, i8 -5>
+; CHECK-NEXT:    store <2 x i8> [[ADD]], <2 x i8>* [[P:%.*]], align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = sext <2 x i8> [[X]] to <2 x i32>
+; CHECK-NEXT:    [[R:%.*]] = add nsw <2 x i32> [[TMP1]], <i32 398, i32 7>
+; CHECK-NEXT:    ret <2 x i32> [[R]]
+;
+  %add = add nsw <2 x i8> %x, <i8 42, i8 -5>
+  store <2 x i8> %add, <2 x i8>* %p
+  %ext = sext <2 x i8> %add to <2 x i32>
+  %r = add <2 x i32> %ext, <i32 356, i32 12>
+  ret <2 x i32> %r
+}
+
+define <2 x i32> @add_nuw_zext_add_vec(<2 x i16> %x) {
+; CHECK-LABEL: @add_nuw_zext_add_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i16> [[X:%.*]] to <2 x i32>
+; CHECK-NEXT:    [[R:%.*]] = add nsw <2 x i32> [[TMP1]], <i32 65850, i32 -7>
+; CHECK-NEXT:    ret <2 x i32> [[R]]
+;
+  %add = add nuw <2 x i16> %x, <i16 -42, i16 5>
+  %ext = zext <2 x i16> %add to <2 x i32>
+  %r = add <2 x i32> %ext, <i32 356, i32 -12>
+  ret <2 x i32> %r
+}
+
+; Negative test - extra use of the zext means increase of instructions.
+
+define i64 @add_nuw_zext_add_extra_use_1(i8 %x, i64* %p) {
+; CHECK-LABEL: @add_nuw_zext_add_extra_use_1(
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i8 [[X:%.*]], 42
+; CHECK-NEXT:    [[EXT:%.*]] = zext i8 [[ADD]] to i64
+; CHECK-NEXT:    store i64 [[EXT]], i64* [[P:%.*]], align 4
+; CHECK-NEXT:    [[R:%.*]] = add nuw nsw i64 [[EXT]], 356
+; CHECK-NEXT:    ret i64 [[R]]
+;
+  %add = add nuw i8 %x, 42
+  %ext = zext i8 %add to i64
+  store i64 %ext, i64* %p
+  %r = add i64 %ext, 356
+  ret i64 %r
+}
+
+define i64 @add_nuw_zext_add_extra_use_2(i8 %x, i8* %p) {
+; CHECK-LABEL: @add_nuw_zext_add_extra_use_2(
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i8 [[X:%.*]], 42
+; CHECK-NEXT:    store i8 [[ADD]], i8* [[P:%.*]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[X]] to i64
+; CHECK-NEXT:    [[R:%.*]] = add nuw nsw i64 [[TMP1]], -314
+; CHECK-NEXT:    ret i64 [[R]]
+;
+  %add = add nuw i8 %x, 42
+  store i8 %add, i8* %p
+  %ext = zext i8 %add to i64
+  %r = add i64 %ext, -356
+  ret i64 %r
+}
+
+define i1 @test21(i32 %x) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT:    [[Y:%.*]] = icmp eq i32 [[X:%.*]], 119
+; CHECK-NEXT:    ret i1 [[Y]]
+;
+  %t = add i32 %x, 4
+  %y = icmp eq i32 %t, 123
+  ret i1 %y
+}
+
+define <2 x i1> @test21vec(<2 x i32> %x) {
+; CHECK-LABEL: @test21vec(
+; CHECK-NEXT:    [[Y:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 119, i32 119>
+; CHECK-NEXT:    ret <2 x i1> [[Y]]
+;
+  %t = add <2 x i32> %x, <i32 4, i32 4>
+  %y = icmp eq <2 x i32> %t, <i32 123, i32 123>
+  ret <2 x i1> %y
+}
+
+define i32 @test22(i32 %V) {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT:    switch i32 [[V:%.*]], label [[DEFAULT:%.*]] [
+; CHECK-NEXT:    i32 10, label [[LAB1:%.*]]
+; CHECK-NEXT:    i32 20, label [[LAB2:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       Default:
+; CHECK-NEXT:    ret i32 123
+; CHECK:       Lab1:
+; CHECK-NEXT:    ret i32 12312
+; CHECK:       Lab2:
+; CHECK-NEXT:    ret i32 1231231
+;
+  %V2 = add i32 %V, 10
+  switch i32 %V2, label %Default [
+  i32 20, label %Lab1
+  i32 30, label %Lab2
+  ]
+
+Default:                ; preds = %0
+  ret i32 123
+
+Lab1:           ; preds = %0
+  ret i32 12312
+
+Lab2:           ; preds = %0
+  ret i32 1231231
+}
+
+define i32 @test23(i1 %C, i32 %a) {
+; CHECK-LABEL: @test23(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[ENDIF:%.*]], label [[ELSE:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    br label [[ENDIF]]
+; CHECK:       endif:
+; CHECK-NEXT:    [[B_0:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ 2, [[ELSE]] ]
+; CHECK-NEXT:    ret i32 [[B_0]]
+;
+entry:
+  br i1 %C, label %endif, label %else
+
+else:           ; preds = %entry
+  br label %endif
+
+endif:          ; preds = %else, %entry
+  %b.0 = phi i32 [ 0, %entry ], [ 1, %else ]
+  %tmp.4 = add i32 %b.0, 1
+  ret i32 %tmp.4
+}
+
+define i32 @test24(i32 %A) {
+; CHECK-LABEL: @test24(
+; CHECK-NEXT:    [[B:%.*]] = shl i32 [[A:%.*]], 1
+; CHECK-NEXT:    ret i32 [[B]]
+;
+  %B = add i32 %A, 1
+  %C = shl i32 %B, 1
+  %D = sub i32 %C, 2
+  ret i32 %D
+}
+
+define i64 @test25(i64 %Y) {
+; CHECK-LABEL: @test25(
+; CHECK-NEXT:    [[TMP_8:%.*]] = shl i64 [[Y:%.*]], 3
+; CHECK-NEXT:    ret i64 [[TMP_8]]
+;
+  %tmp.4 = shl i64 %Y, 2
+  %tmp.12 = shl i64 %Y, 2
+  %tmp.8 = add i64 %tmp.4, %tmp.12
+  ret i64 %tmp.8
+}
+
+define i32 @test26(i32 %A, i32 %B) {
+; CHECK-LABEL: @test26(
+; CHECK-NEXT:    ret i32 [[A:%.*]]
+;
+  %C = add i32 %A, %B
+  %D = sub i32 %C, %B
+  ret i32 %D
+}
+
+; Fold add through select.
+define i32 @test27(i1 %C, i32 %X, i32 %Y) {
+; CHECK-LABEL: @test27(
+; CHECK-NEXT:    [[C_UPGRD_1_V:%.*]] = select i1 [[C:%.*]], i32 [[X:%.*]], i32 123
+; CHECK-NEXT:    ret i32 [[C_UPGRD_1_V]]
+;
+  %A = add i32 %X, %Y
+  %B = add i32 %Y, 123
+  %C.upgrd.1 = select i1 %C, i32 %A, i32 %B
+  %D = sub i32 %C.upgrd.1, %Y
+  ret i32 %D
+}
+
+define i32 @test28(i32 %X) {
+; CHECK-LABEL: @test28(
+; CHECK-NEXT:    [[Z:%.*]] = sub i32 -1192, [[X:%.*]]
+; CHECK-NEXT:    ret i32 [[Z]]
+;
+  %Y = add i32 %X, 1234
+  %Z = sub i32 42, %Y
+  ret i32 %Z
+}
+
+define i32 @test29(i32 %x, i32 %y) {
+; CHECK-LABEL: @test29(
+; CHECK-NEXT:    [[TMP_2:%.*]] = sub i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP_7:%.*]] = and i32 [[X]], 63
+; CHECK-NEXT:    [[TMP_9:%.*]] = and i32 [[TMP_2]], -64
+; CHECK-NEXT:    [[TMP_10:%.*]] = or i32 [[TMP_7]], [[TMP_9]]
+; CHECK-NEXT:    ret i32 [[TMP_10]]
+;
+  %tmp.2 = sub i32 %x, %y
+  %tmp.2.mask = and i32 %tmp.2, 63
+  %tmp.6 = add i32 %tmp.2.mask, %y
+  %tmp.7 = and i32 %tmp.6, 63
+  %tmp.9 = and i32 %tmp.2, -64
+  %tmp.10 = or i32 %tmp.7, %tmp.9
+  ret i32 %tmp.10
+}
+
+; Add of sign bit -> xor of sign bit.
+define i64 @test30(i64 %x) {
+; CHECK-LABEL: @test30(
+; CHECK-NEXT:    ret i64 [[X:%.*]]
+;
+  %tmp.2 = xor i64 %x, -9223372036854775808
+  %tmp.4 = add i64 %tmp.2, -9223372036854775808
+  ret i64 %tmp.4
+}
+
+define i32 @test31(i32 %A) {
+; CHECK-LABEL: @test31(
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i32 [[A:%.*]], 5
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+  %B = add i32 %A, 4
+  %C = mul i32 %B, 5
+  %D = sub i32 %C, 20
+  ret i32 %D
+}
+
+define i32 @test32(i32 %A) {
+; CHECK-LABEL: @test32(
+; CHECK-NEXT:    [[B:%.*]] = shl i32 [[A:%.*]], 2
+; CHECK-NEXT:    ret i32 [[B]]
+;
+  %B = add i32 %A, 4
+  %C = shl i32 %B, 2
+  %D = sub i32 %C, 16
+  ret i32 %D
+}
+
+define i8 @test33(i8 %A) {
+; CHECK-LABEL: @test33(
+; CHECK-NEXT:    [[C:%.*]] = or i8 [[A:%.*]], 1
+; CHECK-NEXT:    ret i8 [[C]]
+;
+  %B = and i8 %A, -2
+  %C = add i8 %B, 1
+  ret i8 %C
+}
+
+define i8 @test34(i8 %A) {
+; CHECK-LABEL: @test34(
+; CHECK-NEXT:    [[C:%.*]] = and i8 [[A:%.*]], 12
+; CHECK-NEXT:    ret i8 [[C]]
+;
+  %B = add i8 %A, 64
+  %C = and i8 %B, 12
+  ret i8 %C
+}
+
+; If all bits affected by the add are included
+; in the mask, do the add before the mask op.
+
+define i8 @masked_add(i8 %x) {
+; CHECK-LABEL: @masked_add(
+; CHECK-NEXT:    [[AND1:%.*]] = add i8 [[X:%.*]], 96
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[AND1]], -16
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %and = and i8 %x, 240 ; 0xf0
+  %r = add i8 %and, 96  ; 0x60
+  ret i8 %r
+}
+
+define <2 x i8> @masked_add_splat(<2 x i8> %x) {
+; CHECK-LABEL: @masked_add_splat(
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i8> [[X:%.*]], <i8 -64, i8 -64>
+; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[AND]], <i8 64, i8 64>
+; CHECK-NEXT:    ret <2 x i8> [[R]]
+;
+  %and = and <2 x i8> %x, <i8 192, i8 192> ; 0xc0
+  %r = add <2 x i8> %and, <i8 64, i8 64>  ; 0x40
+  ret <2 x i8> %r
+}
+
+define i8 @not_masked_add(i8 %x) {
+; CHECK-LABEL: @not_masked_add(
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[X:%.*]], 112
+; CHECK-NEXT:    [[R:%.*]] = add nuw i8 [[AND]], 96
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %and = and i8 %x, 112 ; 0x70
+  %r = add i8 %and, 96  ; 0x60
+  ret i8 %r
+}
+
+define i32 @test35(i32 %a) {
+; CHECK-LABEL: @test35(
+; CHECK-NEXT:    ret i32 -1
+;
+  %tmpnot = xor i32 %a, -1
+  %tmp2 = add i32 %tmpnot, %a
+  ret i32 %tmp2
+}
+
+define i32 @test36(i32 %a) {
+; CHECK-LABEL: @test36(
+; CHECK-NEXT:    ret i32 0
+;
+  %x = and i32 %a, -2
+  %y = and i32 %a, -126
+  %z = add i32 %x, %y
+  %q = and i32 %z, 1  ; always zero
+  ret i32 %q
+}
+
+define i1 @test37(i32 %a, i32 %b) {
+; CHECK-LABEL: @test37(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %add = add i32 %a, %b
+  %cmp = icmp eq i32 %add, %a
+  ret i1 %cmp
+}
+
+define i1 @test38(i32 %a, i32 %b) {
+; CHECK-LABEL: @test38(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %add = add i32 %a, %b
+  %cmp = icmp eq i32 %add, %b
+  ret i1 %cmp
+}
+
+define i1 @test39(i32 %a, i32 %b) {
+; CHECK-LABEL: @test39(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %add = add i32 %b, %a
+  %cmp = icmp eq i32 %add, %a
+  ret i1 %cmp
+}
+
+define i1 @test40(i32 %a, i32 %b) {
+; CHECK-LABEL: @test40(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %add = add i32 %b, %a
+  %cmp = icmp eq i32 %add, %b
+  ret i1 %cmp
+}
+
+; (add (zext (add nuw X, C2)), C) --> (zext (add nuw X, C2 + C))
+
+define i64 @test41(i32 %a) {
+; CHECK-LABEL: @test41(
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw i32 [[A:%.*]], 15
+; CHECK-NEXT:    [[SUB:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT:    ret i64 [[SUB]]
+;
+  %add = add nuw i32 %a, 16
+  %zext = zext i32 %add to i64
+  %sub = add i64 %zext, -1
+  ret i64 %sub
+}
+
+; (add (zext (add nuw X, C2)), C) --> (zext (add nuw X, C2 + C))
+
+define <2 x i64> @test41vec(<2 x i32> %a) {
+; CHECK-LABEL: @test41vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw <2 x i32> [[A:%.*]], <i32 15, i32 15>
+; CHECK-NEXT:    [[SUB:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[SUB]]
+;
+  %add = add nuw <2 x i32> %a, <i32 16, i32 16>
+  %zext = zext <2 x i32> %add to <2 x i64>
+  %sub = add <2 x i64> %zext, <i64 -1, i64 -1>
+  ret <2 x i64> %sub
+}
+
+define <2 x i64> @test41vec_and_multiuse(<2 x i32> %a) {
+; CHECK-LABEL: @test41vec_and_multiuse(
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw <2 x i32> [[A:%.*]], <i32 16, i32 16>
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext <2 x i32> [[ADD]] to <2 x i64>
+; CHECK-NEXT:    [[SUB:%.*]] = add nsw <2 x i64> [[ZEXT]], <i64 -1, i64 -1>
+; CHECK-NEXT:    [[EXTRAUSE:%.*]] = add nsw <2 x i64> [[SUB]], [[ZEXT]]
+; CHECK-NEXT:    ret <2 x i64> [[EXTRAUSE]]
+;
+  %add = add nuw <2 x i32> %a, <i32 16, i32 16>
+  %zext = zext <2 x i32> %add to <2 x i64>
+  %sub = add <2 x i64> %zext, <i64 -1, i64 -1>
+  %extrause = add <2 x i64> %zext, %sub
+  ret <2 x i64> %extrause
+}
+
+define i32 @test42(i1 %C) {
+; CHECK-LABEL: @test42(
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], i32 1123, i32 133
+; CHECK-NEXT:    ret i32 [[V]]
+;
+  %A = select i1 %C, i32 1000, i32 10
+  %V = add i32 123, %A
+  ret i32 %V
+}
+
+define <2 x i32> @test42vec(i1 %C) {
+; CHECK-LABEL: @test42vec(
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 1123, i32 1123>, <2 x i32> <i32 133, i32 133>
+; CHECK-NEXT:    ret <2 x i32> [[V]]
+;
+  %A = select i1 %C, <2 x i32> <i32 1000, i32 1000>, <2 x i32> <i32 10, i32 10>
+  %V = add <2 x i32> <i32 123, i32 123>, %A
+  ret <2 x i32> %V
+}
+
+define <2 x i32> @test42vec2(i1 %C) {
+; CHECK-LABEL: @test42vec2(
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 1123, i32 2833>, <2 x i32> <i32 133, i32 363>
+; CHECK-NEXT:    ret <2 x i32> [[V]]
+;
+  %A = select i1 %C, <2 x i32> <i32 1000, i32 2500>, <2 x i32> <i32 10, i32 30>
+  %V = add <2 x i32> <i32 123, i32 333>, %A
+  ret <2 x i32> %V
+}
+
+define i32 @test55(i1 %which) {
+; CHECK-LABEL: @test55(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[A:%.*]] = phi i32 [ 1123, [[ENTRY:%.*]] ], [ 133, [[DELAY]] ]
+; CHECK-NEXT:    ret i32 [[A]]
+;
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi i32 [ 1000, %entry ], [ 10, %delay ]
+  %value = add i32 123, %A
+  ret i32 %value
+}
+
+define <2 x i32> @test43vec(i1 %which) {
+; CHECK-LABEL: @test43vec(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ <i32 1123, i32 1123>, [[ENTRY:%.*]] ], [ <i32 133, i32 133>, [[DELAY]] ]
+; CHECK-NEXT:    ret <2 x i32> [[A]]
+;
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi <2 x i32> [ <i32 1000, i32 1000>, %entry ], [ <i32 10, i32 10>, %delay ]
+  %value = add <2 x i32> <i32 123, i32 123>, %A
+  ret <2 x i32> %value
+}
+
+define <2 x i32> @test43vec2(i1 %which) {
+; CHECK-LABEL: @test43vec2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ <i32 1123, i32 2833>, [[ENTRY:%.*]] ], [ <i32 133, i32 363>, [[DELAY]] ]
+; CHECK-NEXT:    ret <2 x i32> [[A]]
+;
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi <2 x i32> [ <i32 1000, i32 2500>, %entry ], [ <i32 10, i32 30>, %delay ]
+  %value = add <2 x i32> <i32 123, i32 333>, %A
+  ret <2 x i32> %value
+}
+
+; E = (A + 1) + ~B = A - B
+define i32 @add_not_increment(i32 %A, i32 %B) {
+; CHECK-LABEL: @add_not_increment(
+; CHECK-NEXT:    [[E:%.*]] = sub i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i32 [[E]]
+;
+  %C = xor i32 %B, -1
+  %D = add i32 %A, 1
+  %E = add i32 %D, %C
+  ret i32 %E
+}
+
+; E = (A + 1) + ~B = A - B
+define <2 x i32> @add_not_increment_vec(<2 x i32> %A, <2 x i32> %B) {
+; CHECK-LABEL: @add_not_increment_vec(
+; CHECK-NEXT:    [[E:%.*]] = sub <2 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret <2 x i32> [[E]]
+;
+  %C = xor <2 x i32> %B, <i32 -1, i32 -1>
+  %D = add <2 x i32> %A, <i32 1, i32 1>
+  %E = add <2 x i32> %D, %C
+  ret <2 x i32> %E
+}
+
+; E = ~B + (1 + A) = A - B
+define i32 @add_not_increment_commuted(i32 %A, i32 %B) {
+; CHECK-LABEL: @add_not_increment_commuted(
+; CHECK-NEXT:    [[E:%.*]] = sub i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i32 [[E]]
+;
+  %C = xor i32 %B, -1
+  %D = add i32 %A, 1
+  %E = add i32 %C, %D
+  ret i32 %E
+}
+
+; E = (A + ~B) + 1 = A - B
+define i32 @add_to_sub(i32 %M, i32 %B) {
+; CHECK-LABEL: @add_to_sub(
+; CHECK-NEXT:    [[A:%.*]] = mul i32 [[M:%.*]], 42
+; CHECK-NEXT:    [[C:%.*]] = xor i32 [[B:%.*]], -1
+; CHECK-NEXT:    [[D:%.*]] = add i32 [[A]], [[C]]
+; CHECK-NEXT:    [[E:%.*]] = add i32 [[D]], 1
+; CHECK-NEXT:    ret i32 [[E]]
+;
+  %A = mul i32 %M, 42          ; thwart complexity-based ordering
+  %C = xor i32 %B, -1
+  %D = add i32 %A, %C
+  %E = add i32 %D, 1
+  ret i32 %E
+}
+
+; E = (~B + A) + 1 = A - B
+define i32 @add_to_sub2(i32 %A, i32 %M) {
+; CHECK-LABEL: @add_to_sub2(
+; CHECK-NEXT:    [[B:%.*]] = mul i32 [[M:%.*]], 42
+; CHECK-NEXT:    [[C:%.*]] = xor i32 [[B]], -1
+; CHECK-NEXT:    [[D:%.*]] = add i32 [[C]], [[A:%.*]]
+; CHECK-NEXT:    [[E:%.*]] = add i32 [[D]], 1
+; CHECK-NEXT:    ret i32 [[E]]
+;
+  %B = mul i32 %M, 42          ; thwart complexity-based ordering
+  %C = xor i32 %B, -1
+  %D = add i32 %C, %A
+  %E = add i32 %D, 1
+  ret i32 %E
+}

Added: llvm/trunk/test/Transforms/InstCombine/add2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/add2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/add2.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/add2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,474 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i64 @test1(i64 %A, i32 %B) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[TMP6:%.*]] = and i64 [[A:%.*]], 123
+; CHECK-NEXT:    ret i64 [[TMP6]]
+;
+  %tmp12 = zext i32 %B to i64
+  %tmp3 = shl i64 %tmp12, 32
+  %tmp5 = add i64 %tmp3, %A
+  %tmp6 = and i64 %tmp5, 123
+  ret i64 %tmp6
+}
+
+define i32 @test2(i32 %A) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[F:%.*]] = and i32 [[A:%.*]], 39
+; CHECK-NEXT:    ret i32 [[F]]
+;
+  %B = and i32 %A, 7
+  %C = and i32 %A, 32
+  %F = add i32 %B, %C
+  ret i32 %F
+}
+
+define i32 @test3(i32 %A) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[B:%.*]] = and i32 [[A:%.*]], 128
+; CHECK-NEXT:    [[C:%.*]] = lshr i32 [[A]], 30
+; CHECK-NEXT:    [[F:%.*]] = or i32 [[B]], [[C]]
+; CHECK-NEXT:    ret i32 [[F]]
+;
+  %B = and i32 %A, 128
+  %C = lshr i32 %A, 30
+  %F = add i32 %B, %C
+  ret i32 %F
+}
+
+define i32 @test4(i32 %A) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[B:%.*]] = shl nuw i32 [[A:%.*]], 1
+; CHECK-NEXT:    ret i32 [[B]]
+;
+  %B = add nuw i32 %A, %A
+  ret i32 %B
+}
+
+define <2 x i1> @test5(<2 x i1> %A, <2 x i1> %B) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    [[ADD:%.*]] = xor <2 x i1> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret <2 x i1> [[ADD]]
+;
+  %add = add <2 x i1> %A, %B
+  ret <2 x i1> %add
+}
+
+define <2 x i64> @test6(<2 x i64> %A) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    [[ADD:%.*]] = mul <2 x i64> [[A:%.*]], <i64 5, i64 9>
+; CHECK-NEXT:    ret <2 x i64> [[ADD]]
+;
+  %shl = shl <2 x i64> %A, <i64 2, i64 3>
+  %add = add <2 x i64> %shl, %A
+  ret <2 x i64> %add
+}
+
+define <2 x i64> @test7(<2 x i64> %A) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    [[ADD:%.*]] = mul <2 x i64> [[A:%.*]], <i64 7, i64 12>
+; CHECK-NEXT:    ret <2 x i64> [[ADD]]
+;
+  %shl = shl <2 x i64> %A, <i64 2, i64 3>
+  %mul = mul <2 x i64> %A, <i64 3, i64 4>
+  %add = add <2 x i64> %shl, %mul
+  ret <2 x i64> %add
+}
+
+define i16 @test9(i16 %a) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:    [[D:%.*]] = mul i16 [[A:%.*]], -32767
+; CHECK-NEXT:    ret i16 [[D]]
+;
+  %b = mul i16 %a, 2
+  %c = mul i16 %a, 32767
+  %d = add i16 %b, %c
+  ret i16 %d
+}
+
+; y + (~((x >> 3) & 0x55555555) + 1) -> y - ((x >> 3) & 0x55555555)
+define i32 @test10(i32 %x, i32 %y) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT:    [[SHR:%.*]] = ashr i32 [[X:%.*]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[SHR]], 1431655765
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[Y:%.*]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[SUB]]
+;
+  %shr = ashr i32 %x, 3
+  %shr.not = or i32 %shr, -1431655766
+  %neg = xor i32 %shr.not, 1431655765
+  %add = add i32 %y, 1
+  %add1 = add i32 %add, %neg
+  ret i32 %add1
+}
+
+; y + (~(x & 0x55555555) + 1) -> y - (x & 0x55555555)
+define i32 @test11(i32 %x, i32 %y) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 1431655765
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[Y:%.*]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[SUB]]
+;
+  %x.not = or i32 %x, -1431655766
+  %neg = xor i32 %x.not, 1431655765
+  %add = add i32 %y, 1
+  %add1 = add i32 %add, %neg
+  ret i32 %add1
+}
+
+; (y + 1) + ~(x & 0x55555555) -> y - (x & 0x55555555)
+define i32 @test12(i32 %x, i32 %y) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 1431655765
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[Y:%.*]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[SUB]]
+;
+  %add = add nsw i32 %y, 1
+  %x.not = or i32 %x, -1431655766
+  %neg = xor i32 %x.not, 1431655765
+  %add1 = add nsw i32 %add, %neg
+  ret i32 %add1
+}
+
+; y + (~(x & 0x55555556) + 1) -> y - (x & 0x55555556)
+define i32 @test13(i32 %x, i32 %y) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 1431655766
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[Y:%.*]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[SUB]]
+;
+  %x.not = or i32 %x, -1431655767
+  %neg = xor i32 %x.not, 1431655766
+  %add = add i32 %y, 1
+  %add1 = add i32 %add, %neg
+  ret i32 %add1
+}
+
+; (y + 1) + ~(x & 0x55555556) -> y - (x & 0x55555556)
+define i32 @test14(i32 %x, i32 %y) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 1431655766
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[Y:%.*]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[SUB]]
+;
+  %add = add nsw i32 %y, 1
+  %x.not = or i32 %x, -1431655767
+  %neg = xor i32 %x.not, 1431655766
+  %add1 = add nsw i32 %add, %neg
+  ret i32 %add1
+}
+
+; y + (~(x | 0x55555556) + 1) -> y - (x | 0x55555556)
+define i32 @test15(i32 %x, i32 %y) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[X:%.*]], 1431655766
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[Y:%.*]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[SUB]]
+;
+  %x.not = and i32 %x, -1431655767
+  %neg = xor i32 %x.not, -1431655767
+  %add = add i32 %y, 1
+  %add1 = add i32 %add, %neg
+  ret i32 %add1
+}
+
+; (y + 1) + ~(x | 0x55555556) -> y - (x | 0x55555556)
+define i32 @test16(i32 %x, i32 %y) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[X:%.*]], 1431655766
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[Y:%.*]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[SUB]]
+;
+  %add = add nsw i32 %y, 1
+  %x.not = and i32 %x, -1431655767
+  %neg = xor i32 %x.not, -1431655767
+  %add1 = add nsw i32 %add, %neg
+  ret i32 %add1
+}
+
+; y + (~(x | 0x55555555) + 1) -> y - (x | 0x55555555)
+define i32 @test17(i32 %x, i32 %y) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[X:%.*]], 1431655765
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[Y:%.*]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[SUB]]
+;
+  %x.not = and i32 %x, -1431655766
+  %add2 = xor i32 %x.not, -1431655765
+  %add1 = add nsw i32 %add2, %y
+  ret i32 %add1
+}
+
+; (y + 1) + ~(x | 0x55555555) -> y - (x | 0x55555555)
+define i32 @test18(i32 %x, i32 %y) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[X:%.*]], 1431655765
+; CHECK-NEXT:    [[SUB:%.*]] = sub i32 [[Y:%.*]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[SUB]]
+;
+  %add = add nsw i32 %y, 1
+  %x.not = and i32 %x, -1431655766
+  %neg = xor i32 %x.not, -1431655766
+  %add1 = add nsw i32 %add, %neg
+  ret i32 %add1
+}
+
+define i16 @add_nsw_mul_nsw(i16 %x) {
+; CHECK-LABEL: @add_nsw_mul_nsw(
+; CHECK-NEXT:    [[ADD2:%.*]] = mul nsw i16 [[X:%.*]], 3
+; CHECK-NEXT:    ret i16 [[ADD2]]
+;
+  %add1 = add nsw i16 %x, %x
+  %add2 = add nsw i16 %add1, %x
+  ret i16 %add2
+}
+
+define i16 @mul_add_to_mul_1(i16 %x) {
+; CHECK-LABEL: @mul_add_to_mul_1(
+; CHECK-NEXT:    [[ADD2:%.*]] = mul nsw i16 [[X:%.*]], 9
+; CHECK-NEXT:    ret i16 [[ADD2]]
+;
+  %mul1 = mul nsw i16 %x, 8
+  %add2 = add nsw i16 %x, %mul1
+  ret i16 %add2
+}
+
+define i16 @mul_add_to_mul_2(i16 %x) {
+; CHECK-LABEL: @mul_add_to_mul_2(
+; CHECK-NEXT:    [[ADD2:%.*]] = mul nsw i16 [[X:%.*]], 9
+; CHECK-NEXT:    ret i16 [[ADD2]]
+;
+  %mul1 = mul nsw i16 %x, 8
+  %add2 = add nsw i16 %mul1, %x
+  ret i16 %add2
+}
+
+define i16 @mul_add_to_mul_3(i16 %a) {
+; CHECK-LABEL: @mul_add_to_mul_3(
+; CHECK-NEXT:    [[ADD:%.*]] = mul i16 [[A:%.*]], 5
+; CHECK-NEXT:    ret i16 [[ADD]]
+;
+  %mul1 = mul i16 %a, 2
+  %mul2 = mul i16 %a, 3
+  %add = add nsw i16 %mul1, %mul2
+  ret i16 %add
+}
+
+define i16 @mul_add_to_mul_4(i16 %a) {
+; CHECK-LABEL: @mul_add_to_mul_4(
+; CHECK-NEXT:    [[ADD:%.*]] = mul nsw i16 [[A:%.*]], 9
+; CHECK-NEXT:    ret i16 [[ADD]]
+;
+  %mul1 = mul nsw i16 %a, 2
+  %mul2 = mul nsw i16 %a, 7
+  %add = add nsw i16 %mul1, %mul2
+  ret i16 %add
+}
+
+define i16 @mul_add_to_mul_5(i16 %a) {
+; CHECK-LABEL: @mul_add_to_mul_5(
+; CHECK-NEXT:    [[ADD:%.*]] = mul nsw i16 [[A:%.*]], 10
+; CHECK-NEXT:    ret i16 [[ADD]]
+;
+  %mul1 = mul nsw i16 %a, 3
+  %mul2 = mul nsw i16 %a, 7
+  %add = add nsw i16 %mul1, %mul2
+  ret i16 %add
+}
+
+define i32 @mul_add_to_mul_6(i32 %x, i32 %y) {
+; CHECK-LABEL: @mul_add_to_mul_6(
+; CHECK-NEXT:    [[MUL1:%.*]] = mul nsw i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[ADD:%.*]] = mul nsw i32 [[MUL1]], 6
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %mul1 = mul nsw i32 %x, %y
+  %mul2 = mul nsw i32 %mul1, 5
+  %add = add nsw i32 %mul1, %mul2
+  ret i32 %add
+}
+
+define i16 @mul_add_to_mul_7(i16 %x) {
+; CHECK-LABEL: @mul_add_to_mul_7(
+; CHECK-NEXT:    [[ADD2:%.*]] = shl i16 [[X:%.*]], 15
+; CHECK-NEXT:    ret i16 [[ADD2]]
+;
+  %mul1 = mul nsw i16 %x, 32767
+  %add2 = add nsw i16 %x, %mul1
+  ret i16 %add2
+}
+
+define i16 @mul_add_to_mul_8(i16 %a) {
+; CHECK-LABEL: @mul_add_to_mul_8(
+; CHECK-NEXT:    [[ADD:%.*]] = mul nsw i16 [[A:%.*]], 32767
+; CHECK-NEXT:    ret i16 [[ADD]]
+;
+  %mul1 = mul nsw i16 %a, 16383
+  %mul2 = mul nsw i16 %a, 16384
+  %add = add nsw i16 %mul1, %mul2
+  ret i16 %add
+}
+
+define i16 @mul_add_to_mul_9(i16 %a) {
+; CHECK-LABEL: @mul_add_to_mul_9(
+; CHECK-NEXT:    [[ADD:%.*]] = shl i16 [[A:%.*]], 15
+; CHECK-NEXT:    ret i16 [[ADD]]
+;
+  %mul1 = mul nsw i16 %a, 16384
+  %mul2 = mul nsw i16 %a, 16384
+  %add = add nsw i16 %mul1, %mul2
+  ret i16 %add
+}
+
+; This test and the next test verify that when range metadata is attached to
+; llvm.cttz, ValueTracking correctly intersects the range specified by the
+; metadata and the range implied by the intrinsic.
+;
+; In this test, the range specified by the metadata is more strict. Therefore,
+; ValueTracking uses that range.
+define i16 @add_cttz(i16 %a) {
+; CHECK-LABEL: @add_cttz(
+; CHECK-NEXT:    [[CTTZ:%.*]] = call i16 @llvm.cttz.i16(i16 [[A:%.*]], i1 true), !range !0
+; CHECK-NEXT:    [[B:%.*]] = or i16 [[CTTZ]], -8
+; CHECK-NEXT:    ret i16 [[B]]
+;
+  ; llvm.cttz.i16(..., /*is_zero_undefined=*/true) implies the value returned
+  ; is in [0, 16). The range metadata indicates the value returned is in [0, 8).
+  ; Intersecting these ranges, we know the value returned is in [0, 8).
+  ; Therefore, InstCombine will transform
+  ;     add %cttz, 1111 1111 1111 1000 ; decimal -8
+  ; to
+  ;     or  %cttz, 1111 1111 1111 1000
+  %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 true), !range !0
+  %b = add i16 %cttz, -8
+  ret i16 %b
+}
+declare i16 @llvm.cttz.i16(i16, i1)
+!0 = !{i16 0, i16 8}
+
+; Similar to @add_cttz, but in this test, the range implied by the
+; intrinsic is more strict. Therefore, ValueTracking uses that range.
+define i16 @add_cttz_2(i16 %a) {
+; CHECK-LABEL: @add_cttz_2(
+; CHECK-NEXT:    [[CTTZ:%.*]] = call i16 @llvm.cttz.i16(i16 [[A:%.*]], i1 true), !range !1
+; CHECK-NEXT:    [[B:%.*]] = or i16 [[CTTZ]], -16
+; CHECK-NEXT:    ret i16 [[B]]
+;
+  ; llvm.cttz.i16(..., /*is_zero_undefined=*/true) implies the value returned
+  ; is in [0, 16). The range metadata indicates the value returned is in
+  ; [0, 32). Intersecting these ranges, we know the value returned is in
+  ; [0, 16). Therefore, InstCombine will transform
+  ;     add %cttz, 1111 1111 1111 0000 ; decimal -16
+  ; to
+  ;     or  %cttz, 1111 1111 1111 0000
+  %cttz = call i16 @llvm.cttz.i16(i16 %a, i1 true), !range !1
+  %b = add i16 %cttz, -16
+  ret i16 %b
+}
+!1 = !{i16 0, i16 32}
+
+define i32 @add_or_and(i32 %x, i32 %y) {
+; CHECK-LABEL: @add_or_and(
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %or = or i32 %x, %y
+  %and = and i32 %x, %y
+  %add = add i32 %or, %and
+  ret i32 %add
+}
+
+define i32 @add_or_and_commutative(i32 %x, i32 %y) {
+; CHECK-LABEL: @add_or_and_commutative(
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %or = or i32 %x, %y
+  %and = and i32 %y, %x ; swapped
+  %add = add i32 %or, %and
+  ret i32 %add
+}
+
+define i32 @add_and_or(i32 %x, i32 %y) {
+; CHECK-LABEL: @add_and_or(
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %or = or i32 %x, %y
+  %and = and i32 %x, %y
+  %add = add i32 %and, %or
+  ret i32 %add
+}
+
+define i32 @add_and_or_commutative(i32 %x, i32 %y) {
+; CHECK-LABEL: @add_and_or_commutative(
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %or = or i32 %x, %y
+  %and = and i32 %y, %x ; swapped
+  %add = add i32 %and, %or
+  ret i32 %add
+}
+
+define i32 @add_nsw_or_and(i32 %x, i32 %y) {
+; CHECK-LABEL: @add_nsw_or_and(
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %or = or i32 %x, %y
+  %and = and i32 %x, %y
+  %add = add nsw i32 %or, %and
+  ret i32 %add
+}
+
+define i32 @add_nuw_or_and(i32 %x, i32 %y) {
+; CHECK-LABEL: @add_nuw_or_and(
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %or = or i32 %x, %y
+  %and = and i32 %x, %y
+  %add = add nuw i32 %or, %and
+  ret i32 %add
+}
+
+define i32 @add_nuw_nsw_or_and(i32 %x, i32 %y) {
+; CHECK-LABEL: @add_nuw_nsw_or_and(
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %or = or i32 %x, %y
+  %and = and i32 %x, %y
+  %add = add nsw nuw i32 %or, %and
+  ret i32 %add
+}
+
+; A *nsw B + A *nsw C != A *nsw (B + C)
+; e.g. A = -1, B = 1, C = INT_SMAX
+
+define i8 @add_of_mul(i8 %x, i8 %y, i8 %z) {
+; CHECK-LABEL: @add_of_mul(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MB1:%.*]] = add i8 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[SUM:%.*]] = mul i8 [[MB1]], [[X:%.*]]
+; CHECK-NEXT:    ret i8 [[SUM]]
+;
+  entry:
+  %mA = mul nsw i8 %x, %y
+  %mB = mul nsw i8 %x, %z
+  %sum = add nsw i8 %mA, %mB
+  ret i8 %sum
+}
+
+define i32 @add_of_selects(i1 %A, i32 %B) {
+; CHECK-LABEL: @add_of_selects(
+; CHECK-NEXT:    [[ADD:%.*]] = select i1 [[A:%.*]], i32 [[B:%.*]], i32 0
+; CHECK-NEXT:    ret i32 [[ADD]]
+;
+  %sel0 = select i1 %A, i32 0, i32 -2
+  %sel1 = select i1 %A, i32 %B, i32 2
+  %add = add i32 %sel0, %sel1
+  ret i32 %add
+}

Added: llvm/trunk/test/Transforms/InstCombine/add3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/add3.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/add3.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/add3.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,21 @@
+; RUN: opt < %s -instcombine -S | grep inttoptr | count 2
+
+;; Target triple for gep raising case below.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin8"
+
+; PR1795
+define void @test2(i32 %.val24) {
+EntryBlock:
+        add i32 %.val24, -12
+        inttoptr i32 %0 to i32*
+        store i32 1, i32* %1
+        add i32 %.val24, -16
+        inttoptr i32 %2 to i32*
+        getelementptr i32, i32* %3, i32 1
+        load i32, i32* %4
+        tail call i32 @callee( i32 %5 )
+        ret void
+}
+
+declare i32 @callee(i32)

Added: llvm/trunk/test/Transforms/InstCombine/add4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/add4.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/add4.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/add4.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,94 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; ModuleID = 'test/Transforms/InstCombine/add4.ll'
+source_filename = "test/Transforms/InstCombine/add4.ll"
+
+define i64 @match_unsigned(i64 %x) {
+; CHECK-LABEL: @match_unsigned(
+; CHECK-NEXT:    bb:
+; CHECK-NEXT:    [[UREM:%.*]] = urem i64 [[X:%.*]], 19136
+; CHECK-NEXT:    ret i64 [[UREM]]
+;
+bb:
+  %tmp = urem i64 %x, 299
+  %tmp1 = udiv i64 %x, 299
+  %tmp2 = urem i64 %tmp1, 64
+  %tmp3 = mul i64 %tmp2, 299
+  %tmp4 = add i64 %tmp, %tmp3
+  ret i64 %tmp4
+}
+
+define i64 @match_andAsRem_lshrAsDiv_shlAsMul(i64 %x) {
+; CHECK-LABEL: @match_andAsRem_lshrAsDiv_shlAsMul(
+; CHECK-NEXT:    bb:
+; CHECK-NEXT:    [[UREM:%.*]] = urem i64 [[X:%.*]], 576
+; CHECK-NEXT:    ret i64 [[UREM]]
+;
+bb:
+  %tmp = and i64 %x, 63
+  %tmp1 = lshr i64 %x, 6
+  %tmp2 = urem i64 %tmp1, 9
+  %tmp3 = shl i64 %tmp2, 6
+  %tmp4 = add i64 %tmp, %tmp3
+  ret i64 %tmp4
+}
+
+define i64 @match_signed(i64 %x) {
+; CHECK-LABEL: @match_signed(
+; CHECK-NEXT:    bb:
+; CHECK-NEXT:    [[SREM1:%.*]] = srem i64 [[X:%.*]], 172224
+; CHECK-NEXT:    ret i64 [[SREM1]]
+;
+bb:
+  %tmp = srem i64 %x, 299
+  %tmp1 = sdiv i64 %x, 299
+  %tmp2 = srem i64 %tmp1, 64
+  %tmp3 = sdiv i64 %x, 19136
+  %tmp4 = srem i64 %tmp3, 9
+  %tmp5 = mul i64 %tmp2, 299
+  %tmp6 = add i64 %tmp, %tmp5
+  %tmp7 = mul i64 %tmp4, 19136
+  %tmp8 = add i64 %tmp6, %tmp7
+  ret i64 %tmp8
+}
+
+define i64 @not_match_inconsistent_signs(i64 %x) {
+; CHECK-LABEL: @not_match_inconsistent_signs(
+; CHECK:         [[TMP:%.*]] = add
+; CHECK-NEXT:    ret i64 [[TMP]]
+;
+bb:
+  %tmp = urem i64 %x, 299
+  %tmp1 = sdiv i64 %x, 299
+  %tmp2 = urem i64 %tmp1, 64
+  %tmp3 = mul i64 %tmp2, 299
+  %tmp4 = add i64 %tmp, %tmp3
+  ret i64 %tmp4
+}
+
+define i64 @not_match_inconsistent_values(i64 %x) {
+; CHECK-LABEL: @not_match_inconsistent_values(
+; CHECK:         [[TMP:%.*]] = add
+; CHECK-NEXT:    ret i64 [[TMP]]
+;
+bb:
+  %tmp = urem i64 %x, 299
+  %tmp1 = udiv i64 %x, 29
+  %tmp2 = urem i64 %tmp1, 64
+  %tmp3 = mul i64 %tmp2, 299
+  %tmp4 = add i64 %tmp, %tmp3
+  ret i64 %tmp4
+}
+
+define i32 @not_match_overflow(i32 %x) {
+; CHECK-LABEL: @not_match_overflow(
+; CHECK:         [[TMP:%.*]] = add
+; CHECK-NEXT:    ret i32 [[TMP]]
+;
+bb:
+  %tmp = urem i32 %x, 299
+  %tmp1 = udiv i32 %x,299
+  %tmp2 = urem i32 %tmp1, 147483647
+  %tmp3 = mul i32 %tmp2, 299
+  %tmp4 = add i32 %tmp, %tmp3
+  ret i32 %tmp4
+}

Added: llvm/trunk/test/Transforms/InstCombine/addnegneg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/addnegneg.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/addnegneg.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/addnegneg.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,11 @@
+; RUN: opt < %s -instcombine -S | grep " sub " | count 1
+; PR2047
+
+define i32 @l(i32 %a, i32 %b, i32 %c, i32 %d) {
+entry:
+	%b.neg = sub i32 0, %b		; <i32> [#uses=1]
+	%c.neg = sub i32 0, %c		; <i32> [#uses=1]
+	%sub4 = add i32 %c.neg, %b.neg		; <i32> [#uses=1]
+	%sub6 = add i32 %sub4, %d		; <i32> [#uses=1]
+	ret i32 %sub6
+}

Added: llvm/trunk/test/Transforms/InstCombine/addrspacecast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/addrspacecast.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/addrspacecast.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/addrspacecast.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,186 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-p1:32:32:32-p2:16:16:16-n8:16:32:64"
+
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p1i8.i32(i8*, i8 addrspace(1)*, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p2i8.i32(i8*, i8 addrspace(2)*, i32, i1) nounwind
+
+
+define i32* @combine_redundant_addrspacecast(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_redundant_addrspacecast(
+; CHECK: addrspacecast i32 addrspace(1)* %x to i32*
+; CHECK-NEXT: ret
+  %y = addrspacecast i32 addrspace(1)* %x to i32 addrspace(3)*
+  %z = addrspacecast i32 addrspace(3)* %y to i32*
+  ret i32* %z
+}
+
+define <4 x i32*> @combine_redundant_addrspacecast_vector(<4 x i32 addrspace(1)*> %x) nounwind {
+; CHECK-LABEL: @combine_redundant_addrspacecast_vector(
+; CHECK: addrspacecast <4 x i32 addrspace(1)*> %x to <4 x i32*>
+; CHECK-NEXT: ret
+  %y = addrspacecast <4 x i32 addrspace(1)*> %x to <4 x i32 addrspace(3)*>
+  %z = addrspacecast <4 x i32 addrspace(3)*> %y to <4 x i32*>
+  ret <4 x i32*> %z
+}
+
+define float* @combine_redundant_addrspacecast_types(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_redundant_addrspacecast_types(
+; CHECK-NEXT: bitcast i32 addrspace(1)* %x to float addrspace(1)*
+; CHECK-NEXT: addrspacecast float addrspace(1)* %1 to float*
+; CHECK-NEXT: ret
+  %y = addrspacecast i32 addrspace(1)* %x to i32 addrspace(3)*
+  %z = addrspacecast i32 addrspace(3)* %y to float*
+  ret float* %z
+}
+
+define <4 x float*> @combine_redundant_addrspacecast_types_vector(<4 x i32 addrspace(1)*> %x) nounwind {
+; CHECK-LABEL: @combine_redundant_addrspacecast_types_vector(
+; CHECK-NEXT: bitcast <4 x i32 addrspace(1)*> %x to <4 x float addrspace(1)*>
+; CHECK-NEXT: addrspacecast <4 x float addrspace(1)*> %1 to <4 x float*>
+; CHECK-NEXT: ret
+  %y = addrspacecast <4 x i32 addrspace(1)*> %x to <4 x i32 addrspace(3)*>
+  %z = addrspacecast <4 x i32 addrspace(3)*> %y to <4 x float*>
+  ret <4 x float*> %z
+}
+
+define float addrspace(2)* @combine_addrspacecast_bitcast_1(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_addrspacecast_bitcast_1(
+; CHECK-NEXT: bitcast i32 addrspace(1)* %x to float addrspace(1)*
+; CHECK-NEXT: addrspacecast float addrspace(1)* %1 to float addrspace(2)*
+; CHECK-NEXT: ret
+  %y = addrspacecast i32 addrspace(1)* %x to i32 addrspace(2)*
+  %z = bitcast i32 addrspace(2)* %y to float addrspace(2)*
+  ret float addrspace(2)* %z
+}
+
+define i32 addrspace(2)* @combine_addrspacecast_bitcast_2(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_addrspacecast_bitcast_2(
+; CHECK: addrspacecast i32 addrspace(1)* %x to i32 addrspace(2)*
+; CHECK-NEXT: ret
+  %y = addrspacecast i32 addrspace(1)* %x to float addrspace(2)*
+  %z = bitcast float addrspace(2)* %y to i32 addrspace(2)*
+  ret i32 addrspace(2)* %z
+}
+
+define i32 addrspace(2)* @combine_bitcast_addrspacecast_1(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_bitcast_addrspacecast_1(
+; CHECK: addrspacecast i32 addrspace(1)* %x to i32 addrspace(2)*
+; CHECK-NEXT: ret
+  %y = bitcast i32 addrspace(1)* %x to i8 addrspace(1)*
+  %z = addrspacecast i8 addrspace(1)* %y to i32 addrspace(2)*
+  ret i32 addrspace(2)* %z
+}
+
+define float addrspace(2)* @combine_bitcast_addrspacecast_2(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_bitcast_addrspacecast_2(
+; CHECK: bitcast i32 addrspace(1)* %x to float addrspace(1)*
+; CHECK: addrspacecast float addrspace(1)* %1 to float addrspace(2)*
+; CHECK-NEXT: ret
+  %y = bitcast i32 addrspace(1)* %x to i8 addrspace(1)*
+  %z = addrspacecast i8 addrspace(1)* %y to float addrspace(2)*
+  ret float addrspace(2)* %z
+}
+
+define float addrspace(2)* @combine_addrspacecast_types(i32 addrspace(1)* %x) nounwind {
+; CHECK-LABEL: @combine_addrspacecast_types(
+; CHECK-NEXT: bitcast i32 addrspace(1)* %x to float addrspace(1)*
+; CHECK-NEXT: addrspacecast float addrspace(1)* %1 to float addrspace(2)*
+; CHECK-NEXT: ret
+  %y = addrspacecast i32 addrspace(1)* %x to float addrspace(2)*
+  ret float addrspace(2)* %y
+}
+
+define <4 x float addrspace(2)*> @combine_addrspacecast_types_vector(<4 x i32 addrspace(1)*> %x) nounwind {
+; CHECK-LABEL: @combine_addrspacecast_types_vector(
+; CHECK-NEXT: bitcast <4 x i32 addrspace(1)*> %x to <4 x float addrspace(1)*>
+; CHECK-NEXT: addrspacecast <4 x float addrspace(1)*> %1 to <4 x float addrspace(2)*>
+; CHECK-NEXT: ret
+  %y = addrspacecast <4 x i32 addrspace(1)*> %x to <4 x float addrspace(2)*>
+  ret <4 x float addrspace(2)*> %y
+}
+
+define i32 @canonicalize_addrspacecast([16 x i32] addrspace(1)* %arr) {
+; CHECK-LABEL: @canonicalize_addrspacecast(
+; CHECK-NEXT: getelementptr inbounds [16 x i32], [16 x i32] addrspace(1)* %arr, i32 0, i32 0
+; CHECK-NEXT: addrspacecast i32 addrspace(1)* %{{[a-zA-Z0-9]+}} to i32*
+; CHECK-NEXT: load i32, i32*
+; CHECK-NEXT: ret i32
+  %p = addrspacecast [16 x i32] addrspace(1)* %arr to i32*
+  %v = load i32, i32* %p
+  ret i32 %v
+}
+
+@const_array = addrspace(2) constant [60 x i8] [i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22,
+                                                i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22,
+                                                i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22,
+                                                i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22,
+                                                i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22, i8 2, i8 9, i8 4, i8 22 ]
+
+declare void @foo(i8*) nounwind
+
+; A copy from a constant addrspacecast'ed global
+; CHECK-LABEL: @memcpy_addrspacecast(
+; CHECK-NOT:  call void @llvm.memcpy
+define i32 @memcpy_addrspacecast() nounwind {
+entry:
+  %alloca = alloca i8, i32 48
+  call void @llvm.memcpy.p0i8.p1i8.i32(i8* align 4 %alloca, i8 addrspace(1)* align 4 addrspacecast (i8 addrspace(2)* getelementptr inbounds ([60 x i8], [60 x i8] addrspace(2)* @const_array, i16 0, i16 4) to i8 addrspace(1)*), i32 48, i1 false) nounwind
+  br label %loop.body
+
+loop.body:
+  %i = phi i32 [ 0, %entry ], [ %i.inc, %loop.body ]
+  %sum = phi i32 [ 0, %entry ], [ %sum.inc, %loop.body]
+  %ptr = getelementptr i8, i8* %alloca, i32 %i
+  %load = load i8, i8* %ptr
+  %ext = zext i8 %load to i32
+  %sum.inc = add i32 %sum, %ext
+  %i.inc = add i32 %i, 1
+  %cmp = icmp ne i32 %i, 48
+  br i1 %cmp, label %loop.body, label %end
+
+end:
+  ret i32 %sum.inc
+}
+
+; CHECK-LABEL: @constant_fold_null(
+; CHECK: i32 addrspace(3)* null to i32 addrspace(4)*
+define void @constant_fold_null() #0 {
+  %cast = addrspacecast i32 addrspace(3)* null to i32 addrspace(4)*
+  store i32 7, i32 addrspace(4)* %cast
+  ret void
+}
+
+; CHECK-LABEL: @constant_fold_undef(
+; CHECK: ret i32 addrspace(4)* undef
+define i32 addrspace(4)* @constant_fold_undef() #0 {
+  %cast = addrspacecast i32 addrspace(3)* undef to i32 addrspace(4)*
+  ret i32 addrspace(4)* %cast
+}
+
+; CHECK-LABEL: @constant_fold_null_vector(
+; CHECK: addrspacecast (<4 x i32 addrspace(3)*> zeroinitializer to <4 x i32 addrspace(4)*>)
+define <4 x i32 addrspace(4)*> @constant_fold_null_vector() #0 {
+  %cast = addrspacecast <4 x i32 addrspace(3)*> zeroinitializer to <4 x i32 addrspace(4)*>
+  ret <4 x i32 addrspace(4)*> %cast
+}
+
+; CHECK-LABEL: @constant_fold_inttoptr(
+; CHECK: addrspacecast (i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32 addrspace(4)*)
+define void @constant_fold_inttoptr() #0 {
+  %cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32 addrspace(4)*
+  store i32 7, i32 addrspace(4)* %cast
+  ret void
+}
+
+; CHECK-LABEL: @constant_fold_gep_inttoptr(
+; CHECK: addrspacecast (i32 addrspace(3)* inttoptr (i64 1274 to i32 addrspace(3)*) to i32 addrspace(4)*)
+define void @constant_fold_gep_inttoptr() #0 {
+  %k = inttoptr i32 1234 to i32 addrspace(3)*
+  %gep = getelementptr i32, i32 addrspace(3)* %k, i32 10
+  %cast = addrspacecast i32 addrspace(3)* %gep to i32 addrspace(4)*
+  store i32 7, i32 addrspace(4)* %cast
+  ret void
+}

Added: llvm/trunk/test/Transforms/InstCombine/adjust-for-minmax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/adjust-for-minmax.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/adjust-for-minmax.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/adjust-for-minmax.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,486 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Instcombine should recognize that this code can be adjusted to fit the canonical max/min pattern.
+
+; No change
+
+define i32 @smax1(i32 %n) {
+; CHECK-LABEL: @smax1(
+; CHECK-NEXT:    [[T:%.*]] = icmp sgt i32 %n, 0
+; CHECK-NEXT:    [[M:%.*]] = select i1 [[T]], i32 %n, i32 0
+; CHECK-NEXT:    ret i32 [[M]]
+;
+  %t = icmp sgt i32 %n, 0
+  %m = select i1 %t, i32 %n, i32 0
+  ret i32 %m
+}
+
+; No change
+
+define i32 @smin1(i32 %n) {
+; CHECK-LABEL: @smin1(
+; CHECK-NEXT:    [[T:%.*]] = icmp slt i32 %n, 0
+; CHECK-NEXT:    [[M:%.*]] = select i1 [[T]], i32 %n, i32 0
+; CHECK-NEXT:    ret i32 [[M]]
+;
+  %t = icmp slt i32 %n, 0
+  %m = select i1 %t, i32 %n, i32 0
+  ret i32 %m
+}
+
+; Canonicalize min/max.
+
+define i32 @smax2(i32 %n) {
+; CHECK-LABEL: @smax2(
+; CHECK-NEXT:    [[T:%.*]] = icmp sgt i32 %n, 0
+; CHECK-NEXT:    [[M:%.*]] = select i1 [[T]], i32 %n, i32 0
+; CHECK-NEXT:    ret i32 [[M]]
+;
+  %t = icmp sge i32 %n, 1
+  %m = select i1 %t, i32 %n, i32 0
+  ret i32 %m
+}
+
+; Canonicalize min/max.
+
+define i32 @smin2(i32 %n) {
+; CHECK-LABEL: @smin2(
+; CHECK-NEXT:    [[T:%.*]] = icmp slt i32 %n, 0
+; CHECK-NEXT:    [[M:%.*]] = select i1 [[T]], i32 %n, i32 0
+; CHECK-NEXT:    ret i32 [[M]]
+;
+  %t = icmp sle i32 %n, -1
+  %m = select i1 %t, i32 %n, i32 0
+  ret i32 %m
+}
+
+; Canonicalize min/max.
+
+define i32 @smax3(i32 %n) {
+; CHECK-LABEL: @smax3(
+; CHECK-NEXT:    [[T:%.*]] = icmp sgt i32 %n, 0
+; CHECK-NEXT:    [[M:%.*]] = select i1 [[T]], i32 %n, i32 0
+; CHECK-NEXT:    ret i32 [[M]]
+;
+  %t = icmp sgt i32 %n, -1
+  %m = select i1 %t, i32 %n, i32 0
+  ret i32 %m
+}
+
+; Canonicalize min/max.
+
+define <2 x i32> @smax3_vec(<2 x i32> %n) {
+; CHECK-LABEL: @smax3_vec(
+; CHECK-NEXT:    [[T:%.*]] = icmp sgt <2 x i32> %n, zeroinitializer
+; CHECK-NEXT:    [[M:%.*]] = select <2 x i1> [[T]], <2 x i32> %n, <2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <2 x i32> [[M]]
+;
+  %t = icmp sgt <2 x i32> %n, <i32 -1, i32 -1>
+  %m = select <2 x i1> %t, <2 x i32> %n, <2 x i32> zeroinitializer
+  ret <2 x i32> %m
+}
+
+; Canonicalize min/max.
+
+define i32 @smin3(i32 %n) {
+; CHECK-LABEL: @smin3(
+; CHECK-NEXT:    [[T:%.*]] = icmp slt i32 %n, 0
+; CHECK-NEXT:    [[M:%.*]] = select i1 [[T]], i32 %n, i32 0
+; CHECK-NEXT:    ret i32 [[M]]
+;
+  %t = icmp slt i32 %n, 1
+  %m = select i1 %t, i32 %n, i32 0
+  ret i32 %m
+}
+
+; Canonicalize min/max.
+
+define <2 x i32> @smin3_vec(<2 x i32> %n) {
+; CHECK-LABEL: @smin3_vec(
+; CHECK-NEXT:    [[T:%.*]] = icmp slt <2 x i32> %n, zeroinitializer
+; CHECK-NEXT:    [[M:%.*]] = select <2 x i1> [[T]], <2 x i32> %n, <2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <2 x i32> [[M]]
+;
+  %t = icmp slt <2 x i32> %n, <i32 1, i32 1>
+  %m = select <2 x i1> %t, <2 x i32> %n, <2 x i32> zeroinitializer
+  ret <2 x i32> %m
+}
+
+; Canonicalize min/max.
+
+define i32 @umax3(i32 %n) {
+; CHECK-LABEL: @umax3(
+; CHECK-NEXT:    [[T:%.*]] = icmp ugt i32 %n, 5
+; CHECK-NEXT:    [[M:%.*]] = select i1 [[T]], i32 %n, i32 5
+; CHECK-NEXT:    ret i32 [[M]]
+;
+  %t = icmp ugt i32 %n, 4
+  %m = select i1 %t, i32 %n, i32 5
+  ret i32 %m
+}
+
+; Canonicalize min/max.
+
+define <2 x i32> @umax3_vec(<2 x i32> %n) {
+; CHECK-LABEL: @umax3_vec(
+; CHECK-NEXT:    [[T:%.*]] = icmp ugt <2 x i32> %n, <i32 5, i32 5>
+; CHECK-NEXT:    [[M:%.*]] = select <2 x i1> [[T]], <2 x i32> %n, <2 x i32> <i32 5, i32 5>
+; CHECK-NEXT:    ret <2 x i32> [[M]]
+;
+  %t = icmp ugt <2 x i32> %n, <i32 4, i32 4>
+  %m = select <2 x i1> %t, <2 x i32> %n, <2 x i32> <i32 5, i32 5>
+  ret <2 x i32> %m
+}
+
+; Canonicalize min/max.
+
+define i32 @umin3(i32 %n) {
+; CHECK-LABEL: @umin3(
+; CHECK-NEXT:    [[T:%.*]] = icmp ult i32 %n, 6
+; CHECK-NEXT:    [[M:%.*]] = select i1 [[T]], i32 %n, i32 6
+; CHECK-NEXT:    ret i32 [[M]]
+;
+  %t = icmp ult i32 %n, 7
+  %m = select i1 %t, i32 %n, i32 6
+  ret i32 %m
+}
+
+; Canonicalize min/max.
+
+define <2 x i32> @umin3_vec(<2 x i32> %n) {
+; CHECK-LABEL: @umin3_vec(
+; CHECK-NEXT:    [[T:%.*]] = icmp ult <2 x i32> %n, <i32 6, i32 6>
+; CHECK-NEXT:    [[M:%.*]] = select <2 x i1> [[T]], <2 x i32> %n, <2 x i32> <i32 6, i32 6>
+; CHECK-NEXT:    ret <2 x i32> [[M]]
+;
+  %t = icmp ult <2 x i32> %n, <i32 7, i32 7>
+  %m = select <2 x i1> %t, <2 x i32> %n, <2 x i32> <i32 6, i32 6>
+  ret <2 x i32> %m
+}
+
+; Canonicalize min/max.
+
+define i32 @smax4(i32 %n) {
+; CHECK-LABEL: @smax4(
+; CHECK-NEXT:    [[T:%.*]] = icmp sgt i32 %n, 0
+; CHECK-NEXT:    [[M:%.*]] = select i1 [[T]], i32 %n, i32 0
+; CHECK-NEXT:    ret i32 [[M]]
+;
+  %t = icmp sge i32 %n, 0
+  %m = select i1 %t, i32 %n, i32 0
+  ret i32 %m
+}
+
+; Canonicalize min/max.
+
+define <2 x i32> @smax4_vec(<2 x i32> %n) {
+; CHECK-LABEL: @smax4_vec(
+; CHECK-NEXT:    [[T:%.*]] = icmp sgt <2 x i32> %n, zeroinitializer
+; CHECK-NEXT:    [[M:%.*]] = select <2 x i1> [[T]], <2 x i32> %n, <2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <2 x i32> [[M]]
+;
+  %t = icmp sge <2 x i32> %n, zeroinitializer
+  %m = select <2 x i1> %t, <2 x i32> %n, <2 x i32> zeroinitializer
+  ret <2 x i32> %m
+}
+
+; Canonicalize min/max.
+
+define i32 @smin4(i32 %n) {
+; CHECK-LABEL: @smin4(
+; CHECK-NEXT:    [[T:%.*]] = icmp slt i32 %n, 0
+; CHECK-NEXT:    [[M:%.*]] = select i1 [[T]], i32 %n, i32 0
+; CHECK-NEXT:    ret i32 [[M]]
+;
+  %t = icmp sle i32 %n, 0
+  %m = select i1 %t, i32 %n, i32 0
+  ret i32 %m
+}
+
+; Canonicalize min/max.
+
+define <2 x i32> @smin4_vec(<2 x i32> %n) {
+; CHECK-LABEL: @smin4_vec(
+; CHECK-NEXT:    [[T:%.*]] = icmp slt <2 x i32> %n, zeroinitializer
+; CHECK-NEXT:    [[M:%.*]] = select <2 x i1> [[T]], <2 x i32> %n, <2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <2 x i32> [[M]]
+;
+  %t = icmp sle <2 x i32> %n, zeroinitializer
+  %m = select <2 x i1> %t, <2 x i32> %n, <2 x i32> zeroinitializer
+  ret <2 x i32> %m
+}
+
+; Canonicalize min/max.
+
+define i32 @umax4(i32 %n) {
+; CHECK-LABEL: @umax4(
+; CHECK-NEXT:    [[T:%.*]] = icmp ugt i32 %n, 8
+; CHECK-NEXT:    [[M:%.*]] = select i1 [[T]], i32 %n, i32 8
+; CHECK-NEXT:    ret i32 [[M]]
+;
+  %t = icmp uge i32 %n, 8
+  %m = select i1 %t, i32 %n, i32 8
+  ret i32 %m
+}
+
+; Canonicalize min/max.
+
+define <2 x i32> @umax4_vec(<2 x i32> %n) {
+; CHECK-LABEL: @umax4_vec(
+; CHECK-NEXT:    [[T:%.*]] = icmp ugt <2 x i32> %n, <i32 8, i32 8>
+; CHECK-NEXT:    [[M:%.*]] = select <2 x i1> [[T]], <2 x i32> %n, <2 x i32> <i32 8, i32 8>
+; CHECK-NEXT:    ret <2 x i32> [[M]]
+;
+  %t = icmp uge <2 x i32> %n, <i32 8, i32 8>
+  %m = select <2 x i1> %t, <2 x i32> %n, <2 x i32> <i32 8, i32 8>
+  ret <2 x i32> %m
+}
+
+; Canonicalize min/max.
+
+define i32 @umin4(i32 %n) {
+; CHECK-LABEL: @umin4(
+; CHECK-NEXT:    [[T:%.*]] = icmp ult i32 %n, 9
+; CHECK-NEXT:    [[M:%.*]] = select i1 [[T]], i32 %n, i32 9
+; CHECK-NEXT:    ret i32 [[M]]
+;
+  %t = icmp ule i32 %n, 9
+  %m = select i1 %t, i32 %n, i32 9
+  ret i32 %m
+}
+
+; Canonicalize min/max.
+
+define <2 x i32> @umin4_vec(<2 x i32> %n) {
+; CHECK-LABEL: @umin4_vec(
+; CHECK-NEXT:    [[T:%.*]] = icmp ult <2 x i32> %n, <i32 9, i32 9>
+; CHECK-NEXT:    [[M:%.*]] = select <2 x i1> [[T]], <2 x i32> %n, <2 x i32> <i32 9, i32 9>
+; CHECK-NEXT:    ret <2 x i32> [[M]]
+;
+  %t = icmp ule <2 x i32> %n, <i32 9, i32 9>
+  %m = select <2 x i1> %t, <2 x i32> %n, <2 x i32> <i32 9, i32 9>
+  ret <2 x i32> %m
+}
+
+define i64 @smax_sext(i32 %a) {
+; CHECK-LABEL: @smax_sext(
+; CHECK-NEXT:    [[A_EXT:%.*]] = sext i32 %a to i64
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[A_EXT]], 0
+; CHECK-NEXT:    [[MAX:%.*]] = select i1 [[CMP]], i64 [[A_EXT]], i64 0
+; CHECK-NEXT:    ret i64 [[MAX]]
+;
+  %a_ext = sext i32 %a to i64
+  %cmp = icmp sgt i32 %a, -1
+  %max = select i1 %cmp, i64 %a_ext, i64 0
+  ret i64 %max
+}
+
+define <2 x i64> @smax_sext_vec(<2 x i32> %a) {
+; CHECK-LABEL: @smax_sext_vec(
+; CHECK-NEXT:    [[A_EXT:%.*]] = sext <2 x i32> %a to <2 x i64>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i64> [[A_EXT]], zeroinitializer
+; CHECK-NEXT:    [[MAX:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[A_EXT]], <2 x i64> zeroinitializer
+; CHECK-NEXT:    ret <2 x i64> [[MAX]]
+;
+  %a_ext = sext <2 x i32> %a to <2 x i64>
+  %cmp = icmp sgt <2 x i32> %a, <i32 -1, i32 -1>
+  %max = select <2 x i1> %cmp, <2 x i64> %a_ext, <2 x i64> zeroinitializer
+  ret <2 x i64> %max
+}
+
+define i64 @smin_sext(i32 %a) {
+; CHECK-LABEL: @smin_sext(
+; CHECK-NEXT:    [[A_EXT:%.*]] = sext i32 %a to i64
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[A_EXT]], 0
+; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[CMP]], i64 [[A_EXT]], i64 0
+; CHECK-NEXT:    ret i64 [[MIN]]
+;
+  %a_ext = sext i32 %a to i64
+  %cmp = icmp slt i32 %a, 1
+  %min = select i1 %cmp, i64 %a_ext, i64 0
+  ret i64 %min
+}
+
+define <2 x i64> @smin_sext_vec(<2 x i32> %a) {
+; CHECK-LABEL: @smin_sext_vec(
+; CHECK-NEXT:    [[A_EXT:%.*]] = sext <2 x i32> %a to <2 x i64>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i64> [[A_EXT]], zeroinitializer
+; CHECK-NEXT:    [[MIN:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[A_EXT]], <2 x i64> zeroinitializer
+; CHECK-NEXT:    ret <2 x i64> [[MIN]]
+;
+  %a_ext = sext <2 x i32> %a to <2 x i64>
+  %cmp = icmp slt <2 x i32> %a, <i32 1, i32 1>
+  %min = select <2 x i1> %cmp, <2 x i64> %a_ext, <2 x i64> zeroinitializer
+  ret <2 x i64> %min
+}
+
+define i64 @umax_sext(i32 %a) {
+; CHECK-LABEL: @umax_sext(
+; CHECK-NEXT:    [[A_EXT:%.*]] = sext i32 %a to i64
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[A_EXT]], 3
+; CHECK-NEXT:    [[MAX:%.*]] = select i1 [[CMP]], i64 [[A_EXT]], i64 3
+; CHECK-NEXT:    ret i64 [[MAX]]
+;
+  %a_ext = sext i32 %a to i64
+  %cmp = icmp ugt i32 %a, 2
+  %max = select i1 %cmp, i64 %a_ext, i64 3
+  ret i64 %max
+}
+
+define <2 x i64> @umax_sext_vec(<2 x i32> %a) {
+; CHECK-LABEL: @umax_sext_vec(
+; CHECK-NEXT:    [[A_EXT:%.*]] = sext <2 x i32> %a to <2 x i64>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i64> [[A_EXT]], <i64 3, i64 3>
+; CHECK-NEXT:    [[MAX:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[A_EXT]], <2 x i64> <i64 3, i64 3>
+; CHECK-NEXT:    ret <2 x i64> [[MAX]]
+;
+  %a_ext = sext <2 x i32> %a to <2 x i64>
+  %cmp = icmp ugt <2 x i32> %a, <i32 2, i32 2>
+  %max = select <2 x i1> %cmp, <2 x i64> %a_ext, <2 x i64> <i64 3, i64 3>
+  ret <2 x i64> %max
+}
+
+define i64 @umin_sext(i32 %a) {
+; CHECK-LABEL: @umin_sext(
+; CHECK-NEXT:    [[A_EXT:%.*]] = sext i32 %a to i64
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[A_EXT]], 2
+; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[CMP]], i64 [[A_EXT]], i64 2
+; CHECK-NEXT:    ret i64 [[MIN]]
+;
+  %a_ext = sext i32 %a to i64
+  %cmp = icmp ult i32 %a, 3
+  %min = select i1 %cmp, i64 %a_ext, i64 2
+  ret i64 %min
+}
+
+define <2 x i64> @umin_sext_vec(<2 x i32> %a) {
+; CHECK-LABEL: @umin_sext_vec(
+; CHECK-NEXT:    [[A_EXT:%.*]] = sext <2 x i32> %a to <2 x i64>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i64> [[A_EXT]], <i64 2, i64 2>
+; CHECK-NEXT:    [[MIN:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[A_EXT]], <2 x i64> <i64 2, i64 2>
+; CHECK-NEXT:    ret <2 x i64> [[MIN]]
+;
+  %a_ext = sext <2 x i32> %a to <2 x i64>
+  %cmp = icmp ult <2 x i32> %a, <i32 3, i32 3>
+  %min = select <2 x i1> %cmp, <2 x i64> %a_ext, <2 x i64> <i64 2, i64 2>
+  ret <2 x i64> %min
+}
+
+define i64 @umax_sext2(i32 %a) {
+; CHECK-LABEL: @umax_sext2(
+; CHECK-NEXT:    [[A_EXT:%.*]] = sext i32 %a to i64
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[A_EXT]], 2
+; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[CMP]], i64 [[A_EXT]], i64 2
+; CHECK-NEXT:    ret i64 [[MIN]]
+;
+  %a_ext = sext i32 %a to i64
+  %cmp = icmp ult i32 %a, 3
+  %min = select i1 %cmp, i64 2, i64 %a_ext
+  ret i64 %min
+}
+
+define <2 x i64> @umax_sext2_vec(<2 x i32> %a) {
+; CHECK-LABEL: @umax_sext2_vec(
+; CHECK-NEXT:    [[A_EXT:%.*]] = sext <2 x i32> %a to <2 x i64>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i64> [[A_EXT]], <i64 2, i64 2>
+; CHECK-NEXT:    [[MIN:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[A_EXT]], <2 x i64> <i64 2, i64 2>
+; CHECK-NEXT:    ret <2 x i64> [[MIN]]
+;
+  %a_ext = sext <2 x i32> %a to <2 x i64>
+  %cmp = icmp ult <2 x i32> %a, <i32 3, i32 3>
+  %min = select <2 x i1> %cmp, <2 x i64> <i64 2, i64 2>, <2 x i64> %a_ext
+  ret <2 x i64> %min
+}
+
+define i64 @umin_sext2(i32 %a) {
+; CHECK-LABEL: @umin_sext2(
+; CHECK-NEXT:    [[A_EXT:%.*]] = sext i32 %a to i64
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[A_EXT]], 3
+; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[CMP]], i64 [[A_EXT]], i64 3
+; CHECK-NEXT:    ret i64 [[MIN]]
+;
+  %a_ext = sext i32 %a to i64
+  %cmp = icmp ugt i32 %a, 2
+  %min = select i1 %cmp, i64 3, i64 %a_ext
+  ret i64 %min
+}
+
+define <2 x i64> @umin_sext2_vec(<2 x i32> %a) {
+; CHECK-LABEL: @umin_sext2_vec(
+; CHECK-NEXT:    [[A_EXT:%.*]] = sext <2 x i32> %a to <2 x i64>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i64> [[A_EXT]], <i64 3, i64 3>
+; CHECK-NEXT:    [[MIN:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[A_EXT]], <2 x i64> <i64 3, i64 3>
+; CHECK-NEXT:    ret <2 x i64> [[MIN]]
+;
+  %a_ext = sext <2 x i32> %a to <2 x i64>
+  %cmp = icmp ugt <2 x i32> %a, <i32 2, i32 2>
+  %min = select <2 x i1> %cmp, <2 x i64> <i64 3, i64 3>, <2 x i64> %a_ext
+  ret <2 x i64> %min
+}
+
+define i64 @umax_zext(i32 %a) {
+; CHECK-LABEL: @umax_zext(
+; CHECK-NEXT:    [[A_EXT:%.*]] = zext i32 %a to i64
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[A_EXT]], 3
+; CHECK-NEXT:    [[MAX:%.*]] = select i1 [[CMP]], i64 [[A_EXT]], i64 3
+; CHECK-NEXT:    ret i64 [[MAX]]
+;
+  %a_ext = zext i32 %a to i64
+  %cmp = icmp ugt i32 %a, 2
+  %max = select i1 %cmp, i64 %a_ext, i64 3
+  ret i64 %max
+}
+
+define <2 x i64> @umax_zext_vec(<2 x i32> %a) {
+; CHECK-LABEL: @umax_zext_vec(
+; CHECK-NEXT:    [[A_EXT:%.*]] = zext <2 x i32> %a to <2 x i64>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i64> [[A_EXT]], <i64 3, i64 3>
+; CHECK-NEXT:    [[MAX:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[A_EXT]], <2 x i64> <i64 3, i64 3>
+; CHECK-NEXT:    ret <2 x i64> [[MAX]]
+;
+  %a_ext = zext <2 x i32> %a to <2 x i64>
+  %cmp = icmp ugt <2 x i32> %a, <i32 2, i32 2>
+  %max = select <2 x i1> %cmp, <2 x i64> %a_ext, <2 x i64> <i64 3, i64 3>
+  ret <2 x i64> %max
+}
+
+define i64 @umin_zext(i32 %a) {
+; CHECK-LABEL: @umin_zext(
+; CHECK-NEXT:    [[A_EXT:%.*]] = zext i32 %a to i64
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[A_EXT]], 2
+; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[CMP]], i64 [[A_EXT]], i64 2
+; CHECK-NEXT:    ret i64 [[MIN]]
+;
+  %a_ext = zext i32 %a to i64
+  %cmp = icmp ult i32 %a, 3
+  %min = select i1 %cmp, i64 %a_ext, i64 2
+  ret i64 %min
+}
+
+define <2 x i64> @umin_zext_vec(<2 x i32> %a) {
+; CHECK-LABEL: @umin_zext_vec(
+; CHECK-NEXT:    [[A_EXT:%.*]] = zext <2 x i32> %a to <2 x i64>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i64> [[A_EXT]], <i64 2, i64 2>
+; CHECK-NEXT:    [[MIN:%.*]] = select <2 x i1> [[CMP]], <2 x i64> [[A_EXT]], <2 x i64> <i64 2, i64 2>
+; CHECK-NEXT:    ret <2 x i64> [[MIN]]
+;
+  %a_ext = zext <2 x i32> %a to <2 x i64>
+  %cmp = icmp ult <2 x i32> %a, <i32 3, i32 3>
+  %min = select <2 x i1> %cmp, <2 x i64> %a_ext, <2 x i64> <i64 2, i64 2>
+  ret <2 x i64> %min
+}
+
+; Don't crash while handling a pattern that can't be transformed.
+
+define <2 x i16> @scalar_select_of_vectors(<2 x i16> %a, <2 x i16> %b, i8 %x) {
+; CHECK-LABEL: @scalar_select_of_vectors(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 %x, 0
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], <2 x i16> %a, <2 x i16> %b
+; CHECK-NEXT:    ret <2 x i16> [[SEL]]
+;
+  %cmp = icmp slt i8 %x, 0
+  %sel = select i1 %cmp, <2 x i16> %a, <2 x i16> %b
+  ret <2 x i16> %sel
+}
+

Added: llvm/trunk/test/Transforms/InstCombine/alias-recursion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/alias-recursion.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/alias-recursion.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/alias-recursion.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,24 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+%class.A = type { i32 (...)** }
+
+@0 = constant [1 x i8*] zeroinitializer
+
+@vtbl = alias i8*, getelementptr inbounds ([1 x i8*], [1 x i8*]* @0, i32 0, i32 0)
+
+define i32 (%class.A*)* @test() {
+; CHECK-LABEL: test
+entry:
+  br i1 undef, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.body, %entry
+  br i1 undef, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  %A = phi i32 (%class.A*)** [ bitcast (i8** @vtbl to i32 (%class.A*)**), %for.body ], [ null, %entry ]
+  %B = load i32 (%class.A*)*, i32 (%class.A*)** %A
+  ret i32 (%class.A*)* %B
+}

Added: llvm/trunk/test/Transforms/InstCombine/align-2d-gep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/align-2d-gep.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/align-2d-gep.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/align-2d-gep.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,44 @@
+; RUN: opt < %s -instcombine -S | grep "align 16" | count 1
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+; A multi-dimensional array in a nested loop doing vector stores that
+; aren't yet aligned. Instcombine can understand the addressing in the
+; Nice case to prove 16 byte alignment. In the Awkward case, the inner
+; array dimension is not even, so the stores to it won't always be
+; aligned. Instcombine should prove alignment in exactly one of the two
+; stores.
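+; (A row of @Nice is 20000 * 8 = 160000 bytes, a multiple of 16, so every
+; even-indexed element of a row sits on a 16-byte boundary; a row of @Awkward
+; is 20001 * 8 = 160008 bytes, which is 8 mod 16, so only 8-byte alignment
+; can be proven for the second store.)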
+
+@Nice    = global [1001 x [20000 x double]] zeroinitializer, align 32
+@Awkward = global [1001 x [20001 x double]] zeroinitializer, align 32
+
+define void @foo() nounwind  {
+entry:
+  br label %bb7.outer
+
+bb7.outer:
+  %i = phi i64 [ 0, %entry ], [ %indvar.next26, %bb11 ]
+  br label %bb1
+
+bb1:
+  %j = phi i64 [ 0, %bb7.outer ], [ %indvar.next, %bb1 ]
+
+  %t4 = getelementptr [1001 x [20000 x double]], [1001 x [20000 x double]]* @Nice, i64 0, i64 %i, i64 %j
+  %q = bitcast double* %t4 to <2 x double>*
+  store <2 x double><double 0.0, double 0.0>, <2 x double>* %q, align 8
+
+  %s4 = getelementptr [1001 x [20001 x double]], [1001 x [20001 x double]]* @Awkward, i64 0, i64 %i, i64 %j
+  %r = bitcast double* %s4 to <2 x double>*
+  store <2 x double><double 0.0, double 0.0>, <2 x double>* %r, align 8
+
+  %indvar.next = add i64 %j, 2
+  %exitcond = icmp eq i64 %indvar.next, 556
+  br i1 %exitcond, label %bb11, label %bb1
+
+bb11:
+  %indvar.next26 = add i64 %i, 1
+  %exitcond27 = icmp eq i64 %indvar.next26, 991
+  br i1 %exitcond27, label %return.split, label %bb7.outer
+
+return.split:
+  ret void
+}

Added: llvm/trunk/test/Transforms/InstCombine/align-addr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/align-addr.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/align-addr.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/align-addr.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,97 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "E-p:64:64:64-p1:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+; Instcombine should be able to prove vector alignment in the
+; presence of a few mild address computation tricks.
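+; In @test0, 'and i64 %c, -16' makes %e 16-byte aligned, and %v and %z are
+; both forced to be even, so the index %h is even and %h * 8 bytes is a
+; multiple of 16.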
+
+; CHECK-LABEL: @test0(
+; CHECK: align 16
+
+define void @test0(i8* %b, i64 %n, i64 %u, i64 %y) nounwind  {
+entry:
+  %c = ptrtoint i8* %b to i64
+  %d = and i64 %c, -16
+  %e = inttoptr i64 %d to double*
+  %v = mul i64 %u, 2
+  %z = and i64 %y, -2
+  %t1421 = icmp eq i64 %n, 0
+  br i1 %t1421, label %return, label %bb
+
+bb:
+  %i = phi i64 [ %indvar.next, %bb ], [ 20, %entry ]
+  %j = mul i64 %i, %v
+  %h = add i64 %j, %z
+  %t8 = getelementptr double, double* %e, i64 %h
+  %p = bitcast double* %t8 to <2 x double>*
+  store <2 x double><double 0.0, double 0.0>, <2 x double>* %p, align 8
+  %indvar.next = add i64 %i, 1
+  %exitcond = icmp eq i64 %indvar.next, %n
+  br i1 %exitcond, label %return, label %bb
+
+return:
+  ret void
+}
+
+; When we see an unaligned load from an insufficiently aligned global or
+; alloca, increase the alignment of the load, turning it into an aligned load.
+
+; CHECK-LABEL: @test1(
+; CHECK: tmp = load
+; CHECK: GLOBAL{{.*}}align 16
+
+@GLOBAL = internal global [4 x i32] zeroinitializer
+
+define <16 x i8> @test1(<2 x i64> %x) {
+entry:
+	%tmp = load <16 x i8>, <16 x i8>* bitcast ([4 x i32]* @GLOBAL to <16 x i8>*), align 1
+	ret <16 x i8> %tmp
+}
+
+@GLOBAL_as1 = internal addrspace(1) global [4 x i32] zeroinitializer
+
+define <16 x i8> @test1_as1(<2 x i64> %x) {
+; CHECK-LABEL: @test1_as1(
+; CHECK: tmp = load
+; CHECK: GLOBAL_as1{{.*}}align 16
+  %tmp = load <16 x i8>, <16 x i8> addrspace(1)* bitcast ([4 x i32] addrspace(1)* @GLOBAL_as1 to <16 x i8> addrspace(1)*), align 1
+  ret <16 x i8> %tmp
+}
+
+@GLOBAL_as1_gep = internal addrspace(1) global [8 x i32] zeroinitializer
+
+define <16 x i8> @test1_as1_gep(<2 x i64> %x) {
+; CHECK-LABEL: @test1_as1_gep(
+; CHECK: tmp = load
+; CHECK: GLOBAL_as1_gep{{.*}}align 16
+  %tmp = load <16 x i8>, <16 x i8> addrspace(1)* bitcast (i32 addrspace(1)* getelementptr ([8 x i32], [8 x i32] addrspace(1)* @GLOBAL_as1_gep, i16 0, i16 4) to <16 x i8> addrspace(1)*), align 1
+  ret <16 x i8> %tmp
+}
+
+
+; When a load or store lacks an explicit alignment, add one.
+
+; CHECK-LABEL: @test2(
+; CHECK: load double, double* %p, align 8
+; CHECK: store double %n, double* %p, align 8
+
+define double @test2(double* %p, double %n) nounwind {
+  %t = load double, double* %p
+  store double %n, double* %p
+  ret double %t
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+
+declare void @use(i8*)
+
+%struct.s = type { i32, i32, i32, i32 }
+
+define void @test3(%struct.s* sret %a4) {
+; Check that the alignment is bumped up to the alignment of the sret type.
+; CHECK-LABEL: @test3(
+  %a4.cast = bitcast %struct.s* %a4 to i8*
+  call void @llvm.memset.p0i8.i64(i8* %a4.cast, i8 0, i64 16, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %a4.cast, i8 0, i64 16, i1 false)
+  call void @use(i8* %a4.cast)
+  ret void
+}

Added: llvm/trunk/test/Transforms/InstCombine/align-attr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/align-attr.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/align-attr.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/align-attr.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,28 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @foo1(i32* align 32 %a) #0 {
+entry:
+  %0 = load i32, i32* %a, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @foo1
+; CHECK-DAG: load i32, i32* %a, align 32
+; CHECK: ret i32
+}
+
+define i32 @foo2(i32* align 32 %a) #0 {
+entry:
+  %v = call i32* @func1(i32* %a)
+  %0 = load i32, i32* %v, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @foo2
+; CHECK-DAG: load i32, i32* %v, align 32
+; CHECK: ret i32
+}
+
+declare i32* @func1(i32* returned) nounwind
+

Added: llvm/trunk/test/Transforms/InstCombine/align-external.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/align-external.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/align-external.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/align-external.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,41 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Don't assume that external global variables or those with weak linkage have
+; their preferred alignment. They may only have the ABI minimum alignment.
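+; (The "i32:8:32" datalayout below gives i32 an ABI alignment of 1 byte and a
+; preferred alignment of 4 bytes, so only 1-byte alignment may be assumed for
+; loads from @A and @B.)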
+
+target datalayout = "i32:8:32"
+
+@A = external global i32
+@B = weak_odr global i32 0
+
+@C = available_externally global <4 x i32> zeroinitializer, align 4
+; CHECK: @C = available_externally global <4 x i32> zeroinitializer, align 4
+
+define i64 @foo(i64 %a) {
+  %t = ptrtoint i32* @A to i64
+  %s = shl i64 %a, 3
+  %r = or i64 %t, %s
+  %q = add i64 %r, 1
+  ret i64 %q
+}
+
+; CHECK-LABEL: define i64 @foo(i64 %a)
+; CHECK: %s = shl i64 %a, 3
+; CHECK: %r = or i64 %s, ptrtoint (i32* @A to i64)
+; CHECK: %q = add i64 %r, 1
+; CHECK: ret i64 %q
+
+define i32 @bar() {
+  %r = load i32, i32* @B, align 1
+  ret i32 %r
+}
+
+; CHECK-LABEL: @bar()
+; CHECK: align 1
+
+define void @vec_store() {
+  store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32>* @C, align 4
+  ret void
+}
+; CHECK: define void @vec_store()
+; CHECK: store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32>* @C, align 4

Added: llvm/trunk/test/Transforms/InstCombine/all-bits-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/all-bits-shift.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/all-bits-shift.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/all-bits-shift.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,45 @@
+; RUN: opt -S -instcombine -expensive-combines < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@d = global i32 15, align 4
+@b = global i32* @d, align 8
+@a = common global i32 0, align 4
+
+; Function Attrs: nounwind
+define signext i32 @main() #1 {
+entry:
+  %0 = load i32*, i32** @b, align 8
+  %1 = load i32, i32* @a, align 4
+  %lnot = icmp eq i32 %1, 0
+  %lnot.ext = zext i1 %lnot to i32
+  %shr.i = lshr i32 2072, %lnot.ext
+  %call.lobit = lshr i32 %shr.i, 7
+  %2 = and i32 %call.lobit, 1
+  %3 = load i32, i32* %0, align 4
+  %or = or i32 %2, %3
+  store i32 %or, i32* %0, align 4
+  %4 = load i32, i32* @a, align 4
+  %lnot.1 = icmp eq i32 %4, 0
+  %lnot.ext.1 = zext i1 %lnot.1 to i32
+  %shr.i.1 = lshr i32 2072, %lnot.ext.1
+  %call.lobit.1 = lshr i32 %shr.i.1, 7
+  %5 = and i32 %call.lobit.1, 1
+  %or.1 = or i32 %5, %or
+  store i32 %or.1, i32* %0, align 4
+  ret i32 %or.1
+
+; Check that both InstCombine and InstSimplify can use computeKnownBits to
+; realize that:
+;   ((2072 >> (L == 0)) >> 7) & 1
+; is always zero.
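+; The zext of (L == 0) is either 0 or 1, and (2072 >> 0) >> 7 = 16 while
+; (2072 >> 1) >> 7 = 8, so bit 0 of the result is 0 either way; both
+; 'and ..., 1' values are known zero and the 'or's fold to the loaded value.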
+
+; CHECK-LABEL: @main
+; CHECK: %[[V1:[0-9]+]] = load i32*, i32** @b, align 8
+; CHECK: %[[V2:[0-9]+]] = load i32, i32* %[[V1]], align 4
+; CHECK: ret i32 %[[V2]]
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+

Added: llvm/trunk/test/Transforms/InstCombine/alloca-big.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/alloca-big.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/alloca-big.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/alloca-big.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; OSS-Fuzz: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=5223
+define void @test_bigalloc() {
+; CHECK-LABEL: @test_bigalloc(
+; CHECK-NEXT:    [[TMP1:%.*]] = alloca [18446744069414584320 x i8], align 1
+; CHECK-NEXT:    [[DOTSUB:%.*]] = getelementptr inbounds [18446744069414584320 x i8], [18446744069414584320 x i8]* [[TMP1]], i64 0, i64 0
+; CHECK-NEXT:    store i8* [[DOTSUB]], i8** undef, align 8
+; CHECK-NEXT:    ret void
+;
+  %1 = alloca i8, i864 -4294967296
+  %2 = getelementptr i8, i8* %1, i1 undef
+  store i8* %2, i8** undef
+  ret void
+}

Added: llvm/trunk/test/Transforms/InstCombine/alloca-cast-debuginfo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/alloca-cast-debuginfo.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/alloca-cast-debuginfo.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/alloca-cast-debuginfo.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,94 @@
+; RUN: opt < %s -S -instcombine -instcombine-lower-dbg-declare=0 | FileCheck %s
+
+; In this example, instcombine wants to turn "local" into an i64, since that's
+; how it is stored. It should keep the debug info referring to the alloca when
+; it does the replacement.
+
+; C source:
+; struct Foo {
+;   int x, y;
+; };
+; void escape(const void*);
+; void f(struct Foo *p) {
+;   struct Foo local;
+;   *(__int64 *)&local = *(__int64 *)p;
+;   escape(&local);
+; }
+
+; ModuleID = '<stdin>'
+source_filename = "t.c"
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc19.11.25508"
+
+%struct.Foo = type { i32, i32 }
+
+define void @f(%struct.Foo* %p) !dbg !11 {
+entry:
+  %local = alloca %struct.Foo, align 4
+  %0 = bitcast %struct.Foo* %local to i8*, !dbg !24
+  call void @llvm.dbg.declare(metadata %struct.Foo* %local, metadata !22, metadata !DIExpression()), !dbg !25
+  %1 = bitcast %struct.Foo* %p to i64*, !dbg !26
+  %2 = load i64, i64* %1, align 8, !dbg !26, !tbaa !27
+  %3 = bitcast %struct.Foo* %local to i64*, !dbg !31
+  store i64 %2, i64* %3, align 4, !dbg !32, !tbaa !27
+  %4 = bitcast %struct.Foo* %local to i8*, !dbg !33
+  call void @escape(i8* %4), !dbg !34
+  %5 = bitcast %struct.Foo* %local to i8*, !dbg !35
+  ret void, !dbg !35
+}
+
+; CHECK-LABEL: define void @f(%struct.Foo* %p)
+; CHECK: %local = alloca i64, align 8
+; CHECK: call void @llvm.dbg.declare(metadata i64* %local, metadata !22, metadata !DIExpression())
+; CHECK: [[simplified:%.*]] = bitcast i64* %local to i8*
+;
+; Another dbg.value for "local" would be redundant here.
+; CHECK-NOT: call void @llvm.dbg.value(metadata i8* [[simplified]], metadata !22, metadata !DIExpression())
+;
+; CHECK: call void @escape(i8* nonnull [[simplified]])
+; CHECK: ret void
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+
+declare void @escape(i8*)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!6, !7, !8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 6.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3)
+!1 = !DIFile(filename: "t.c", directory: "C:\5Csrc\5Cllvm-project\5Cbuild", checksumkind: CSK_MD5, checksum: "d7473625866433067a75fd7d03d2abf7")
+!2 = !{}
+!3 = !{!4}
+!4 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !5, size: 64)
+!5 = !DIBasicType(name: "long long int", size: 64, encoding: DW_ATE_signed)
+!6 = !{i32 2, !"CodeView", i32 1}
+!7 = !{i32 2, !"Debug Info Version", i32 3}
+!8 = !{i32 1, !"wchar_size", i32 2}
+!9 = !{i32 7, !"PIC Level", i32 2}
+!10 = !{!"clang version 6.0.0 "}
+!11 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 5, type: !12, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !20)
+!12 = !DISubroutineType(types: !13)
+!13 = !{null, !14}
+!14 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !15, size: 64)
+!15 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Foo", file: !1, line: 1, size: 64, elements: !16)
+!16 = !{!17, !19}
+!17 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !15, file: !1, line: 2, baseType: !18, size: 32)
+!18 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!19 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !15, file: !1, line: 2, baseType: !18, size: 32, offset: 32)
+!20 = !{!21, !22}
+!21 = !DILocalVariable(name: "p", arg: 1, scope: !11, file: !1, line: 5, type: !14)
+!22 = !DILocalVariable(name: "local", scope: !11, file: !1, line: 6, type: !15)
+!23 = !DILocation(line: 5, column: 20, scope: !11)
+!24 = !DILocation(line: 6, column: 3, scope: !11)
+!25 = !DILocation(line: 6, column: 14, scope: !11)
+!26 = !DILocation(line: 7, column: 24, scope: !11)
+!27 = !{!28, !28, i64 0}
+!28 = !{!"long long", !29, i64 0}
+!29 = !{!"omnipotent char", !30, i64 0}
+!30 = !{!"Simple C/C++ TBAA"}
+!31 = !DILocation(line: 7, column: 3, scope: !11)
+!32 = !DILocation(line: 7, column: 22, scope: !11)
+!33 = !DILocation(line: 8, column: 10, scope: !11)
+!34 = !DILocation(line: 8, column: 3, scope: !11)
+!35 = !DILocation(line: 9, column: 1, scope: !11)

Added: llvm/trunk/test/Transforms/InstCombine/alloca.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/alloca.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/alloca.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/alloca.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,179 @@
+; RUN: opt < %s -instcombine -S -data-layout="E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" | FileCheck %s -check-prefix=CHECK -check-prefix=ALL
+; RUN: opt < %s -instcombine -S -data-layout="E-p:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" | FileCheck %s -check-prefix=P32 -check-prefix=ALL
+; RUN: opt < %s -instcombine -S | FileCheck %s -check-prefix=NODL -check-prefix=ALL
+
+
+declare void @use(...)
+
+@int = global i32 zeroinitializer
+
+; Zero byte allocas should be merged if they can't be deleted.
+; CHECK-LABEL: @test(
+; CHECK: alloca
+; CHECK-NOT: alloca
+define void @test() {
+        %X = alloca [0 x i32]           ; <[0 x i32]*> [#uses=1]
+        call void (...) @use( [0 x i32]* %X )
+        %Y = alloca i32, i32 0          ; <i32*> [#uses=1]
+        call void (...) @use( i32* %Y )
+        %Z = alloca {  }                ; <{  }*> [#uses=1]
+        call void (...) @use( {  }* %Z )
+        %size = load i32, i32* @int
+        %A = alloca {{}}, i32 %size
+        call void (...) @use( {{}}* %A )
+        ret void
+}
+
+; Allocas that are never read (only stored to) should be deleted.
+; CHECK-LABEL: @test2(
+; CHECK-NOT: alloca
+define void @test2() {
+        %A = alloca i32         ; <i32*> [#uses=1]
+        store i32 123, i32* %A
+        ret void
+}
+
+; The same holds when the dead alloca is only accessed through a GEP.
+; CHECK-LABEL: @test3(
+; CHECK-NOT: alloca
+define void @test3() {
+        %A = alloca { i32 }             ; <{ i32 }*> [#uses=1]
+        %B = getelementptr { i32 }, { i32 }* %A, i32 0, i32 0            ; <i32*> [#uses=1]
+        store i32 123, i32* %B
+        ret void
+}
+
+; CHECK-LABEL: @test4(
+; CHECK: = zext i32 %n to i64
+; CHECK: %A = alloca i32, i64 %
+define i32* @test4(i32 %n) {
+  %A = alloca i32, i32 %n
+  ret i32* %A
+}
+
+; Allocas which are only used by GEPs, bitcasts, addrspacecasts, and stores
+; (transitively) should be deleted.
+define void @test5() {
+; CHECK-LABEL: @test5(
+; CHECK-NOT: alloca
+; CHECK-NOT: store
+; CHECK: ret
+
+entry:
+  %a = alloca { i32 }
+  %b = alloca i32*
+  %c = alloca i32
+  %a.1 = getelementptr { i32 }, { i32 }* %a, i32 0, i32 0
+  store i32 123, i32* %a.1
+  store i32* %a.1, i32** %b
+  %b.1 = bitcast i32** %b to i32*
+  store i32 123, i32* %b.1
+  %a.2 = getelementptr { i32 }, { i32 }* %a, i32 0, i32 0
+  store atomic i32 2, i32* %a.2 unordered, align 4
+  %a.3 = getelementptr { i32 }, { i32 }* %a, i32 0, i32 0
+  store atomic i32 3, i32* %a.3 release, align 4
+  %a.4 = getelementptr { i32 }, { i32 }* %a, i32 0, i32 0
+  store atomic i32 4, i32* %a.4 seq_cst, align 4
+  %c.1 = addrspacecast i32* %c to i32 addrspace(1)*
+  store i32 123, i32 addrspace(1)* %c.1
+  ret void
+}
+
+declare void @f(i32* %p)
+
+; Check that we don't delete allocas when doing so would be incorrect: %a has a
+; volatile store and %b escapes to a call.
+define void @test6() {
+; CHECK-LABEL: @test6(
+; CHECK-NOT: ret
+; CHECK: alloca
+; CHECK-NEXT: alloca
+; CHECK: ret
+
+entry:
+  %a = alloca { i32 }
+  %b = alloca i32
+  %a.1 = getelementptr { i32 }, { i32 }* %a, i32 0, i32 0
+  store volatile i32 123, i32* %a.1
+  tail call void @f(i32* %b)
+  ret void
+}
+
+; PR14371
+%opaque_type = type opaque
+%real_type = type { { i32, i32* } }
+
+@opaque_global = external constant %opaque_type, align 4
+
+define void @test7() {
+entry:
+  %0 = alloca %real_type, align 4
+  %1 = bitcast %real_type* %0 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* bitcast (%opaque_type* @opaque_global to i8*), i32 8, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
+
+
+; Check that the GEP indices use the pointer size, or 64 if unknown
+define void @test8() {
+; CHECK-LABEL: @test8(
+; CHECK: alloca [100 x i32]
+; CHECK: getelementptr inbounds [100 x i32], [100 x i32]* %x1, i64 0, i64 0
+
+; P32-LABEL: @test8(
+; P32: alloca [100 x i32]
+; P32: getelementptr inbounds [100 x i32], [100 x i32]* %x1, i32 0, i32 0
+
+; NODL-LABEL: @test8(
+; NODL: alloca [100 x i32]
+; NODL: getelementptr inbounds [100 x i32], [100 x i32]* %x1, i64 0, i64 0
+  %x = alloca i32, i32 100
+  call void (...) @use(i32* %x)
+  ret void
+}
+
+; PR19569
+%struct_type = type { i32, i32 }
+declare void @test9_aux(<{ %struct_type }>* inalloca)
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
+
+define void @test9(%struct_type* %a) {
+; CHECK-LABEL: @test9(
+entry:
+  %inalloca.save = call i8* @llvm.stacksave()
+  %argmem = alloca inalloca <{ %struct_type }>
+; CHECK: alloca inalloca i64, align 8
+  %0 = getelementptr inbounds <{ %struct_type }>, <{ %struct_type }>* %argmem, i32 0, i32 0
+  %1 = bitcast %struct_type* %0 to i8*
+  %2 = bitcast %struct_type* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %1, i8* align 4 %2, i32 8, i1 false)
+  call void @test9_aux(<{ %struct_type }>* inalloca %argmem)
+  call void @llvm.stackrestore(i8* %inalloca.save)
+  ret void
+}
+
+define void @test10() {
+entry:
+; ALL-LABEL: @test10(
+; ALL: %v32 = alloca i1, align 8
+; ALL: %v64 = alloca i1, align 8
+; ALL: %v33 = alloca i1, align 8
+  %v32 = alloca i1, align 8
+  %v64 = alloca i1, i64 1, align 8
+  %v33 = alloca i1, i33 1, align 8
+  call void (...) @use(i1* %v32, i1* %v64, i1* %v33)
+  ret void
+}
+
+define void @test11() {
+entry:
+; ALL-LABEL: @test11(
+; ALL: %y = alloca i32
+; ALL: call void (...) @use(i32* nonnull @int) [ "blah"(i32* %y) ]
+; ALL: ret void
+  %y = alloca i32
+  call void (...) @use(i32* nonnull @int) [ "blah"(i32* %y) ]
+  ret void
+}

Added: llvm/trunk/test/Transforms/InstCombine/allocsize-32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/allocsize-32.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/allocsize-32.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/allocsize-32.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+;
+; The idea is that we want to have sane semantics (e.g. not assertion failures)
+; when given an allocsize function that takes a 64-bit argument in the face of
+; 32-bit pointers.
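+; A size of 100 fits in the 32-bit result type, so the first @llvm.objectsize
+; call below can be folded to a constant, while 5000000000 does not fit and
+; the second call must be left alone rather than truncated.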
+
+target datalayout="e-p:32:32:32"
+
+declare i8* @my_malloc(i8*, i64) allocsize(1)
+
+define void @test_malloc(i8** %p, i32* %r) {
+  %1 = call i8* @my_malloc(i8* null, i64 100)
+  store i8* %1, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+  %2 = call i32 @llvm.objectsize.i32.p0i8(i8* %1, i1 false)
+  ; CHECK: store i32 100
+  store i32 %2, i32* %r, align 8
+
+  ; Big number is 5 billion.
+  %3 = call i8* @my_malloc(i8* null, i64 5000000000)
+  store i8* %3, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+  ; CHECK: call i32 @llvm.objectsize
+  %4 = call i32 @llvm.objectsize.i32.p0i8(i8* %3, i1 false)
+  store i32 %4, i32* %r, align 8
+  ret void
+}
+
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1)

Added: llvm/trunk/test/Transforms/InstCombine/allocsize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/allocsize.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/allocsize.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/allocsize.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,154 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+;
+; Test that instcombine folds allocsize function calls properly.
+; Dummy arguments are inserted to verify that allocsize is picking the right
+; args, and to prove that arbitrary unfoldable values don't interfere with
+; allocsize if they're not used by allocsize.
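+; Note: allocsize(1) marks the second parameter (zero-based index 1) as the
+; allocation size in bytes, and allocsize(2, 3) marks the size as the product
+; of the third and fourth parameters, e.g. 100 * 5 = 500 bytes for @my_calloc.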
+
+declare i8* @my_malloc(i8*, i32) allocsize(1)
+declare i8* @my_calloc(i8*, i8*, i32, i32) allocsize(2, 3)
+
+; CHECK-LABEL: define void @test_malloc
+define void @test_malloc(i8** %p, i64* %r) {
+  %1 = call i8* @my_malloc(i8* null, i32 100)
+  store i8* %1, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+  %2 = call i64 @llvm.objectsize.i64.p0i8(i8* %1, i1 false)
+  ; CHECK: store i64 100
+  store i64 %2, i64* %r, align 8
+  ret void
+}
+
+; CHECK-LABEL: define void @test_calloc
+define void @test_calloc(i8** %p, i64* %r) {
+  %1 = call i8* @my_calloc(i8* null, i8* null, i32 100, i32 5)
+  store i8* %1, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+  %2 = call i64 @llvm.objectsize.i64.p0i8(i8* %1, i1 false)
+  ; CHECK: store i64 500
+  store i64 %2, i64* %r, align 8
+  ret void
+}
+
+; Failure cases with non-constant values...
+; CHECK-LABEL: define void @test_malloc_fails
+define void @test_malloc_fails(i8** %p, i64* %r, i32 %n) {
+  %1 = call i8* @my_malloc(i8* null, i32 %n)
+  store i8* %1, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+  ; CHECK: @llvm.objectsize.i64.p0i8
+  %2 = call i64 @llvm.objectsize.i64.p0i8(i8* %1, i1 false)
+  store i64 %2, i64* %r, align 8
+  ret void
+}
+
+; CHECK-LABEL: define void @test_calloc_fails
+define void @test_calloc_fails(i8** %p, i64* %r, i32 %n) {
+  %1 = call i8* @my_calloc(i8* null, i8* null, i32 %n, i32 5)
+  store i8* %1, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+  ; CHECK: @llvm.objectsize.i64.p0i8
+  %2 = call i64 @llvm.objectsize.i64.p0i8(i8* %1, i1 false)
+  store i64 %2, i64* %r, align 8
+
+
+  %3 = call i8* @my_calloc(i8* null, i8* null, i32 100, i32 %n)
+  store i8* %3, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+  ; CHECK: @llvm.objectsize.i64.p0i8
+  %4 = call i64 @llvm.objectsize.i64.p0i8(i8* %3, i1 false)
+  store i64 %4, i64* %r, align 8
+  ret void
+}
+
+declare i8* @my_malloc_outofline(i8*, i32) #0
+declare i8* @my_calloc_outofline(i8*, i8*, i32, i32) #1
+
+; Verify that allocsize given out of line (via attribute groups #0/#1) is parsed correctly.
+; CHECK-LABEL: define void @test_outofline
+define void @test_outofline(i8** %p, i64* %r) {
+  %1 = call i8* @my_malloc_outofline(i8* null, i32 100)
+  store i8* %1, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+  %2 = call i64 @llvm.objectsize.i64.p0i8(i8* %1, i1 false)
+  ; CHECK: store i64 100
+  store i64 %2, i64* %r, align 8
+
+
+  %3 = call i8* @my_calloc_outofline(i8* null, i8* null, i32 100, i32 5)
+  store i8* %3, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+  %4 = call i64 @llvm.objectsize.i64.p0i8(i8* %3, i1 false)
+  ; CHECK: store i64 500
+  store i64 %4, i64* %r, align 8
+  ret void
+}
+
+declare i8* @my_malloc_i64(i8*, i64) #0
+declare i8* @my_tiny_calloc(i8*, i8*, i8, i8) #1
+declare i8* @my_varied_calloc(i8*, i8*, i32, i8) #1
+
+; CHECK-LABEL: define void @test_overflow
+define void @test_overflow(i8** %p, i32* %r) {
+  %r64 = bitcast i32* %r to i64*
+
+  ; (2**31 + 1) * 2 = 2**32 + 2 does not fit in 32 bits, so the size computation overflows.
+  %big_malloc = call i8* @my_calloc(i8* null, i8* null, i32 2147483649, i32 2)
+  store i8* %big_malloc, i8** %p, align 8
+
+  ; CHECK: @llvm.objectsize
+  %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %big_malloc, i1 false)
+  store i32 %1, i32* %r, align 4
+
+
+  %big_little_malloc = call i8* @my_tiny_calloc(i8* null, i8* null, i8 127, i8 4)
+  store i8* %big_little_malloc, i8** %p, align 8
+
+  ; CHECK: store i32 508
+  %2 = call i32 @llvm.objectsize.i32.p0i8(i8* %big_little_malloc, i1 false)
+  store i32 %2, i32* %r, align 4
+
+
+  ; malloc(2**33)
+  %big_malloc_i64 = call i8* @my_malloc_i64(i8* null, i64 8589934592)
+  store i8* %big_malloc_i64, i8** %p, align 8
+
+  ; CHECK: @llvm.objectsize
+  %3 = call i32 @llvm.objectsize.i32.p0i8(i8* %big_malloc_i64, i1 false)
+  store i32 %3, i32* %r, align 4
+
+
+  %4 = call i64 @llvm.objectsize.i64.p0i8(i8* %big_malloc_i64, i1 false)
+  ; CHECK: store i64 8589934592
+  store i64 %4, i64* %r64, align 8
+
+
+  ; Just intended to ensure that we properly handle args of different types...
+  %varied_calloc = call i8* @my_varied_calloc(i8* null, i8* null, i32 1000, i8 5)
+  store i8* %varied_calloc, i8** %p, align 8
+
+  ; CHECK: store i32 5000
+  %5 = call i32 @llvm.objectsize.i32.p0i8(i8* %varied_calloc, i1 false)
+  store i32 %5, i32* %r, align 4
+
+  ret void
+}
+
+; CHECK-LABEL: define void @test_nobuiltin
+; We had a bug where `nobuiltin` would cause `allocsize` to be ignored in
+; @llvm.objectsize calculations.
+define void @test_nobuiltin(i8** %p, i64* %r) {
+  %1 = call i8* @my_malloc(i8* null, i32 100) nobuiltin
+  store i8* %1, i8** %p, align 8
+
+  %2 = call i64 @llvm.objectsize.i64.p0i8(i8* %1, i1 false)
+  ; CHECK: store i64 100
+  store i64 %2, i64* %r, align 8
+  ret void
+}
+
+attributes #0 = { allocsize(1) }
+attributes #1 = { allocsize(2, 3) }
+
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1)
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1)

Added: llvm/trunk/test/Transforms/InstCombine/and-compare.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/and-compare.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/and-compare.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/and-compare.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Should be optimized to one and.
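+; ((%a & 65280) != (%b & 65280)) is equivalent to (((%a ^ %b) & 65280) != 0),
+; so only one 'and' of the xor'd values is needed.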
+define i1 @test1(i32 %a, i32 %b) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 65280
+; CHECK-NEXT:    [[TMP:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[TMP]]
+;
+  %tmp1 = and i32 %a, 65280
+  %tmp3 = and i32 %b, 65280
+  %tmp = icmp ne i32 %tmp1, %tmp3
+  ret i1 %tmp
+}
+
+define <2 x i1> @test1vec(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: @test1vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 65280, i32 65280>
+; CHECK-NEXT:    [[TMP:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[TMP]]
+;
+  %tmp1 = and <2 x i32> %a, <i32 65280, i32 65280>
+  %tmp3 = and <2 x i32> %b, <i32 65280, i32 65280>
+  %tmp = icmp ne <2 x i32> %tmp1, %tmp3
+  ret <2 x i1> %tmp
+}
+
+define i1 @test2(i64 %A) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 %A to i8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[TMP1]], -1
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %and = and i64 %A, 128
+  %cmp = icmp eq i64 %and, 0
+  ret i1 %cmp
+}
+
+define <2 x i1> @test2vec(<2 x i64> %A) {
+; CHECK-LABEL: @test2vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i64> %A to <2 x i8>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[TMP1]], <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %and = and <2 x i64> %A, <i64 128, i64 128>
+  %cmp = icmp eq <2 x i64> %and, zeroinitializer
+  ret <2 x i1> %cmp
+}
+
+define i1 @test3(i64 %A) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 %A to i8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[TMP1]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %and = and i64 %A, 128
+  %cmp = icmp ne i64 %and, 0
+  ret i1 %cmp
+}
+
+define <2 x i1> @test3vec(<2 x i64> %A) {
+; CHECK-LABEL: @test3vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i64> %A to <2 x i8>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %and = and <2 x i64> %A, <i64 128, i64 128>
+  %cmp = icmp ne <2 x i64> %and, zeroinitializer
+  ret <2 x i1> %cmp
+}
+

Added: llvm/trunk/test/Transforms/InstCombine/and-fcmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/and-fcmp.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/and-fcmp.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/and-fcmp.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,1584 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i1 @PR1738(double %x, double %y) {
+; CHECK-LABEL: @PR1738(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ord double [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp1 = fcmp ord double %x, 0.0
+  %cmp2 = fcmp ord double %y, 0.0
+  %and = and i1 %cmp1, %cmp2
+  ret i1 %and
+}
+
+define <2 x i1> @PR1738_vec_undef(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: @PR1738_vec_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ord <2 x double> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %cmp1 = fcmp ord <2 x double> %x, <double 0.0, double undef>
+  %cmp2 = fcmp ord <2 x double> %y, <double undef, double 0.0>
+  %or = and <2 x i1> %cmp1, %cmp2
+  ret <2 x i1> %or
+}
+
+define i1 @PR41069(i1 %z, float %c, float %d) {
+; CHECK-LABEL: @PR41069(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ord float [[D:%.*]], [[C:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = and i1 [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %ord1 = fcmp arcp ord float %c, 0.0
+  %and = and i1 %ord1, %z
+  %ord2 = fcmp afn ord float %d, 0.0
+  %r = and i1 %and, %ord2
+  ret i1 %r
+}
+
+define i1 @PR41069_commute(i1 %z, float %c, float %d) {
+; CHECK-LABEL: @PR41069_commute(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ninf ord float [[D:%.*]], [[C:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = and i1 [[TMP1]], [[Z:%.*]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %ord1 = fcmp ninf ord float %c, 0.0
+  %and = and i1 %ord1, %z
+  %ord2 = fcmp ninf reassoc ord float %d, 0.0
+  %r = and i1 %ord2, %and
+  ret i1 %r
+}
+
+; Commute differently and make sure vectors work.
+
+define <2 x i1> @PR41069_vec(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %d) {
+; CHECK-LABEL: @PR41069_vec(
+; CHECK-NEXT:    [[ORD1:%.*]] = fcmp ord <2 x double> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ord <2 x double> [[D:%.*]], [[C:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i1> [[TMP1]], [[ORD1]]
+; CHECK-NEXT:    ret <2 x i1> [[R]]
+;
+  %ord1 = fcmp ord <2 x double> %a, %b
+  %ord2 = fcmp ord <2 x double> %c, <double 0.0, double undef>
+  %and = and <2 x i1> %ord1, %ord2
+  %ord3 = fcmp ord <2 x double> %d, zeroinitializer
+  %r = and <2 x i1> %and, %ord3
+  ret <2 x i1> %r
+}
+
+define <2 x i1> @PR41069_vec_commute(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %d) {
+; CHECK-LABEL: @PR41069_vec_commute(
+; CHECK-NEXT:    [[ORD1:%.*]] = fcmp ord <2 x double> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ord <2 x double> [[D:%.*]], [[C:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i1> [[TMP1]], [[ORD1]]
+; CHECK-NEXT:    ret <2 x i1> [[R]]
+;
+  %ord1 = fcmp ord <2 x double> %a, %b
+  %ord2 = fcmp ord <2 x double> %c, <double 0.0, double undef>
+  %and = and <2 x i1> %ord1, %ord2
+  %ord3 = fcmp ord <2 x double> %d, zeroinitializer
+  %r = and <2 x i1> %ord3, %and
+  ret <2 x i1> %r
+}
+
+define i1 @PR15737(float %a, double %b) {
+; CHECK-LABEL: @PR15737(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ord float [[A:%.*]], 0.000000e+00
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ord double [[B:%.*]], 0.000000e+00
+; CHECK-NEXT:    [[AND:%.*]] = and i1 [[CMP]], [[CMP1]]
+; CHECK-NEXT:    ret i1 [[AND]]
+;
+  %cmp = fcmp ord float %a, 0.000000e+00
+  %cmp1 = fcmp ord double %b, 0.000000e+00
+  %and = and i1 %cmp, %cmp1
+  ret i1 %and
+}
+
+define <2 x i1> @t9(<2 x float> %a, <2 x double> %b) {
+; CHECK-LABEL: @t9(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ord <2 x float> [[A:%.*]], zeroinitializer
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ord <2 x double> [[B:%.*]], zeroinitializer
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i1> [[CMP]], [[CMP1]]
+; CHECK-NEXT:    ret <2 x i1> [[AND]]
+;
+  %cmp = fcmp ord <2 x float> %a, zeroinitializer
+  %cmp1 = fcmp ord <2 x double> %b, zeroinitializer
+  %and = and <2 x i1> %cmp, %cmp1
+  ret <2 x i1> %and
+}
+
+define i1 @fcmp_ord_nonzero(float %x, float %y) {
+; CHECK-LABEL: @fcmp_ord_nonzero(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ord float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp1 = fcmp ord float %x, 1.0
+  %cmp2 = fcmp ord float %y, 2.0
+  %and = and i1 %cmp1, %cmp2
+  ret i1 %and
+}
+
+define <3 x i1> @fcmp_ord_nonzero_vec(<3 x float> %x, <3 x float> %y) {
+; CHECK-LABEL: @fcmp_ord_nonzero_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ord <3 x float> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %cmp1 = fcmp ord <3 x float> %x, <float 1.0, float 2.0, float 3.0>
+  %cmp2 = fcmp ord <3 x float> %y, <float 3.0, float 2.0, float 1.0>
+  %and = and <3 x i1> %cmp1, %cmp2
+  ret <3 x i1> %and
+}
+
+define i1 @auto_gen_0(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_0(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp false double %a, %b
+  %cmp1 = fcmp false double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_1(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_1(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp oeq double %a, %b
+  %cmp1 = fcmp false double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_2(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_2(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp oeq double %a, %b
+  %cmp1 = fcmp oeq double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_3(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_3(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp ogt double %a, %b
+  %cmp1 = fcmp false double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_4(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_4(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp ogt double %a, %b
+  %cmp1 = fcmp oeq double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_5(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_5(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ogt double %a, %b
+  %cmp1 = fcmp ogt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_6(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_6(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp oge double %a, %b
+  %cmp1 = fcmp false double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_7(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_7(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp oge double %a, %b
+  %cmp1 = fcmp oeq double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_8(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_8(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp oge double %a, %b
+  %cmp1 = fcmp ogt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_9(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_9(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp oge double %a, %b
+  %cmp1 = fcmp oge double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_10(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_10(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp olt double %a, %b
+  %cmp1 = fcmp false double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_11(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_11(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp olt double %a, %b
+  %cmp1 = fcmp oeq double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_12(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_12(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp olt double %a, %b
+  %cmp1 = fcmp ogt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_13(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_13(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp olt double %a, %b
+  %cmp1 = fcmp oge double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_14(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_14(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp olt double %a, %b
+  %cmp1 = fcmp olt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_15(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_15(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp ole double %a, %b
+  %cmp1 = fcmp false double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_16(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_16(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ole double %a, %b
+  %cmp1 = fcmp oeq double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_17(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_17(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp ole double %a, %b
+  %cmp1 = fcmp ogt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_18(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_18(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ole double %a, %b
+  %cmp1 = fcmp oge double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_19(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_19(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ole double %a, %b
+  %cmp1 = fcmp olt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_20(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_20(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ole double %a, %b
+  %cmp1 = fcmp ole double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_21(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_21(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp one double %a, %b
+  %cmp1 = fcmp false double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_22(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_22(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp one double %a, %b
+  %cmp1 = fcmp oeq double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_23(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_23(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp one double %a, %b
+  %cmp1 = fcmp ogt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_24(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_24(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp one double %a, %b
+  %cmp1 = fcmp oge double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_25(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_25(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp one double %a, %b
+  %cmp1 = fcmp olt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_26(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_26(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp one double %a, %b
+  %cmp1 = fcmp ole double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_27(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_27(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp one double %a, %b
+  %cmp1 = fcmp one double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_28(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_28(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp ord double %a, %b
+  %cmp1 = fcmp false double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_29(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_29(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ord double %a, %b
+  %cmp1 = fcmp oeq double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_30(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_30(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ord double %a, %b
+  %cmp1 = fcmp ogt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_31(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_31(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ord double %a, %b
+  %cmp1 = fcmp oge double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_32(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_32(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ord double %a, %b
+  %cmp1 = fcmp olt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_33(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_33(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ord double %a, %b
+  %cmp1 = fcmp ole double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_34(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_34(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ord double %a, %b
+  %cmp1 = fcmp one double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_35(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_35(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ord double %a, %b
+  %cmp1 = fcmp ord double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_36(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_36(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp ueq double %a, %b
+  %cmp1 = fcmp false double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_37(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_37(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ueq double %a, %b
+  %cmp1 = fcmp oeq double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_38(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_38(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp ueq double %a, %b
+  %cmp1 = fcmp ogt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_39(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_39(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ueq double %a, %b
+  %cmp1 = fcmp oge double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_40(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_40(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp ueq double %a, %b
+  %cmp1 = fcmp olt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_41(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_41(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ueq double %a, %b
+  %cmp1 = fcmp ole double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_42(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_42(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp ueq double %a, %b
+  %cmp1 = fcmp one double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_43(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_43(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ueq double %a, %b
+  %cmp1 = fcmp ord double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_44(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_44(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ueq double %a, %b
+  %cmp1 = fcmp ueq double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_45(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_45(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp ugt double %a, %b
+  %cmp1 = fcmp false double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_46(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_46(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp ugt double %a, %b
+  %cmp1 = fcmp oeq double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_47(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_47(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ugt double %a, %b
+  %cmp1 = fcmp ogt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_48(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_48(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ugt double %a, %b
+  %cmp1 = fcmp oge double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_49(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_49(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp ugt double %a, %b
+  %cmp1 = fcmp olt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_50(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_50(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp ugt double %a, %b
+  %cmp1 = fcmp ole double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_51(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_51(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ugt double %a, %b
+  %cmp1 = fcmp one double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_52(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_52(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ugt double %a, %b
+  %cmp1 = fcmp ord double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_53(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_53(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ugt double %a, %b
+  %cmp1 = fcmp ueq double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_54(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_54(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ugt double %a, %b
+  %cmp1 = fcmp ugt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_55(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_55(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp uge double %a, %b
+  %cmp1 = fcmp false double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_56(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_56(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp uge double %a, %b
+  %cmp1 = fcmp oeq double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_57(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_57(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp uge double %a, %b
+  %cmp1 = fcmp ogt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_58(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_58(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp uge double %a, %b
+  %cmp1 = fcmp oge double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_59(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_59(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp uge double %a, %b
+  %cmp1 = fcmp olt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_60(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_60(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp uge double %a, %b
+  %cmp1 = fcmp ole double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_61(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_61(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp uge double %a, %b
+  %cmp1 = fcmp one double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_62(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_62(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp uge double %a, %b
+  %cmp1 = fcmp ord double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_63(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_63(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp uge double %a, %b
+  %cmp1 = fcmp ueq double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_64(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_64(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp uge double %a, %b
+  %cmp1 = fcmp ugt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_65(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_65(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp uge double %a, %b
+  %cmp1 = fcmp uge double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_66(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_66(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp ult double %a, %b
+  %cmp1 = fcmp false double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_67(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_67(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp ult double %a, %b
+  %cmp1 = fcmp oeq double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_68(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_68(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp ult double %a, %b
+  %cmp1 = fcmp ogt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_69(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_69(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp ult double %a, %b
+  %cmp1 = fcmp oge double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_70(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_70(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ult double %a, %b
+  %cmp1 = fcmp olt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_71(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_71(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ult double %a, %b
+  %cmp1 = fcmp ole double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_72(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_72(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ult double %a, %b
+  %cmp1 = fcmp one double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_73(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_73(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ult double %a, %b
+  %cmp1 = fcmp ord double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_74(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_74(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ult double %a, %b
+  %cmp1 = fcmp ueq double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_75(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_75(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ult double %a, %b
+  %cmp1 = fcmp ugt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_76(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_76(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ult double %a, %b
+  %cmp1 = fcmp uge double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_77(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_77(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ult double %a, %b
+  %cmp1 = fcmp ult double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_78(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_78(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp ule double %a, %b
+  %cmp1 = fcmp false double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_79(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_79(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ule double %a, %b
+  %cmp1 = fcmp oeq double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_80(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_80(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp ule double %a, %b
+  %cmp1 = fcmp ogt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_81(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_81(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ule double %a, %b
+  %cmp1 = fcmp oge double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_82(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_82(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ule double %a, %b
+  %cmp1 = fcmp olt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_83(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_83(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ule double %a, %b
+  %cmp1 = fcmp ole double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_84(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_84(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ule double %a, %b
+  %cmp1 = fcmp one double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_85(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_85(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ule double %a, %b
+  %cmp1 = fcmp ord double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_86(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_86(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ule double %a, %b
+  %cmp1 = fcmp ueq double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_87(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_87(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ule double %a, %b
+  %cmp1 = fcmp ugt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_88(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_88(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ule double %a, %b
+  %cmp1 = fcmp uge double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_89(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_89(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ule double %a, %b
+  %cmp1 = fcmp ult double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_90(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_90(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp ule double %a, %b
+  %cmp1 = fcmp ule double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_91(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_91(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp une double %a, %b
+  %cmp1 = fcmp false double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_92(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_92(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp une double %a, %b
+  %cmp1 = fcmp oeq double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_93(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_93(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp une double %a, %b
+  %cmp1 = fcmp ogt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_94(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_94(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp une double %a, %b
+  %cmp1 = fcmp oge double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_95(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_95(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp une double %a, %b
+  %cmp1 = fcmp olt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_96(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_96(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp une double %a, %b
+  %cmp1 = fcmp ole double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_97(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_97(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp une double %a, %b
+  %cmp1 = fcmp one double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_98(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_98(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp une double %a, %b
+  %cmp1 = fcmp ord double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_99(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_99(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp une double %a, %b
+  %cmp1 = fcmp ueq double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_100(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_100(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp une double %a, %b
+  %cmp1 = fcmp ugt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_101(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_101(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp une double %a, %b
+  %cmp1 = fcmp uge double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_102(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_102(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp une double %a, %b
+  %cmp1 = fcmp ult double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_103(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_103(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp une double %a, %b
+  %cmp1 = fcmp ule double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_104(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_104(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp une double %a, %b
+  %cmp1 = fcmp une double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_105(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_105(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp uno double %a, %b
+  %cmp1 = fcmp false double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_106(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_106(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp uno double %a, %b
+  %cmp1 = fcmp oeq double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_107(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_107(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp uno double %a, %b
+  %cmp1 = fcmp ogt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_108(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_108(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp uno double %a, %b
+  %cmp1 = fcmp oge double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_109(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_109(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp uno double %a, %b
+  %cmp1 = fcmp olt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_110(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_110(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp uno double %a, %b
+  %cmp1 = fcmp ole double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_111(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_111(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp uno double %a, %b
+  %cmp1 = fcmp one double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_112(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_112(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp uno double %a, %b
+  %cmp1 = fcmp ord double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_113(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_113(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp uno double %a, %b
+  %cmp1 = fcmp ueq double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_114(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_114(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp uno double %a, %b
+  %cmp1 = fcmp ugt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_115(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_115(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp uno double %a, %b
+  %cmp1 = fcmp uge double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_116(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_116(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp uno double %a, %b
+  %cmp1 = fcmp ult double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_117(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_117(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp uno double %a, %b
+  %cmp1 = fcmp ule double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_118(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_118(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp uno double %a, %b
+  %cmp1 = fcmp une double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_119(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_119(
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp = fcmp uno double %a, %b
+  %cmp1 = fcmp uno double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_120(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_120(
+; CHECK-NEXT:    ret i1 false
+;
+  %cmp = fcmp true double %a, %b
+  %cmp1 = fcmp false double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_121(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_121(
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp oeq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP1]]
+;
+  %cmp = fcmp true double %a, %b
+  %cmp1 = fcmp oeq double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_122(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_122(
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP1]]
+;
+  %cmp = fcmp true double %a, %b
+  %cmp1 = fcmp ogt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_123(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_123(
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp oge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP1]]
+;
+  %cmp = fcmp true double %a, %b
+  %cmp1 = fcmp oge double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_124(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_124(
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP1]]
+;
+  %cmp = fcmp true double %a, %b
+  %cmp1 = fcmp olt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_125(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_125(
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ole double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP1]]
+;
+  %cmp = fcmp true double %a, %b
+  %cmp1 = fcmp ole double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_126(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_126(
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp one double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP1]]
+;
+  %cmp = fcmp true double %a, %b
+  %cmp1 = fcmp one double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_127(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_127(
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ord double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP1]]
+;
+  %cmp = fcmp true double %a, %b
+  %cmp1 = fcmp ord double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_128(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_128(
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ueq double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP1]]
+;
+  %cmp = fcmp true double %a, %b
+  %cmp1 = fcmp ueq double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_129(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_129(
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ugt double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP1]]
+;
+  %cmp = fcmp true double %a, %b
+  %cmp1 = fcmp ugt double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_130(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_130(
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp uge double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP1]]
+;
+  %cmp = fcmp true double %a, %b
+  %cmp1 = fcmp uge double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_131(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_131(
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ult double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP1]]
+;
+  %cmp = fcmp true double %a, %b
+  %cmp1 = fcmp ult double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_132(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_132(
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ule double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP1]]
+;
+  %cmp = fcmp true double %a, %b
+  %cmp1 = fcmp ule double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_133(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_133(
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp une double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP1]]
+;
+  %cmp = fcmp true double %a, %b
+  %cmp1 = fcmp une double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_134(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_134(
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp uno double [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i1 [[CMP1]]
+;
+  %cmp = fcmp true double %a, %b
+  %cmp1 = fcmp uno double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}
+
+define i1 @auto_gen_135(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_135(
+; CHECK-NEXT:    ret i1 true
+;
+  %cmp = fcmp true double %a, %b
+  %cmp1 = fcmp true double %a, %b
+  %retval = and i1 %cmp, %cmp1
+  ret i1 %retval
+}

Added: llvm/trunk/test/Transforms/InstCombine/and-narrow.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/and-narrow.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/and-narrow.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/and-narrow.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,192 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -data-layout="n8:16:32" -S | FileCheck %s
+; RUN: opt < %s -instcombine -data-layout="n16"      -S | FileCheck %s
+
+; PR35792 - https://bugs.llvm.org/show_bug.cgi?id=35792
+
+define i16 @zext_add(i8 %x) {
+; CHECK-LABEL: @zext_add(
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], 44
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = zext i8 [[TMP2]] to i16
+; CHECK-NEXT:    ret i16 [[R]]
+;
+  %z = zext i8 %x to i16
+  %b = add i16 %z, 44
+  %r = and i16 %b, %z
+  ret i16 %r
+}
+
+define i16 @zext_sub(i8 %x) {
+; CHECK-LABEL: @zext_sub(
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i8 -5, [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = zext i8 [[TMP2]] to i16
+; CHECK-NEXT:    ret i16 [[R]]
+;
+  %z = zext i8 %x to i16
+  %b = sub i16 -5, %z
+  %r = and i16 %b, %z
+  ret i16 %r
+}
+
+define i16 @zext_mul(i8 %x) {
+; CHECK-LABEL: @zext_mul(
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i8 [[X:%.*]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = zext i8 [[TMP2]] to i16
+; CHECK-NEXT:    ret i16 [[R]]
+;
+  %z = zext i8 %x to i16
+  %b = mul i16 %z, 3
+  %r = and i16 %b, %z
+  ret i16 %r
+}
+
+define i16 @zext_lshr(i8 %x) {
+; CHECK-LABEL: @zext_lshr(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i8 [[X:%.*]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = zext i8 [[TMP2]] to i16
+; CHECK-NEXT:    ret i16 [[R]]
+;
+  %z = zext i8 %x to i16
+  %b = lshr i16 %z, 4
+  %r = and i16 %b, %z
+  ret i16 %r
+}
+
+define i16 @zext_ashr(i8 %x) {
+; CHECK-LABEL: @zext_ashr(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i8 [[X:%.*]], 2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = zext i8 [[TMP2]] to i16
+; CHECK-NEXT:    ret i16 [[R]]
+;
+  %z = zext i8 %x to i16
+  %b = ashr i16 %z, 2
+  %r = and i16 %b, %z
+  ret i16 %r
+}
+
+define i16 @zext_shl(i8 %x) {
+; CHECK-LABEL: @zext_shl(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 [[X:%.*]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = zext i8 [[TMP2]] to i16
+; CHECK-NEXT:    ret i16 [[R]]
+;
+  %z = zext i8 %x to i16
+  %b = shl i16 %z, 3
+  %r = and i16 %b, %z
+  ret i16 %r
+}
+
+define <2 x i16> @zext_add_vec(<2 x i8> %x) {
+; CHECK-LABEL: @zext_add_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 44, i8 42>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i16>
+; CHECK-NEXT:    ret <2 x i16> [[R]]
+;
+  %z = zext <2 x i8> %x to <2 x i16>
+  %b = add <2 x i16> %z, <i16 44, i16 42>
+  %r = and <2 x i16> %b, %z
+  ret <2 x i16> %r
+}
+
+define <2 x i16> @zext_sub_vec(<2 x i8> %x) {
+; CHECK-LABEL: @zext_sub_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = sub <2 x i8> <i8 -5, i8 -4>, [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i16>
+; CHECK-NEXT:    ret <2 x i16> [[R]]
+;
+  %z = zext <2 x i8> %x to <2 x i16>
+  %b = sub <2 x i16> <i16 -5, i16 -4>, %z
+  %r = and <2 x i16> %b, %z
+  ret <2 x i16> %r
+}
+
+define <2 x i16> @zext_mul_vec(<2 x i8> %x) {
+; CHECK-LABEL: @zext_mul_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = mul <2 x i8> [[X:%.*]], <i8 3, i8 -2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i16>
+; CHECK-NEXT:    ret <2 x i16> [[R]]
+;
+  %z = zext <2 x i8> %x to <2 x i16>
+  %b = mul <2 x i16> %z, <i16 3, i16 -2>
+  %r = and <2 x i16> %b, %z
+  ret <2 x i16> %r
+}
+
+define <2 x i16> @zext_lshr_vec(<2 x i8> %x) {
+; CHECK-LABEL: @zext_lshr_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 4, i8 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i16>
+; CHECK-NEXT:    ret <2 x i16> [[R]]
+;
+  %z = zext <2 x i8> %x to <2 x i16>
+  %b = lshr <2 x i16> %z, <i16 4, i16 2>
+  %r = and <2 x i16> %b, %z
+  ret <2 x i16> %r
+}
+
+define <2 x i16> @zext_ashr_vec(<2 x i8> %x) {
+; CHECK-LABEL: @zext_ashr_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 2, i8 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i16>
+; CHECK-NEXT:    ret <2 x i16> [[R]]
+;
+  %z = zext <2 x i8> %x to <2 x i16>
+  %b = ashr <2 x i16> %z, <i16 2, i16 3>
+  %r = and <2 x i16> %b, %z
+  ret <2 x i16> %r
+}
+
+define <2 x i16> @zext_shl_vec(<2 x i8> %x) {
+; CHECK-LABEL: @zext_shl_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], <i8 3, i8 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT:    [[R:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i16>
+; CHECK-NEXT:    ret <2 x i16> [[R]]
+;
+  %z = zext <2 x i8> %x to <2 x i16>
+  %b = shl <2 x i16> %z, <i16 3, i16 2>
+  %r = and <2 x i16> %b, %z
+  ret <2 x i16> %r
+}
+
+; Don't create poison by narrowing a shift to a width no greater than the shift amount.
+
+define <2 x i16> @zext_lshr_vec_overshift(<2 x i8> %x) {
+; CHECK-LABEL: @zext_lshr_vec_overshift(
+; CHECK-NEXT:    [[Z:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i16>
+; CHECK-NEXT:    [[B:%.*]] = lshr <2 x i16> [[Z]], <i16 4, i16 8>
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i16> [[B]], [[Z]]
+; CHECK-NEXT:    ret <2 x i16> [[R]]
+;
+  %z = zext <2 x i8> %x to <2 x i16>
+  %b = lshr <2 x i16> %z, <i16 4, i16 8>
+  %r = and <2 x i16> %b, %z
+  ret <2 x i16> %r
+}
+
+; Don't create poison by narrowing a shift to a width no greater than the shift amount.
+
+define <2 x i16> @zext_shl_vec_overshift(<2 x i8> %x) {
+; CHECK-LABEL: @zext_shl_vec_overshift(
+; CHECK-NEXT:    [[Z:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i16>
+; CHECK-NEXT:    [[B:%.*]] = shl <2 x i16> [[Z]], <i16 8, i16 2>
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i16> [[B]], [[Z]]
+; CHECK-NEXT:    ret <2 x i16> [[R]]
+;
+  %z = zext <2 x i8> %x to <2 x i16>
+  %b = shl <2 x i16> %z, <i16 8, i16 2>
+  %r = and <2 x i16> %b, %z
+  ret <2 x i16> %r
+}
+

Added: llvm/trunk/test/Transforms/InstCombine/and-or-and.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/and-or-and.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/and-or-and.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/and-or-and.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,61 @@
+; If we have an 'and' of the result of an 'or', and one of the 'or' operands
+; cannot have contributed any of the resultant bits, delete the 'or'.  This
+; occurs for very common C/C++ code like this:
+;
+; struct foo { int A : 16; int B : 16; };
+; void test(struct foo *F, int X, int Y) {
+;        F->A = X; F->B = Y;
+; }
+;
+; Which corresponds to test1.
+
+; RUN: opt < %s -instcombine -S | \
+; RUN:   not grep "or "
+
+define i32 @test1(i32 %X, i32 %Y) {
+        %A = and i32 %X, 7              ; <i32> [#uses=1]
+        %B = and i32 %Y, 8              ; <i32> [#uses=1]
+        %C = or i32 %A, %B              ; <i32> [#uses=1]
+        ;; This cannot include any bits from %Y!
+        %D = and i32 %C, 7              ; <i32> [#uses=1]
+        ret i32 %D
+}
+
+define i32 @test2(i32 %X, i8 %Y) {
+        %B = zext i8 %Y to i32          ; <i32> [#uses=1]
+        %C = or i32 %X, %B              ; <i32> [#uses=1]
+        ;; This cannot include any bits from %Y!
+        %D = and i32 %C, 65536          ; <i32> [#uses=1]
+        ret i32 %D
+}
+
+define i32 @test3(i32 %X, i32 %Y) {
+        %B = shl i32 %Y, 1              ; <i32> [#uses=1]
+        %C = or i32 %X, %B              ; <i32> [#uses=1]
+        ;; This cannot include any bits from %Y!
+        %D = and i32 %C, 1              ; <i32> [#uses=1]
+        ret i32 %D
+}
+
+define i32 @test4(i32 %X, i32 %Y) {
+        %B = lshr i32 %Y, 31            ; <i32> [#uses=1]
+        %C = or i32 %X, %B              ; <i32> [#uses=1]
+        ;; This cannot include any bits from %Y!
+        %D = and i32 %C, 2              ; <i32> [#uses=1]
+        ret i32 %D
+}
+
+define i32 @or_test1(i32 %X, i32 %Y) {
+        %A = and i32 %X, 1              ; <i32> [#uses=1]
+        ;; This cannot include any bits from X!
+        %B = or i32 %A, 1               ; <i32> [#uses=1]
+        ret i32 %B
+}
+
+define i8 @or_test2(i8 %X, i8 %Y) {
+        %A = shl i8 %X, 7               ; <i8> [#uses=1]
+        ;; This cannot include any bits from X!
+        %B = or i8 %A, -128             ; <i8> [#uses=1]
+        ret i8 %B
+}
+

Added: llvm/trunk/test/Transforms/InstCombine/and-or-icmps.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/and-or-icmps.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/and-or-icmps.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/and-or-icmps.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,255 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i1 @PR1817_1(i32 %X) {
+; CHECK-LABEL: @PR1817_1(
+; CHECK-NEXT:    [[B:%.*]] = icmp ult i32 %X, 10
+; CHECK-NEXT:    ret i1 [[B]]
+;
+  %A = icmp slt i32 %X, 10
+  %B = icmp ult i32 %X, 10
+  %C = and i1 %A, %B
+  ret i1 %C
+}
+
+define i1 @PR1817_2(i32 %X) {
+; CHECK-LABEL: @PR1817_2(
+; CHECK-NEXT:    [[A:%.*]] = icmp slt i32 %X, 10
+; CHECK-NEXT:    ret i1 [[A]]
+;
+  %A = icmp slt i32 %X, 10
+  %B = icmp ult i32 %X, 10
+  %C = or i1 %A, %B
+  ret i1 %C
+}
+
+define i1 @PR2330(i32 %a, i32 %b) {
+; CHECK-LABEL: @PR2330(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %b, %a
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 8
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %cmp1 = icmp ult i32 %a, 8
+  %cmp2 = icmp ult i32 %b, 8
+  %and = and i1 %cmp2, %cmp1
+  ret i1 %and
+}
+
+; if LHSC and RHSC differ only by one bit:
+; (X == C1 || X == C2) -> (X | (C1 ^ C2)) == C2
+; PR14708: https://bugs.llvm.org/show_bug.cgi?id=14708
+
+define i1 @or_eq_with_one_bit_diff_constants1(i32 %x) {
+; CHECK-LABEL: @or_eq_with_one_bit_diff_constants1(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %x, 1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 51
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %cmp1 = icmp eq i32 %x, 50
+  %cmp2 = icmp eq i32 %x, 51
+  %or = or i1 %cmp1, %cmp2
+  ret i1 %or
+}
+
+; (X != C1 && X != C2) -> (X | (C1 ^ C2)) != C2
+
+define i1 @and_ne_with_one_bit_diff_constants1(i32 %x) {
+; CHECK-LABEL: @and_ne_with_one_bit_diff_constants1(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %x, 1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 51
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %cmp1 = icmp ne i32 %x, 51
+  %cmp2 = icmp ne i32 %x, 50
+  %and = and i1 %cmp1, %cmp2
+  ret i1 %and
+}
+
+; The constants are not necessarily off-by-one, just off-by-one-bit.
+
+define i1 @or_eq_with_one_bit_diff_constants2(i32 %x) {
+; CHECK-LABEL: @or_eq_with_one_bit_diff_constants2(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %x, 32
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 97
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %cmp1 = icmp eq i32 %x, 97
+  %cmp2 = icmp eq i32 %x, 65
+  %or = or i1 %cmp1, %cmp2
+  ret i1 %or
+}
+
+define i1 @and_ne_with_one_bit_diff_constants2(i19 %x) {
+; CHECK-LABEL: @and_ne_with_one_bit_diff_constants2(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i19 %x, 128
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i19 [[TMP1]], 193
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %cmp1 = icmp ne i19 %x, 65
+  %cmp2 = icmp ne i19 %x, 193
+  %and = and i1 %cmp1, %cmp2
+  ret i1 %and
+}
+
+; Make sure the constants are treated as unsigned when comparing them.
+
+define i1 @or_eq_with_one_bit_diff_constants3(i8 %x) {
+; CHECK-LABEL: @or_eq_with_one_bit_diff_constants3(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 %x, -128
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], -2
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %cmp1 = icmp eq i8 %x, 254
+  %cmp2 = icmp eq i8 %x, 126
+  %or = or i1 %cmp1, %cmp2
+  ret i1 %or
+}
+
+define i1 @and_ne_with_one_bit_diff_constants3(i8 %x) {
+; CHECK-LABEL: @and_ne_with_one_bit_diff_constants3(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i8 %x, -128
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i8 [[TMP1]], -63
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %cmp1 = icmp ne i8 %x, 65
+  %cmp2 = icmp ne i8 %x, 193
+  %and = and i1 %cmp1, %cmp2
+  ret i1 %and
+}
+
+; Use an 'add' to eliminate an icmp if the constants are off-by-one (not off-by-one-bit).
+; (X == 13 | X == 14) -> X-13 <u 2
+
+define i1 @or_eq_with_diff_one(i8 %x) {
+; CHECK-LABEL: @or_eq_with_diff_one(
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 %x, -13
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 2
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %cmp1 = icmp eq i8 %x, 13
+  %cmp2 = icmp eq i8 %x, 14
+  %or = or i1 %cmp1, %cmp2
+  ret i1 %or
+}
+
+; (X != 40 & X != 39) -> X-39 >u 1
+
+define i1 @and_ne_with_diff_one(i32 %x) {
+; CHECK-LABEL: @and_ne_with_diff_one(
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 %x, -39
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt i32 [[TMP1]], 1
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %cmp1 = icmp ne i32 %x, 40
+  %cmp2 = icmp ne i32 %x, 39
+  %and = and i1 %cmp1, %cmp2
+  ret i1 %and
+}
+
+; Make sure the constants are treated as signed when comparing them.
+; PR32524: https://bugs.llvm.org/show_bug.cgi?id=32524
+
+define i1 @or_eq_with_diff_one_signed(i32 %x) {
+; CHECK-LABEL: @or_eq_with_diff_one_signed(
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 %x, 1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 2
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %cmp1 = icmp eq i32 %x, 0
+  %cmp2 = icmp eq i32 %x, -1
+  %or = or i1 %cmp1, %cmp2
+  ret i1 %or
+}
+
+define i1 @and_ne_with_diff_one_signed(i64 %x) {
+; CHECK-LABEL: @and_ne_with_diff_one_signed(
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 %x, 1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt i64 [[TMP1]], 1
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %cmp1 = icmp ne i64 %x, -1
+  %cmp2 = icmp ne i64 %x, 0
+  %and = and i1 %cmp1, %cmp2
+  ret i1 %and
+}
+
+; Vectors with splat constants get the same folds.
+
+define <2 x i1> @or_eq_with_one_bit_diff_constants2_splatvec(<2 x i32> %x) {
+; CHECK-LABEL: @or_eq_with_one_bit_diff_constants2_splatvec(
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> %x, <i32 32, i32 32>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 97, i32 97>
+; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
+;
+  %cmp1 = icmp eq <2 x i32> %x, <i32 97, i32 97>
+  %cmp2 = icmp eq <2 x i32> %x, <i32 65, i32 65>
+  %or = or <2 x i1> %cmp1, %cmp2
+  ret <2 x i1> %or
+}
+
+define <2 x i1> @and_ne_with_diff_one_splatvec(<2 x i32> %x) {
+; CHECK-LABEL: @and_ne_with_diff_one_splatvec(
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> %x, <i32 -39, i32 -39>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
+;
+  %cmp1 = icmp ne <2 x i32> %x, <i32 40, i32 40>
+  %cmp2 = icmp ne <2 x i32> %x, <i32 39, i32 39>
+  %and = and <2 x i1> %cmp1, %cmp2
+  ret <2 x i1> %and
+}
+
+; This is a fuzzer-generated test that would assert because
+; we'd get into foldAndOfICmps() without running InstSimplify
+; on an 'and' that should have been killed. It's not obvious
+; why, but removing anything hides the bug, hence the long test.
+
+define void @simplify_before_foldAndOfICmps() {
+; CHECK-LABEL: @simplify_before_foldAndOfICmps(
+; CHECK-NEXT:    [[A8:%.*]] = alloca i16, align 2
+; CHECK-NEXT:    [[L7:%.*]] = load i16, i16* [[A8]], align 2
+; CHECK-NEXT:    [[C10:%.*]] = icmp ult i16 [[L7]], 2
+; CHECK-NEXT:    [[C7:%.*]] = icmp slt i16 [[L7]], 0
+; CHECK-NEXT:    [[C18:%.*]] = or i1 [[C7]], [[C10]]
+; CHECK-NEXT:    [[L7_LOBIT:%.*]] = ashr i16 [[L7]], 15
+; CHECK-NEXT:    [[TMP1:%.*]] = sext i16 [[L7_LOBIT]] to i64
+; CHECK-NEXT:    [[G26:%.*]] = getelementptr i1, i1* null, i64 [[TMP1]]
+; CHECK-NEXT:    store i16 [[L7]], i16* undef, align 2
+; CHECK-NEXT:    store i1 [[C18]], i1* undef, align 1
+; CHECK-NEXT:    store i1* [[G26]], i1** undef, align 8
+; CHECK-NEXT:    ret void
+;
+  %A8 = alloca i16
+  %L7 = load i16, i16* %A8
+  %G21 = getelementptr i16, i16* %A8, i8 -1
+  %B11 = udiv i16 %L7, -1
+  %G4 = getelementptr i16, i16* %A8, i16 %B11
+  %L2 = load i16, i16* %G4
+  %L = load i16, i16* %G4
+  %B23 = mul i16 %B11, %B11
+  %L4 = load i16, i16* %A8
+  %B21 = sdiv i16 %L7, %L4
+  %B7 = sub i16 0, %B21
+  %B18 = mul i16 %B23, %B7
+  %C10 = icmp ugt i16 %L, %B11
+  %B20 = and i16 %L7, %L2
+  %B1 = mul i1 %C10, true
+  %C5 = icmp sle i16 %B21, %L
+  %C11 = icmp ule i16 %B21, %L
+  %C7 = icmp slt i16 %B20, 0
+  %B29 = srem i16 %L4, %B18
+  %B15 = add i1 %C7, %C10
+  %B19 = add i1 %C11, %B15
+  %C6 = icmp sge i1 %C11, %B19
+  %B33 = or i16 %B29, %L4
+  %C13 = icmp uge i1 %C5, %B1
+  %C3 = icmp ult i1 %C13, %C6
+  store i16 undef, i16* %G21
+  %C18 = icmp ule i1 %C10, %C7
+  %G26 = getelementptr i1, i1* null, i1 %C3
+  store i16 %B33, i16* undef
+  store i1 %C18, i1* undef
+  store i1* %G26, i1** undef
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/InstCombine/and-or-not.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/and-or-not.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/and-or-not.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/and-or-not.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,642 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; PR1510
+
+; (a | b) & ~(a & b) --> a ^ b
+
+define i32 @and_to_xor1(i32 %a, i32 %b) {
+; CHECK-LABEL: @and_to_xor1(
+; CHECK-NEXT:    [[AND2:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i32 [[AND2]]
+;
+  %or = or i32 %a, %b
+  %and = and i32 %a, %b
+  %not = xor i32 %and, -1
+  %and2 = and i32 %or, %not
+  ret i32 %and2
+}
+
+; ~(a & b) & (a | b) --> a ^ b
+
+define i32 @and_to_xor2(i32 %a, i32 %b) {
+; CHECK-LABEL: @and_to_xor2(
+; CHECK-NEXT:    [[AND2:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i32 [[AND2]]
+;
+  %or = or i32 %a, %b
+  %and = and i32 %a, %b
+  %not = xor i32 %and, -1
+  %and2 = and i32 %not, %or
+  ret i32 %and2
+}
+
+; (a | b) & ~(b & a) --> a ^ b
+
+define i32 @and_to_xor3(i32 %a, i32 %b) {
+; CHECK-LABEL: @and_to_xor3(
+; CHECK-NEXT:    [[AND2:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i32 [[AND2]]
+;
+  %or = or i32 %a, %b
+  %and = and i32 %b, %a
+  %not = xor i32 %and, -1
+  %and2 = and i32 %or, %not
+  ret i32 %and2
+}
+
+; ~(a & b) & (b | a) --> a ^ b
+
+define i32 @and_to_xor4(i32 %a, i32 %b) {
+; CHECK-LABEL: @and_to_xor4(
+; CHECK-NEXT:    [[AND2:%.*]] = xor i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    ret i32 [[AND2]]
+;
+  %or = or i32 %b, %a
+  %and = and i32 %a, %b
+  %not = xor i32 %and, -1
+  %and2 = and i32 %not, %or
+  ret i32 %and2
+}
+
+define <4 x i32> @and_to_xor1_vec(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @and_to_xor1_vec(
+; CHECK-NEXT:    [[AND2:%.*]] = xor <4 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret <4 x i32> [[AND2]]
+;
+  %or = or <4 x i32> %a, %b
+  %and = and <4 x i32> %a, %b
+  %not = xor <4 x i32> %and, < i32 -1, i32 -1, i32 -1, i32 -1 >
+  %and2 = and <4 x i32> %or, %not
+  ret <4 x i32> %and2
+}
+
+; In the next 4 tests, cast instructions are used to thwart operand complexity
+; canonicalizations, so we can test all of the commuted patterns.
+
+; (a | ~b) & (~a | b) --> ~(a ^ b)
+
+define i32 @and_to_nxor1(float %fa, float %fb) {
+; CHECK-LABEL: @and_to_nxor1(
+; CHECK-NEXT:    [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT:    [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT:    [[AND:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT:    ret i32 [[AND]]
+;
+  %a = fptosi float %fa to i32
+  %b = fptosi float %fb to i32
+  %nota = xor i32 %a, -1
+  %notb = xor i32 %b, -1
+  %or1 = or i32 %a, %notb
+  %or2 = or i32 %nota, %b
+  %and = and i32 %or1, %or2
+  ret i32 %and
+}
+
+; (a | ~b) & (b | ~a) --> ~(a ^ b)
+
+define i32 @and_to_nxor2(float %fa, float %fb) {
+; CHECK-LABEL: @and_to_nxor2(
+; CHECK-NEXT:    [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT:    [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT:    [[AND:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT:    ret i32 [[AND]]
+;
+  %a = fptosi float %fa to i32
+  %b = fptosi float %fb to i32
+  %nota = xor i32 %a, -1
+  %notb = xor i32 %b, -1
+  %or1 = or i32 %a, %notb
+  %or2 = or i32 %b, %nota
+  %and = and i32 %or1, %or2
+  ret i32 %and
+}
+
+; (~a | b) & (a | ~b) --> ~(a ^ b)
+
+define i32 @and_to_nxor3(float %fa, float %fb) {
+; CHECK-LABEL: @and_to_nxor3(
+; CHECK-NEXT:    [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT:    [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[B]], [[A]]
+; CHECK-NEXT:    [[AND:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT:    ret i32 [[AND]]
+;
+  %a = fptosi float %fa to i32
+  %b = fptosi float %fb to i32
+  %nota = xor i32 %a, -1
+  %notb = xor i32 %b, -1
+  %or1 = or i32 %nota, %b
+  %or2 = or i32 %a, %notb
+  %and = and i32 %or1, %or2
+  ret i32 %and
+}
+
+; (~a | b) & (~b | a) --> ~(a ^ b)
+
+define i32 @and_to_nxor4(float %fa, float %fb) {
+; CHECK-LABEL: @and_to_nxor4(
+; CHECK-NEXT:    [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT:    [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[B]], [[A]]
+; CHECK-NEXT:    [[AND:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT:    ret i32 [[AND]]
+;
+  %a = fptosi float %fa to i32
+  %b = fptosi float %fb to i32
+  %nota = xor i32 %a, -1
+  %notb = xor i32 %b, -1
+  %or1 = or i32 %nota, %b
+  %or2 = or i32 %notb, %a
+  %and = and i32 %or1, %or2
+  ret i32 %and
+}
+
+; (a & ~b) | (~a & b) --> a ^ b
+
+define i32 @or_to_xor1(float %fa, float %fb) {
+; CHECK-LABEL: @or_to_xor1(
+; CHECK-NEXT:    [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT:    [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT:    [[OR:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+  %a = fptosi float %fa to i32
+  %b = fptosi float %fb to i32
+  %nota = xor i32 %a, -1
+  %notb = xor i32 %b, -1
+  %and1 = and i32 %a, %notb
+  %and2 = and i32 %nota, %b
+  %or = or i32 %and1, %and2
+  ret i32 %or
+}
+
+; (a & ~b) | (b & ~a) --> a ^ b
+
+define i32 @or_to_xor2(float %fa, float %fb) {
+; CHECK-LABEL: @or_to_xor2(
+; CHECK-NEXT:    [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT:    [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT:    [[OR:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+  %a = fptosi float %fa to i32
+  %b = fptosi float %fb to i32
+  %nota = xor i32 %a, -1
+  %notb = xor i32 %b, -1
+  %and1 = and i32 %a, %notb
+  %and2 = and i32 %b, %nota
+  %or = or i32 %and1, %and2
+  ret i32 %or
+}
+
+; (~a & b) | (~b & a) --> a ^ b
+
+define i32 @or_to_xor3(float %fa, float %fb) {
+; CHECK-LABEL: @or_to_xor3(
+; CHECK-NEXT:    [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT:    [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT:    [[OR:%.*]] = xor i32 [[B]], [[A]]
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+  %a = fptosi float %fa to i32
+  %b = fptosi float %fb to i32
+  %nota = xor i32 %a, -1
+  %notb = xor i32 %b, -1
+  %and1 = and i32 %nota, %b
+  %and2 = and i32 %notb, %a
+  %or = or i32 %and1, %and2
+  ret i32 %or
+}
+
+; (~a & b) | (a & ~b) --> a ^ b
+
+define i32 @or_to_xor4(float %fa, float %fb) {
+; CHECK-LABEL: @or_to_xor4(
+; CHECK-NEXT:    [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT:    [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT:    [[OR:%.*]] = xor i32 [[B]], [[A]]
+; CHECK-NEXT:    ret i32 [[OR]]
+;
+  %a = fptosi float %fa to i32
+  %b = fptosi float %fb to i32
+  %nota = xor i32 %a, -1
+  %notb = xor i32 %b, -1
+  %and1 = and i32 %nota, %b
+  %and2 = and i32 %a, %notb
+  %or = or i32 %and1, %and2
+  ret i32 %or
+}
+
+; (a & b) | ~(a | b) --> ~(a ^ b)
+
+define i32 @or_to_nxor1(i32 %a, i32 %b) {
+; CHECK-LABEL: @or_to_nxor1(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[OR2:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT:    ret i32 [[OR2]]
+;
+  %and = and i32 %a, %b
+  %or = or i32 %a, %b
+  %notor = xor i32 %or, -1
+  %or2 = or i32 %and, %notor
+  ret i32 %or2
+}
+
+; (a & b) | ~(b | a) --> ~(a ^ b)
+
+define i32 @or_to_nxor2(i32 %a, i32 %b) {
+; CHECK-LABEL: @or_to_nxor2(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[OR2:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT:    ret i32 [[OR2]]
+;
+  %and = and i32 %a, %b
+  %or = or i32 %b, %a
+  %notor = xor i32 %or, -1
+  %or2 = or i32 %and, %notor
+  ret i32 %or2
+}
+
+; ~(a | b) | (a & b) --> ~(a ^ b)
+
+define i32 @or_to_nxor3(i32 %a, i32 %b) {
+; CHECK-LABEL: @or_to_nxor3(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[OR2:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT:    ret i32 [[OR2]]
+;
+  %and = and i32 %a, %b
+  %or = or i32 %a, %b
+  %notor = xor i32 %or, -1
+  %or2 = or i32 %notor, %and
+  ret i32 %or2
+}
+
+; ~(a | b) | (b & a) --> ~(a ^ b)
+
+define i32 @or_to_nxor4(i32 %a, i32 %b) {
+; CHECK-LABEL: @or_to_nxor4(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    [[OR2:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT:    ret i32 [[OR2]]
+;
+  %and = and i32 %b, %a
+  %or = or i32 %a, %b
+  %notor = xor i32 %or, -1
+  %or2 = or i32 %notor, %and
+  ret i32 %or2
+}
+
+; (a & b) ^ (a | b) --> a ^ b
+
+define i32 @xor_to_xor1(i32 %a, i32 %b) {
+; CHECK-LABEL: @xor_to_xor1(
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i32 [[XOR]]
+;
+  %and = and i32 %a, %b
+  %or = or i32 %a, %b
+  %xor = xor i32 %and, %or
+  ret i32 %xor
+}
+
+; (a & b) ^ (b | a) --> a ^ b
+
+define i32 @xor_to_xor2(i32 %a, i32 %b) {
+; CHECK-LABEL: @xor_to_xor2(
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i32 [[XOR]]
+;
+  %and = and i32 %a, %b
+  %or = or i32 %b, %a
+  %xor = xor i32 %and, %or
+  ret i32 %xor
+}
+
+; (a | b) ^ (a & b) --> a ^ b
+
+define i32 @xor_to_xor3(i32 %a, i32 %b) {
+; CHECK-LABEL: @xor_to_xor3(
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i32 [[XOR]]
+;
+  %or = or i32 %a, %b
+  %and = and i32 %a, %b
+  %xor = xor i32 %or, %and
+  ret i32 %xor
+}
+
+; (a | b) ^ (b & a) --> a ^ b
+
+define i32 @xor_to_xor4(i32 %a, i32 %b) {
+; CHECK-LABEL: @xor_to_xor4(
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    ret i32 [[XOR]]
+;
+  %or = or i32 %a, %b
+  %and = and i32 %b, %a
+  %xor = xor i32 %or, %and
+  ret i32 %xor
+}
+
+; (a | ~b) ^ (~a | b) --> a ^ b
+
+; In the next 8 tests, cast instructions are used to thwart operand complexity
+; canonicalizations, so we can test all of the commuted patterns.
+
+define i32 @xor_to_xor5(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xor5(
+; CHECK-NEXT:    [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT:    [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT:    ret i32 [[XOR]]
+;
+  %a = fptosi float %fa to i32
+  %b = fptosi float %fb to i32
+  %nota = xor i32 %a, -1
+  %notb = xor i32 %b, -1
+  %or1 = or i32 %a, %notb
+  %or2 = or i32 %nota, %b
+  %xor = xor i32 %or1, %or2
+  ret i32 %xor
+}
+
+; (a | ~b) ^ (b | ~a) --> a ^ b
+
+define i32 @xor_to_xor6(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xor6(
+; CHECK-NEXT:    [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT:    [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT:    ret i32 [[XOR]]
+;
+  %a = fptosi float %fa to i32
+  %b = fptosi float %fb to i32
+  %nota = xor i32 %a, -1
+  %notb = xor i32 %b, -1
+  %or1 = or i32 %a, %notb
+  %or2 = or i32 %b, %nota
+  %xor = xor i32 %or1, %or2
+  ret i32 %xor
+}
+
+; (~a | b) ^ (a | ~b) --> a ^ b
+
+define i32 @xor_to_xor7(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xor7(
+; CHECK-NEXT:    [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT:    [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[B]], [[A]]
+; CHECK-NEXT:    ret i32 [[XOR]]
+;
+  %a = fptosi float %fa to i32
+  %b = fptosi float %fb to i32
+  %nota = xor i32 %a, -1
+  %notb = xor i32 %b, -1
+  %or1 = or i32 %a, %notb
+  %or2 = or i32 %nota, %b
+  %xor = xor i32 %or2, %or1
+  ret i32 %xor
+}
+
+; (~a | b) ^ (~b | a) --> a ^ b
+
+define i32 @xor_to_xor8(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xor8(
+; CHECK-NEXT:    [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT:    [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[B]], [[A]]
+; CHECK-NEXT:    ret i32 [[XOR]]
+;
+  %a = fptosi float %fa to i32
+  %b = fptosi float %fb to i32
+  %nota = xor i32 %a, -1
+  %notb = xor i32 %b, -1
+  %or1 = or i32 %notb, %a
+  %or2 = or i32 %nota, %b
+  %xor = xor i32 %or2, %or1
+  ret i32 %xor
+}
+
+; (a & ~b) ^ (~a & b) --> a ^ b
+
+define i32 @xor_to_xor9(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xor9(
+; CHECK-NEXT:    [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT:    [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT:    ret i32 [[XOR]]
+;
+  %a = fptosi float %fa to i32
+  %b = fptosi float %fb to i32
+  %nota = xor i32 %a, -1
+  %notb = xor i32 %b, -1
+  %and1 = and i32 %a, %notb
+  %and2 = and i32 %nota, %b
+  %xor = xor i32 %and1, %and2
+  ret i32 %xor
+}
+
+; (a & ~b) ^ (b & ~a) --> a ^ b
+
+define i32 @xor_to_xor10(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xor10(
+; CHECK-NEXT:    [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT:    [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT:    ret i32 [[XOR]]
+;
+  %a = fptosi float %fa to i32
+  %b = fptosi float %fb to i32
+  %nota = xor i32 %a, -1
+  %notb = xor i32 %b, -1
+  %and1 = and i32 %a, %notb
+  %and2 = and i32 %b, %nota
+  %xor = xor i32 %and1, %and2
+  ret i32 %xor
+}
+
+; (~a & b) ^ (a & ~b) --> a ^ b
+
+define i32 @xor_to_xor11(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xor11(
+; CHECK-NEXT:    [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT:    [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[B]], [[A]]
+; CHECK-NEXT:    ret i32 [[XOR]]
+;
+  %a = fptosi float %fa to i32
+  %b = fptosi float %fb to i32
+  %nota = xor i32 %a, -1
+  %notb = xor i32 %b, -1
+  %and1 = and i32 %a, %notb
+  %and2 = and i32 %nota, %b
+  %xor = xor i32 %and2, %and1
+  ret i32 %xor
+}
+
+; (~a & b) ^ (~b & a) --> a ^ b
+
+define i32 @xor_to_xor12(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xor12(
+; CHECK-NEXT:    [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT:    [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[B]], [[A]]
+; CHECK-NEXT:    ret i32 [[XOR]]
+;
+  %a = fptosi float %fa to i32
+  %b = fptosi float %fb to i32
+  %nota = xor i32 %a, -1
+  %notb = xor i32 %b, -1
+  %and1 = and i32 %notb, %a
+  %and2 = and i32 %nota, %b
+  %xor = xor i32 %and2, %and1
+  ret i32 %xor
+}
+
+; https://bugs.llvm.org/show_bug.cgi?id=32830
+; Make sure we're matching operands correctly and not folding things wrongly.
+
+define i64 @PR32830(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: @PR32830(
+; CHECK-NEXT:    [[NOTA:%.*]] = xor i64 [[A:%.*]], -1
+; CHECK-NEXT:    [[NOTB:%.*]] = xor i64 [[B:%.*]], -1
+; CHECK-NEXT:    [[OR1:%.*]] = or i64 [[NOTB]], [[A]]
+; CHECK-NEXT:    [[OR2:%.*]] = or i64 [[NOTA]], [[C:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[OR1]], [[OR2]]
+; CHECK-NEXT:    ret i64 [[AND]]
+;
+  %nota = xor i64 %a, -1
+  %notb = xor i64 %b, -1
+  %or1 = or i64 %notb, %a
+  %or2 = or i64 %nota, %c
+  %and = and i64 %or1, %or2
+  ret i64 %and
+}
+
+; (~a | b) & (~b | a) --> ~(a ^ b)
+; TODO: this fold would increase the instruction count if the pieces have additional users
+define i32 @and_to_nxor_multiuse(float %fa, float %fb) {
+; CHECK-LABEL: @and_to_nxor_multiuse(
+; CHECK-NEXT:    [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT:    [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT:    [[NOTA:%.*]] = xor i32 [[A]], -1
+; CHECK-NEXT:    [[NOTB:%.*]] = xor i32 [[B]], -1
+; CHECK-NEXT:    [[OR1:%.*]] = or i32 [[NOTA]], [[B]]
+; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[NOTB]], [[A]]
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[OR1]], [[OR2]]
+; CHECK-NEXT:    [[MUL1:%.*]] = mul i32 [[OR1]], [[OR2]]
+; CHECK-NEXT:    [[MUL2:%.*]] = mul i32 [[MUL1]], [[AND]]
+; CHECK-NEXT:    ret i32 [[MUL2]]
+;
+  %a = fptosi float %fa to i32
+  %b = fptosi float %fb to i32
+  %nota = xor i32 %a, -1
+  %notb = xor i32 %b, -1
+  %or1 = or i32 %nota, %b
+  %or2 = or i32 %notb, %a
+  %and = and i32 %or1, %or2
+  %mul1 = mul i32 %or1, %or2 ; here to increase the use count of the inputs to the and
+  %mul2 = mul i32 %mul1, %and
+  ret i32 %mul2
+}
+
+; (a & b) | ~(a | b) --> ~(a ^ b)
+; TODO: this fold would increase the instruction count if the pieces have additional users
+define i32 @or_to_nxor_multiuse(i32 %a, i32 %b) {
+; CHECK-LABEL: @or_to_nxor_multiuse(
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[A]], [[B]]
+; CHECK-NEXT:    [[NOTOR:%.*]] = xor i32 [[OR]], -1
+; CHECK-NEXT:    [[OR2:%.*]] = or i32 [[AND]], [[NOTOR]]
+; CHECK-NEXT:    [[MUL1:%.*]] = mul i32 [[AND]], [[NOTOR]]
+; CHECK-NEXT:    [[MUL2:%.*]] = mul i32 [[MUL1]], [[OR2]]
+; CHECK-NEXT:    ret i32 [[MUL2]]
+;
+  %and = and i32 %a, %b
+  %or = or i32 %a, %b
+  %notor = xor i32 %or, -1
+  %or2 = or i32 %and, %notor
+  %mul1 = mul i32 %and, %notor ; here to increase the use count of the inputs to the or
+  %mul2 = mul i32 %mul1, %or2
+  ret i32 %mul2
+}
+
+; (a | b) ^ (~a | ~b) --> ~(a ^ b)
+define i32 @xor_to_xnor1(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xnor1(
+; CHECK-NEXT:    [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT:    [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT:    ret i32 [[XOR]]
+;
+  %a = fptosi float %fa to i32
+  %b = fptosi float %fb to i32
+  %nota = xor i32 %a, -1
+  %notb = xor i32 %b, -1
+  %or1 = or i32 %a, %b
+  %or2 = or i32 %nota, %notb
+  %xor = xor i32 %or1, %or2
+  ret i32 %xor
+}
+
+; (a | b) ^ (~b | ~a) --> ~(a ^ b)
+define i32 @xor_to_xnor2(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xnor2(
+; CHECK-NEXT:    [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT:    [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT:    ret i32 [[XOR]]
+;
+  %a = fptosi float %fa to i32
+  %b = fptosi float %fb to i32
+  %nota = xor i32 %a, -1
+  %notb = xor i32 %b, -1
+  %or1 = or i32 %a, %b
+  %or2 = or i32 %notb, %nota
+  %xor = xor i32 %or1, %or2
+  ret i32 %xor
+}
+
+; (~a | ~b) ^ (a | b) --> ~(a ^ b)
+define i32 @xor_to_xnor3(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xnor3(
+; CHECK-NEXT:    [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT:    [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT:    ret i32 [[XOR]]
+;
+  %a = fptosi float %fa to i32
+  %b = fptosi float %fb to i32
+  %nota = xor i32 %a, -1
+  %notb = xor i32 %b, -1
+  %or1 = or i32 %nota, %notb
+  %or2 = or i32 %a, %b
+  %xor = xor i32 %or1, %or2
+  ret i32 %xor
+}
+
+; (~a | ~b) ^ (b | a) --> ~(a ^ b)
+define i32 @xor_to_xnor4(float %fa, float %fb) {
+; CHECK-LABEL: @xor_to_xnor4(
+; CHECK-NEXT:    [[A:%.*]] = fptosi float [[FA:%.*]] to i32
+; CHECK-NEXT:    [[B:%.*]] = fptosi float [[FB:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[B]], [[A]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[TMP1]], -1
+; CHECK-NEXT:    ret i32 [[XOR]]
+;
+  %a = fptosi float %fa to i32
+  %b = fptosi float %fb to i32
+  %nota = xor i32 %a, -1
+  %notb = xor i32 %b, -1
+  %or1 = or i32 %nota, %notb
+  %or2 = or i32 %b, %a
+  %xor = xor i32 %or1, %or2
+  ret i32 %xor
+}
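The folds exercised above are plain Boolean identities, so they can be sanity-checked outside of opt. The following standalone C program is only an illustrative sketch (not part of the patch; 8-bit values stand in for the i32/i64/vector types used in the tests) that brute-forces the equivalences:

#include <assert.h>
#include <stdio.h>

int main(void) {
  for (unsigned a = 0; a < 256; ++a) {
    for (unsigned b = 0; b < 256; ++b) {
      unsigned na = ~a & 0xff, nb = ~b & 0xff; /* ~a and ~b truncated to 8 bits */
      unsigned nxor = ~(a ^ b) & 0xff;         /* ~(a ^ b) */
      /* and_to_nxor*: (a | ~b) & (~a | b) --> ~(a ^ b) */
      assert((((a | nb) & (na | b)) & 0xff) == nxor);
      /* or_to_xor*: (a & ~b) | (~a & b) --> a ^ b */
      assert((((a & nb) | (na & b)) & 0xff) == (a ^ b));
      /* or_to_nxor*: (a & b) | ~(a | b) --> ~(a ^ b) */
      assert((((a & b) | (~(a | b) & 0xff)) & 0xff) == nxor);
      /* xor_to_xor*: (a & b) ^ (a | b) --> a ^ b */
      assert((((a & b) ^ (a | b)) & 0xff) == (a ^ b));
      /* xor_to_xnor*: (a | b) ^ (~a | ~b) --> ~(a ^ b) */
      assert((((a | b) ^ (na | nb)) & 0xff) == nxor);
    }
  }
  puts("all identities hold for every 8-bit a and b");
  return 0;
}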

Added: llvm/trunk/test/Transforms/InstCombine/and-or.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/and-or.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/and-or.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/and-or.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,132 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @func1(i32 %a, i32 %b) {
+; CHECK-LABEL: @func1(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 %a, 1
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP1]], %b
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %tmp = or i32 %b, %a
+  %tmp1 = and i32 %tmp, 1
+  %tmp2 = and i32 %b, -2
+  %tmp3 = or i32 %tmp1, %tmp2
+  ret i32 %tmp3
+}
+
+define i32 @func2(i32 %a, i32 %b) {
+; CHECK-LABEL: @func2(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 %a, 1
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP1]], %b
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %tmp = or i32 %a, %b
+  %tmp1 = and i32 1, %tmp
+  %tmp2 = and i32 -2, %b
+  %tmp3 = or i32 %tmp1, %tmp2
+  ret i32 %tmp3
+}
+
+define i32 @func3(i32 %a, i32 %b) {
+; CHECK-LABEL: @func3(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 %a, 1
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP1]], %b
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %tmp = or i32 %b, %a
+  %tmp1 = and i32 %tmp, 1
+  %tmp2 = and i32 %b, -2
+  %tmp3 = or i32 %tmp2, %tmp1
+  ret i32 %tmp3
+}
+
+define i32 @func4(i32 %a, i32 %b) {
+; CHECK-LABEL: @func4(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 %a, 1
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP1]], %b
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %tmp = or i32 %a, %b
+  %tmp1 = and i32 1, %tmp
+  %tmp2 = and i32 -2, %b
+  %tmp3 = or i32 %tmp2, %tmp1
+  ret i32 %tmp3
+}
+
+; Check variants of:
+; and ({x}or X, Y), C --> {x}or X, (and Y, C)
+; ...in the following 5 tests.
+
+define i8 @and_or_hoist_mask(i8 %a, i8 %b) {
+; CHECK-LABEL: @and_or_hoist_mask(
+; CHECK-NEXT:    [[SH:%.*]] = lshr i8 %a, 6
+; CHECK-NEXT:    [[B_MASKED:%.*]] = and i8 %b, 3
+; CHECK-NEXT:    [[AND:%.*]] = or i8 [[SH]], [[B_MASKED]]
+; CHECK-NEXT:    ret i8 [[AND]]
+;
+  %sh = lshr i8 %a, 6
+  %or = or i8 %sh, %b
+  %and = and i8 %or, 3
+  ret i8 %and
+}
+
+define <2 x i8> @and_xor_hoist_mask_vec_splat(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @and_xor_hoist_mask_vec_splat(
+; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> %a, <i8 6, i8 6>
+; CHECK-NEXT:    [[B_MASKED:%.*]] = and <2 x i8> %b, <i8 3, i8 3>
+; CHECK-NEXT:    [[AND:%.*]] = xor <2 x i8> [[SH]], [[B_MASKED]]
+; CHECK-NEXT:    ret <2 x i8> [[AND]]
+;
+  %sh = lshr <2 x i8> %a, <i8 6, i8 6>
+  %xor = xor <2 x i8> %sh, %b
+  %and = and <2 x i8> %xor, <i8 3, i8 3>
+  ret <2 x i8> %and
+}
+
+define i8 @and_xor_hoist_mask_commute(i8 %a, i8 %b) {
+; CHECK-LABEL: @and_xor_hoist_mask_commute(
+; CHECK-NEXT:    [[C:%.*]] = mul i8 %b, 43
+; CHECK-NEXT:    [[SH:%.*]] = lshr i8 %a, 6
+; CHECK-NEXT:    [[C_MASKED:%.*]] = and i8 [[C]], 3
+; CHECK-NEXT:    [[AND:%.*]] = xor i8 [[C_MASKED]], [[SH]]
+; CHECK-NEXT:    ret i8 [[AND]]
+;
+  %c = mul i8 %b, 43 ; thwart complexity-based ordering
+  %sh = lshr i8 %a, 6
+  %xor = xor i8 %c, %sh
+  %and = and i8 %xor, 3
+  ret i8 %and
+}
+
+define <2 x i8> @and_or_hoist_mask_commute_vec_splat(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @and_or_hoist_mask_commute_vec_splat(
+; CHECK-NEXT:    [[C:%.*]] = mul <2 x i8> %b, <i8 43, i8 43>
+; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> %a, <i8 6, i8 6>
+; CHECK-NEXT:    [[C_MASKED:%.*]] = and <2 x i8> [[C]], <i8 3, i8 3>
+; CHECK-NEXT:    [[AND:%.*]] = or <2 x i8> [[C_MASKED]], [[SH]]
+; CHECK-NEXT:    ret <2 x i8> [[AND]]
+;
+  %c = mul <2 x i8> %b, <i8 43, i8 43> ; thwart complexity-based ordering
+  %sh = lshr <2 x i8> %a, <i8 6, i8 6>
+  %or = or <2 x i8> %c, %sh
+  %and = and <2 x i8> %or, <i8 3, i8 3>
+  ret <2 x i8> %and
+}
+
+; Don't transform if the 'or' has multiple uses because that would increase instruction count.
+
+define i8 @and_or_do_not_hoist_mask(i8 %a, i8 %b) {
+; CHECK-LABEL: @and_or_do_not_hoist_mask(
+; CHECK-NEXT:    [[SH:%.*]] = lshr i8 %a, 6
+; CHECK-NEXT:    [[OR:%.*]] = or i8 [[SH]], %b
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[OR]], 3
+; CHECK-NEXT:    [[EXTRA_USE_OF_OR:%.*]] = mul i8 [[OR]], [[AND]]
+; CHECK-NEXT:    ret i8 [[EXTRA_USE_OF_OR]]
+;
+  %sh = lshr i8 %a, 6
+  %or = or i8 %sh, %b
+  %and = and i8 %or, 3
+  %extra_use_of_or = mul i8 %or, %and
+  ret i8 %extra_use_of_or
+}
+
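The hoist-mask folds above are legal because the shifted value has no bits outside the mask (lshr i8 %a, 6 can only set bits 0 and 1, and the mask is 3). A minimal C sketch of that argument, assuming nothing beyond standard C and not part of the patch:

#include <assert.h>
#include <stdio.h>

int main(void) {
  for (unsigned a = 0; a < 256; ++a) {
    for (unsigned b = 0; b < 256; ++b) {
      unsigned sh = a >> 6; /* lshr i8 %a, 6: value is in [0, 3], i.e. inside the mask */
      /* and (or X, Y), 3 --> or X, (and Y, 3) when X has no bits outside the mask */
      assert(((sh | b) & 3) == (sh | (b & 3)));
      /* the same holds with xor in place of or */
      assert(((sh ^ b) & 3) == (sh ^ (b & 3)));
    }
  }
  puts("mask hoisting holds for every 8-bit a and b");
  return 0;
}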

Added: llvm/trunk/test/Transforms/InstCombine/and-xor-merge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/and-xor-merge.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/and-xor-merge.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/and-xor-merge.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,24 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; (x&z) ^ (y&z) -> (x^y)&z
+define i32 @test1(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: %tmp61 = xor i32 %x, %y
+; CHECK-NEXT: %tmp7 = and i32 %tmp61, %z
+; CHECK-NEXT: ret i32 %tmp7
+        %tmp3 = and i32 %z, %x
+        %tmp6 = and i32 %z, %y
+        %tmp7 = xor i32 %tmp3, %tmp6
+        ret i32 %tmp7
+}
+
+; (x & y) ^ (x|y) -> x^y
+define i32 @test2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: %tmp7 = xor i32 %y, %x
+; CHECK-NEXT: ret i32 %tmp7
+        %tmp3 = and i32 %y, %x
+        %tmp6 = or i32 %y, %x
+        %tmp7 = xor i32 %tmp3, %tmp6
+        ret i32 %tmp7
+}
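Both merges in and-xor-merge.ll follow from the distributivity of 'and' over 'xor' and from the and/or/xor truth tables; a brief C brute-force check (illustrative only, not part of the patch):

#include <assert.h>
#include <stdio.h>

int main(void) {
  for (unsigned x = 0; x < 256; ++x) {
    for (unsigned y = 0; y < 256; ++y) {
      /* test2: (x & y) ^ (x | y) == x ^ y */
      assert(((x & y) ^ (x | y)) == (x ^ y));
      for (unsigned z = 0; z < 256; ++z) {
        /* test1: (x & z) ^ (y & z) == (x ^ y) & z */
        assert(((x & z) ^ (y & z)) == ((x ^ y) & z));
      }
    }
  }
  puts("and-xor merge identities hold for every 8-bit x, y, z");
  return 0;
}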

Added: llvm/trunk/test/Transforms/InstCombine/and-xor-or.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/and-xor-or.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/and-xor-or.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/and-xor-or.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,343 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; a & (a ^ b) --> a & ~b
+
+define i32 @and_xor_common_op(i32 %pa, i32 %pb) {
+; CHECK-LABEL: @and_xor_common_op(
+; CHECK-NEXT:    [[A:%.*]] = udiv i32 42, [[PA:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = udiv i32 43, [[PB:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[B]], -1
+; CHECK-NEXT:    [[R:%.*]] = and i32 [[A]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %a = udiv i32 42, %pa ; thwart complexity-based canonicalization
+  %b = udiv i32 43, %pb ; thwart complexity-based canonicalization
+  %xor = xor i32 %a, %b
+  %r = and i32 %a, %xor
+  ret i32 %r
+}
+
+; a & (b ^ a) --> a & ~b
+
+define i32 @and_xor_common_op_commute1(i32 %pa, i32 %pb) {
+; CHECK-LABEL: @and_xor_common_op_commute1(
+; CHECK-NEXT:    [[A:%.*]] = udiv i32 42, [[PA:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = udiv i32 43, [[PB:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[B]], -1
+; CHECK-NEXT:    [[R:%.*]] = and i32 [[A]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %a = udiv i32 42, %pa ; thwart complexity-based canonicalization
+  %b = udiv i32 43, %pb ; thwart complexity-based canonicalization
+  %xor = xor i32 %b, %a
+  %r = and i32 %a, %xor
+  ret i32 %r
+}
+
+; (b ^ a) & a --> a & ~b
+
+define i32 @and_xor_common_op_commute2(i32 %pa, i32 %pb) {
+; CHECK-LABEL: @and_xor_common_op_commute2(
+; CHECK-NEXT:    [[A:%.*]] = udiv i32 42, [[PA:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = udiv i32 43, [[PB:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[B]], -1
+; CHECK-NEXT:    [[R:%.*]] = and i32 [[A]], [[TMP1]]
+; CHECK-NEXT:    ret i32 [[R]]
+;
+  %a = udiv i32 42, %pa ; thwart complexity-based canonicalization
+  %b = udiv i32 43, %pb ; thwart complexity-based canonicalization
+  %xor = xor i32 %b, %a
+  %r = and i32 %xor, %a
+  ret i32 %r
+}
+
+; (a ^ b) & a --> a & ~b
+
+define <2 x i32> @and_xor_common_op_commute3(<2 x i32> %pa, <2 x i32> %pb) {
+; CHECK-LABEL: @and_xor_common_op_commute3(
+; CHECK-NEXT:    [[A:%.*]] = udiv <2 x i32> <i32 42, i32 43>, [[PA:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = udiv <2 x i32> <i32 43, i32 42>, [[PB:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[B]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i32> [[A]], [[TMP1]]
+; CHECK-NEXT:    ret <2 x i32> [[R]]
+;
+  %a = udiv <2 x i32> <i32 42, i32 43>, %pa ; thwart complexity-based canonicalization
+  %b = udiv <2 x i32> <i32 43, i32 42>, %pb ; thwart complexity-based canonicalization
+  %xor = xor <2 x i32> %a, %b
+  %r = and <2 x i32> %xor, %a
+  ret <2 x i32> %r
+}
+
+; It's ok to match a common constant.
+; TODO: The xor should be a 'not' op (-1 constant), but demanded bits shrinks it.
+
+define <4 x i32> @and_xor_common_op_constant(<4 x i32> %A) {
+; CHECK-LABEL: @and_xor_common_op_constant(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i32> [[A:%.*]], <i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[TMP1]], <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+;
+  %1 = xor <4 x i32> %A, <i32 1, i32 2, i32 3, i32 4>
+  %2 = and <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %1
+  ret <4 x i32> %2
+}
+
+; a & (a ^ ~b) --> a & b
+
+define i32 @and_xor_not_common_op(i32 %a, i32 %b) {
+; CHECK-LABEL: @and_xor_not_common_op(
+; CHECK-NEXT:    [[T4:%.*]] = and i32 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    ret i32 [[T4]]
+;
+  %b2 = xor i32 %b, -1
+  %t2 = xor i32 %a, %b2
+  %t4 = and i32 %t2, %a
+  ret i32 %t4
+}
+
+; rdar://10770603
+; (x & y) + (x ^ y) -> x | y
+
+define i64 @or(i64 %x, i64 %y) {
+; CHECK-LABEL: @or(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
+  %1 = and i64 %y, %x
+  %2 = xor i64 %y, %x
+  %3 = add i64 %1, %2
+  ret i64 %3
+}
+
+; (x & y) | (x ^ y) -> x | y
+
+define i64 @or2(i64 %x, i64 %y) {
+; CHECK-LABEL: @or2(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
+  %1 = and i64 %y, %x
+  %2 = xor i64 %y, %x
+  %3 = or i64 %1, %2
+  ret i64 %3
+}
+
+; PR37098 - https://bugs.llvm.org/show_bug.cgi?id=37098
+; Reassociate bitwise logic to eliminate a shift.
+; There are 4 commuted * 3 shift ops * 3 logic ops = 36 potential variations of this fold.
+; Mix the commutation options to provide coverage using fewer tests.
+
+define i8 @and_shl(i8 %x, i8 %y, i8 %z, i8 %shamt) {
+; CHECK-LABEL: @and_shl(
+; CHECK-NEXT:    [[SX:%.*]] = shl i8 [[X:%.*]], [[SHAMT:%.*]]
+; CHECK-NEXT:    [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]]
+; CHECK-NEXT:    [[A:%.*]] = and i8 [[SX]], [[Z:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[SY]], [[A]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sx = shl i8 %x, %shamt
+  %sy = shl i8 %y, %shamt
+  %a = and i8 %sx, %z
+  %r = and i8 %sy, %a
+  ret i8 %r
+}
+
+define i8 @or_shl(i8 %x, i8 %y, i8 %z, i8 %shamt) {
+; CHECK-LABEL: @or_shl(
+; CHECK-NEXT:    [[SX:%.*]] = shl i8 [[X:%.*]], [[SHAMT:%.*]]
+; CHECK-NEXT:    [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]]
+; CHECK-NEXT:    [[A:%.*]] = or i8 [[SX]], [[Z:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = or i8 [[A]], [[SY]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sx = shl i8 %x, %shamt
+  %sy = shl i8 %y, %shamt
+  %a = or i8 %sx, %z
+  %r = or i8 %a, %sy
+  ret i8 %r
+}
+
+define i8 @xor_shl(i8 %x, i8 %y, i8 %zarg, i8 %shamt) {
+; CHECK-LABEL: @xor_shl(
+; CHECK-NEXT:    [[Z:%.*]] = sdiv i8 42, [[ZARG:%.*]]
+; CHECK-NEXT:    [[SX:%.*]] = shl i8 [[X:%.*]], [[SHAMT:%.*]]
+; CHECK-NEXT:    [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]]
+; CHECK-NEXT:    [[A:%.*]] = xor i8 [[Z]], [[SX]]
+; CHECK-NEXT:    [[R:%.*]] = xor i8 [[A]], [[SY]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %z = sdiv i8 42, %zarg ; thwart complexity-based canonicalization
+  %sx = shl i8 %x, %shamt
+  %sy = shl i8 %y, %shamt
+  %a = xor i8 %z, %sx
+  %r = xor i8 %a, %sy
+  ret i8 %r
+}
+
+define i8 @and_lshr(i8 %x, i8 %y, i8 %zarg, i8 %shamt) {
+; CHECK-LABEL: @and_lshr(
+; CHECK-NEXT:    [[Z:%.*]] = sdiv i8 42, [[ZARG:%.*]]
+; CHECK-NEXT:    [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]]
+; CHECK-NEXT:    [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]]
+; CHECK-NEXT:    [[A:%.*]] = and i8 [[Z]], [[SX]]
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[SY]], [[A]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %z = sdiv i8 42, %zarg ; thwart complexity-based canonicalization
+  %sx = lshr i8 %x, %shamt
+  %sy = lshr i8 %y, %shamt
+  %a = and i8 %z, %sx
+  %r = and i8 %sy, %a
+  ret i8 %r
+}
+
+define i8 @or_lshr(i8 %x, i8 %y, i8 %z, i8 %shamt) {
+; CHECK-LABEL: @or_lshr(
+; CHECK-NEXT:    [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]]
+; CHECK-NEXT:    [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]]
+; CHECK-NEXT:    [[A:%.*]] = or i8 [[SX]], [[Z:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = or i8 [[SY]], [[A]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sx = lshr i8 %x, %shamt
+  %sy = lshr i8 %y, %shamt
+  %a = or i8 %sx, %z
+  %r = or i8 %sy, %a
+  ret i8 %r
+}
+
+define i8 @xor_lshr(i8 %x, i8 %y, i8 %z, i8 %shamt) {
+; CHECK-LABEL: @xor_lshr(
+; CHECK-NEXT:    [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]]
+; CHECK-NEXT:    [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]]
+; CHECK-NEXT:    [[A:%.*]] = xor i8 [[SX]], [[Z:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = xor i8 [[A]], [[SY]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sx = lshr i8 %x, %shamt
+  %sy = lshr i8 %y, %shamt
+  %a = xor i8 %sx, %z
+  %r = xor i8 %a, %sy
+  ret i8 %r
+}
+
+define i8 @and_ashr(i8 %x, i8 %y, i8 %zarg, i8 %shamt) {
+; CHECK-LABEL: @and_ashr(
+; CHECK-NEXT:    [[Z:%.*]] = sdiv i8 42, [[ZARG:%.*]]
+; CHECK-NEXT:    [[SX:%.*]] = ashr i8 [[X:%.*]], [[SHAMT:%.*]]
+; CHECK-NEXT:    [[SY:%.*]] = ashr i8 [[Y:%.*]], [[SHAMT]]
+; CHECK-NEXT:    [[A:%.*]] = and i8 [[Z]], [[SX]]
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[A]], [[SY]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %z = sdiv i8 42, %zarg ; thwart complexity-based canonicalization
+  %sx = ashr i8 %x, %shamt
+  %sy = ashr i8 %y, %shamt
+  %a = and i8 %z, %sx
+  %r = and i8 %a, %sy
+  ret i8 %r
+}
+
+define i8 @or_ashr(i8 %x, i8 %y, i8 %zarg, i8 %shamt) {
+; CHECK-LABEL: @or_ashr(
+; CHECK-NEXT:    [[Z:%.*]] = sdiv i8 42, [[ZARG:%.*]]
+; CHECK-NEXT:    [[SX:%.*]] = ashr i8 [[X:%.*]], [[SHAMT:%.*]]
+; CHECK-NEXT:    [[SY:%.*]] = ashr i8 [[Y:%.*]], [[SHAMT]]
+; CHECK-NEXT:    [[A:%.*]] = or i8 [[Z]], [[SX]]
+; CHECK-NEXT:    [[R:%.*]] = or i8 [[SY]], [[A]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %z = sdiv i8 42, %zarg ; thwart complexity-based canonicalization
+  %sx = ashr i8 %x, %shamt
+  %sy = ashr i8 %y, %shamt
+  %a = or i8 %z, %sx
+  %r = or i8 %sy, %a
+  ret i8 %r
+}
+
+define <2 x i8> @xor_ashr(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z, <2 x i8> %shamt) {
+; CHECK-LABEL: @xor_ashr(
+; CHECK-NEXT:    [[SX:%.*]] = ashr <2 x i8> [[X:%.*]], [[SHAMT:%.*]]
+; CHECK-NEXT:    [[SY:%.*]] = ashr <2 x i8> [[Y:%.*]], [[SHAMT]]
+; CHECK-NEXT:    [[A:%.*]] = xor <2 x i8> [[SX]], [[Z:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = xor <2 x i8> [[A]], [[SY]]
+; CHECK-NEXT:    ret <2 x i8> [[R]]
+;
+  %sx = ashr <2 x i8> %x, %shamt
+  %sy = ashr <2 x i8> %y, %shamt
+  %a = xor <2 x i8> %sx, %z
+  %r = xor <2 x i8> %a, %sy
+  ret <2 x i8> %r
+}
+
+; Negative test - different logic ops
+
+define i8 @or_and_shl(i8 %x, i8 %y, i8 %z, i8 %shamt) {
+; CHECK-LABEL: @or_and_shl(
+; CHECK-NEXT:    [[SX:%.*]] = shl i8 [[X:%.*]], [[SHAMT:%.*]]
+; CHECK-NEXT:    [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]]
+; CHECK-NEXT:    [[A:%.*]] = or i8 [[SX]], [[Z:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = and i8 [[SY]], [[A]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sx = shl i8 %x, %shamt
+  %sy = shl i8 %y, %shamt
+  %a = or i8 %sx, %z
+  %r = and i8 %sy, %a
+  ret i8 %r
+}
+
+; Negative test - different shift ops
+
+define i8 @or_lshr_shl(i8 %x, i8 %y, i8 %z, i8 %shamt) {
+; CHECK-LABEL: @or_lshr_shl(
+; CHECK-NEXT:    [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]]
+; CHECK-NEXT:    [[SY:%.*]] = shl i8 [[Y:%.*]], [[SHAMT]]
+; CHECK-NEXT:    [[A:%.*]] = or i8 [[SX]], [[Z:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = or i8 [[A]], [[SY]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sx = lshr i8 %x, %shamt
+  %sy = shl i8 %y, %shamt
+  %a = or i8 %sx, %z
+  %r = or i8 %a, %sy
+  ret i8 %r
+}
+
+; Negative test - different shift amounts
+
+define i8 @or_lshr_shamt2(i8 %x, i8 %y, i8 %z, i8 %shamt) {
+; CHECK-LABEL: @or_lshr_shamt2(
+; CHECK-NEXT:    [[SX:%.*]] = lshr i8 [[X:%.*]], 5
+; CHECK-NEXT:    [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT:%.*]]
+; CHECK-NEXT:    [[A:%.*]] = or i8 [[SX]], [[Z:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = or i8 [[SY]], [[A]]
+; CHECK-NEXT:    ret i8 [[R]]
+;
+  %sx = lshr i8 %x, 5
+  %sy = lshr i8 %y, %shamt
+  %a = or i8 %sx, %z
+  %r = or i8 %sy, %a
+  ret i8 %r
+}
+
+; Negative test - multi-use
+
+define i8 @xor_lshr_multiuse(i8 %x, i8 %y, i8 %z, i8 %shamt) {
+; CHECK-LABEL: @xor_lshr_multiuse(
+; CHECK-NEXT:    [[SX:%.*]] = lshr i8 [[X:%.*]], [[SHAMT:%.*]]
+; CHECK-NEXT:    [[SY:%.*]] = lshr i8 [[Y:%.*]], [[SHAMT]]
+; CHECK-NEXT:    [[A:%.*]] = xor i8 [[SX]], [[Z:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = xor i8 [[A]], [[SY]]
+; CHECK-NEXT:    [[R2:%.*]] = sdiv i8 [[A]], [[R]]
+; CHECK-NEXT:    ret i8 [[R2]]
+;
+  %sx = lshr i8 %x, %shamt
+  %sy = lshr i8 %y, %shamt
+  %a = xor i8 %sx, %z
+  %r = xor i8 %a, %sy
+  %r2 = sdiv i8 %a, %r
+  ret i8 %r2
+}
+
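The PR37098 reassociations above are justified because a shift by the same amount distributes over any bitwise logic op, so the two shifted operands can be combined first and shifted once. This C sketch is illustrative only (not part of the patch; it uses 8-bit values and samples one third operand per pair in place of the test's i8/vector types) and also covers the 'a & (a ^ b) --> a & ~b' fold:

#include <assert.h>
#include <stdio.h>

int main(void) {
  for (unsigned x = 0; x < 256; ++x) {
    for (unsigned y = 0; y < 256; ++y) {
      /* and_xor_common_op: a & (a ^ b) == a & ~b */
      assert((x & (x ^ y)) == (x & (~y & 0xff)));
      for (unsigned s = 0; s < 8; ++s) {
        unsigned sx = (x << s) & 0xff, sy = (y << s) & 0xff; /* shl i8 %x/%y, s */
        unsigned z = (x * 131 + y + s) & 0xff;               /* one sample third operand */
        /* and_shl / or_shl / xor_shl: ((x<<s) op z) op (y<<s) == ((x op y)<<s) op z */
        assert(((sx & z) & sy) == ((((x & y) << s) & 0xff) & z));
        assert(((sx | z) | sy) == ((((x | y) << s) & 0xff) | z));
        assert(((sx ^ z) ^ sy) == ((((x ^ y) << s) & 0xff) ^ z));
      }
    }
  }
  puts("shift reassociation identities hold for every 8-bit x and y");
  return 0;
}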

Added: llvm/trunk/test/Transforms/InstCombine/and.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/and.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/and.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/and.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,839 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; There should be no 'and' instructions left in any test.
+
+define i32 @test1(i32 %A) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    ret i32 0
+;
+  %B = and i32 %A, 0
+  ret i32 %B
+}
+
+define i32 @test2(i32 %A) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    ret i32 %A
+;
+  %B = and i32 %A, -1
+  ret i32 %B
+}
+
+define i1 @test3(i1 %A) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    ret i1 false
+;
+  %B = and i1 %A, false
+  ret i1 %B
+}
+
+define i1 @test4(i1 %A) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    ret i1 %A
+;
+  %B = and i1 %A, true
+  ret i1 %B
+}
+
+define i32 @test5(i32 %A) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    ret i32 %A
+;
+  %B = and i32 %A, %A
+  ret i32 %B
+}
+
+define i1 @test6(i1 %A) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    ret i1 %A
+;
+  %B = and i1 %A, %A
+  ret i1 %B
+}
+
+; A & ~A == 0
+define i32 @test7(i32 %A) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    ret i32 0
+;
+  %NotA = xor i32 %A, -1
+  %B = and i32 %A, %NotA
+  ret i32 %B
+}
+
+; AND associates
+define i8 @test8(i8 %A) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:    ret i8 0
+;
+  %B = and i8 %A, 3
+  %C = and i8 %B, 4
+  ret i8 %C
+}
+
+; Test of sign bit, convert to icmp slt %A, 0
+define i1 @test9(i32 %A) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i32 %A, 0
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %B = and i32 %A, -2147483648
+  %C = icmp ne i32 %B, 0
+  ret i1 %C
+}
+
+; Test of sign bit, convert to icmp slt %A, 0
+define i1 @test9a(i32 %A) {
+; CHECK-LABEL: @test9a(
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i32 %A, 0
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %B = and i32 %A, -2147483648
+  %C = icmp ne i32 %B, 0
+  ret i1 %C
+}
+
+define i32 @test10(i32 %A) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT:    ret i32 1
+;
+  %B = and i32 %A, 12
+  %C = xor i32 %B, 15
+  ; (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
+  %D = and i32 %C, 1
+  ret i32 %D
+}
+
+define i32 @test11(i32 %A, i32* %P) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT:    [[B:%.*]] = or i32 %A, 3
+; CHECK-NEXT:    [[C:%.*]] = xor i32 [[B]], 12
+; CHECK-NEXT:    store i32 [[C]], i32* %P, align 4
+; CHECK-NEXT:    ret i32 3
+;
+  %B = or i32 %A, 3
+  %C = xor i32 %B, 12
+  ; additional use of C
+  store i32 %C, i32* %P
+  ; %D = and i32 %C, 3 --> 3
+  %D = and i32 %C, 3
+  ret i32 %D
+}
+
+define i1 @test12(i32 %A, i32 %B) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 %A, %B
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %C1 = icmp ult i32 %A, %B
+  %C2 = icmp ule i32 %A, %B
+  ; (A < B) & (A <= B) === (A < B)
+  %D = and i1 %C1, %C2
+  ret i1 %D
+}
+
+define i1 @test13(i32 %A, i32 %B) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT:    ret i1 false
+;
+  %C1 = icmp ult i32 %A, %B
+  %C2 = icmp ugt i32 %A, %B
+  ; (A < B) & (A > B) === false
+  %D = and i1 %C1, %C2
+  ret i1 %D
+}
+
+define i1 @test14(i8 %A) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i8 %A, 0
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %B = and i8 %A, -128
+  %C = icmp ne i8 %B, 0
+  ret i1 %C
+}
+
+define i8 @test15(i8 %A) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT:    ret i8 0
+;
+  %B = lshr i8 %A, 7
+  ; Always equals zero
+  %C = and i8 %B, 2
+  ret i8 %C
+}
+
+define i8 @test16(i8 %A) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT:    ret i8 0
+;
+  %B = shl i8 %A, 2
+  %C = and i8 %B, 3
+  ret i8 %C
+}
+
+define i1 @test18(i32 %A) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT:    [[C:%.*]] = icmp ugt i32 %A, 127
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %B = and i32 %A, -128
+  ;; %C is true iff %A u>= 128
+  %C = icmp ne i32 %B, 0
+  ret i1 %C
+}
+
+define <2 x i1> @test18_vec(<2 x i32> %A) {
+; CHECK-LABEL: @test18_vec(
+; CHECK-NEXT:    [[C:%.*]] = icmp ugt <2 x i32> %A, <i32 127, i32 127>
+; CHECK-NEXT:    ret <2 x i1> [[C]]
+;
+  %B = and <2 x i32> %A, <i32 -128, i32 -128>
+  %C = icmp ne <2 x i32> %B, zeroinitializer
+  ret <2 x i1> %C
+}
+
+define i1 @test18a(i8 %A) {
+; CHECK-LABEL: @test18a(
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i8 %A, 2
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %B = and i8 %A, -2
+  %C = icmp eq i8 %B, 0
+  ret i1 %C
+}
+
+define <2 x i1> @test18a_vec(<2 x i8> %A) {
+; CHECK-LABEL: @test18a_vec(
+; CHECK-NEXT:    [[C:%.*]] = icmp ult <2 x i8> %A, <i8 2, i8 2>
+; CHECK-NEXT:    ret <2 x i1> [[C]]
+;
+  %B = and <2 x i8> %A, <i8 -2, i8 -2>
+  %C = icmp eq <2 x i8> %B, zeroinitializer
+  ret <2 x i1> %C
+}
+
+define i32 @test19(i32 %A) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT:    [[B:%.*]] = shl i32 %A, 3
+; CHECK-NEXT:    ret i32 [[B]]
+;
+  %B = shl i32 %A, 3
+  ;; Clearing a zero bit
+  %C = and i32 %B, -2
+  ret i32 %C
+}
+
+define i8 @test20(i8 %A) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT:    [[C:%.*]] = lshr i8 %A, 7
+; CHECK-NEXT:    ret i8 [[C]]
+;
+  %C = lshr i8 %A, 7
+  ;; Unneeded
+  %D = and i8 %C, 1
+  ret i8 %D
+}
+
+define i1 @test23(i32 %A) {
+; CHECK-LABEL: @test23(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 %A, 2
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %B = icmp sgt i32 %A, 1
+  %C = icmp sle i32 %A, 2
+  %D = and i1 %B, %C
+  ret i1 %D
+}
+
+; FIXME: Vectors should fold too.
+define <2 x i1> @test23vec(<2 x i32> %A) {
+; CHECK-LABEL: @test23vec(
+; CHECK-NEXT:    [[B:%.*]] = icmp sgt <2 x i32> %A, <i32 1, i32 1>
+; CHECK-NEXT:    [[C:%.*]] = icmp slt <2 x i32> %A, <i32 3, i32 3>
+; CHECK-NEXT:    [[D:%.*]] = and <2 x i1> [[B]], [[C]]
+; CHECK-NEXT:    ret <2 x i1> [[D]]
+;
+  %B = icmp sgt <2 x i32> %A, <i32 1, i32 1>
+  %C = icmp sle <2 x i32> %A, <i32 2, i32 2>
+  %D = and <2 x i1> %B, %C
+  ret <2 x i1> %D
+}
+
+define i1 @test24(i32 %A) {
+; CHECK-LABEL: @test24(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 %A, 2
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %B = icmp sgt i32 %A, 1
+  %C = icmp ne i32 %A, 2
+  ;; A > 2
+  %D = and i1 %B, %C
+  ret i1 %D
+}
+
+define i1 @test25(i32 %A) {
+; CHECK-LABEL: @test25(
+; CHECK-NEXT:    [[A_OFF:%.*]] = add i32 %A, -50
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A_OFF]], 50
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %B = icmp sge i32 %A, 50
+  %C = icmp slt i32 %A, 100
+  %D = and i1 %B, %C
+  ret i1 %D
+}
+
+; FIXME: Vectors should fold too.
+define <2 x i1> @test25vec(<2 x i32> %A) {
+; CHECK-LABEL: @test25vec(
+; CHECK-NEXT:    [[B:%.*]] = icmp sgt <2 x i32> %A, <i32 49, i32 49>
+; CHECK-NEXT:    [[C:%.*]] = icmp slt <2 x i32> %A, <i32 100, i32 100>
+; CHECK-NEXT:    [[D:%.*]] = and <2 x i1> [[B]], [[C]]
+; CHECK-NEXT:    ret <2 x i1> [[D]]
+;
+  %B = icmp sge <2 x i32> %A, <i32 50, i32 50>
+  %C = icmp slt <2 x i32> %A, <i32 100, i32 100>
+  %D = and <2 x i1> %B, %C
+  ret <2 x i1> %D
+}
+
+define i8 @test27(i8 %A) {
+; CHECK-LABEL: @test27(
+; CHECK-NEXT:    ret i8 0
+;
+  %B = and i8 %A, 4
+  %C = sub i8 %B, 16
+  ;; 0xF0
+  %D = and i8 %C, -16
+  %E = add i8 %D, 16
+  ret i8 %E
+}
+
+;; This is just a zero-extending shr.
+define i32 @test28(i32 %X) {
+; CHECK-LABEL: @test28(
+; CHECK-NEXT:    [[Y1:%.*]] = lshr i32 %X, 24
+; CHECK-NEXT:    ret i32 [[Y1]]
+;
+  ;; Sign extend
+  %Y = ashr i32 %X, 24
+  ;; Mask out sign bits
+  %Z = and i32 %Y, 255
+  ret i32 %Z
+}
+
+define i32 @test29(i8 %X) {
+; CHECK-LABEL: @test29(
+; CHECK-NEXT:    [[Y:%.*]] = zext i8 %X to i32
+; CHECK-NEXT:    ret i32 [[Y]]
+;
+  %Y = zext i8 %X to i32
+  ;; Zero extend makes this unneeded.
+  %Z = and i32 %Y, 255
+  ret i32 %Z
+}
+
+define i32 @test30(i1 %X) {
+; CHECK-LABEL: @test30(
+; CHECK-NEXT:    [[Y:%.*]] = zext i1 %X to i32
+; CHECK-NEXT:    ret i32 [[Y]]
+;
+  %Y = zext i1 %X to i32
+  %Z = and i32 %Y, 1
+  ret i32 %Z
+}
+
+define i32 @test31(i1 %X) {
+; CHECK-LABEL: @test31(
+; CHECK-NEXT:    [[Y:%.*]] = zext i1 %X to i32
+; CHECK-NEXT:    [[Z:%.*]] = shl nuw nsw i32 [[Y]], 4
+; CHECK-NEXT:    ret i32 [[Z]]
+;
+  %Y = zext i1 %X to i32
+  %Z = shl i32 %Y, 4
+  %A = and i32 %Z, 16
+  ret i32 %A
+}
+
+; Demanded bit analysis allows us to eliminate the add.
+
+define <2 x i32> @and_demanded_bits_splat_vec(<2 x i32> %x) {
+; CHECK-LABEL: @and_demanded_bits_splat_vec(
+; CHECK-NEXT:    [[Z:%.*]] = and <2 x i32> %x, <i32 7, i32 7>
+; CHECK-NEXT:    ret <2 x i32> [[Z]]
+;
+  %y = add <2 x i32> %x, <i32 8, i32 8>
+  %z = and <2 x i32> %y, <i32 7, i32 7>
+  ret <2 x i32> %z
+}
+
+; zext (x >> 8) has all zeros in the high 24-bits:  0x000000xx
+; (y | 255) has all ones in the low 8-bits: 0xyyyyyyff
+; 'and' of those is all known bits - it's just 'z'.
+
+define i32 @and_zext_demanded(i16 %x, i32 %y) {
+; CHECK-LABEL: @and_zext_demanded(
+; CHECK-NEXT:    [[S:%.*]] = lshr i16 %x, 8
+; CHECK-NEXT:    [[Z:%.*]] = zext i16 [[S]] to i32
+; CHECK-NEXT:    ret i32 [[Z]]
+;
+  %s = lshr i16 %x, 8
+  %z = zext i16 %s to i32
+  %o = or i32 %y, 255
+  %a = and i32 %o, %z
+  ret i32 %a
+}
+
+define i32 @test32(i32 %In) {
+; CHECK-LABEL: @test32(
+; CHECK-NEXT:    ret i32 0
+;
+  %Y = and i32 %In, 16
+  %Z = lshr i32 %Y, 2
+  %A = and i32 %Z, 1
+  ret i32 %A
+}
+
+;; Code corresponding to a one-bit bitfield xor'd with 1.
+define i32 @test33(i32 %b) {
+; CHECK-LABEL: @test33(
+; CHECK-NEXT:    [[TMP_13:%.*]] = xor i32 %b, 1
+; CHECK-NEXT:    ret i32 [[TMP_13]]
+;
+  %tmp.4.mask = and i32 %b, 1
+  %tmp.10 = xor i32 %tmp.4.mask, 1
+  %tmp.12 = and i32 %b, -2
+  %tmp.13 = or i32 %tmp.12, %tmp.10
+  ret i32 %tmp.13
+}
+
+define i32 @test33b(i32 %b) {
+; CHECK-LABEL: @test33b(
+; CHECK-NEXT:    [[TMP_13:%.*]] = xor i32 [[B:%.*]], 1
+; CHECK-NEXT:    ret i32 [[TMP_13]]
+;
+  %tmp.4.mask = and i32 %b, 1
+  %tmp.10 = xor i32 %tmp.4.mask, 1
+  %tmp.12 = and i32 %b, -2
+  %tmp.13 = or i32 %tmp.10, %tmp.12
+  ret i32 %tmp.13
+}
+
+define <2 x i32> @test33vec(<2 x i32> %b) {
+; CHECK-LABEL: @test33vec(
+; CHECK-NEXT:    [[TMP_13:%.*]] = xor <2 x i32> [[B:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    ret <2 x i32> [[TMP_13]]
+;
+  %tmp.4.mask = and <2 x i32> %b, <i32 1, i32 1>
+  %tmp.10 = xor <2 x i32> %tmp.4.mask, <i32 1, i32 1>
+  %tmp.12 = and <2 x i32> %b, <i32 -2, i32 -2>
+  %tmp.13 = or <2 x i32> %tmp.12, %tmp.10
+  ret <2 x i32> %tmp.13
+}
+
+define <2 x i32> @test33vecb(<2 x i32> %b) {
+; CHECK-LABEL: @test33vecb(
+; CHECK-NEXT:    [[TMP_13:%.*]] = xor <2 x i32> [[B:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    ret <2 x i32> [[TMP_13]]
+;
+  %tmp.4.mask = and <2 x i32> %b, <i32 1, i32 1>
+  %tmp.10 = xor <2 x i32> %tmp.4.mask, <i32 1, i32 1>
+  %tmp.12 = and <2 x i32> %b, <i32 -2, i32 -2>
+  %tmp.13 = or <2 x i32> %tmp.10, %tmp.12
+  ret <2 x i32> %tmp.13
+}
+
+define i32 @test34(i32 %A, i32 %B) {
+; CHECK-LABEL: @test34(
+; CHECK-NEXT:    ret i32 %B
+;
+  %tmp.2 = or i32 %B, %A
+  %tmp.4 = and i32 %tmp.2, %B
+  ret i32 %tmp.4
+}
+
+; FIXME: This test should only need -instsimplify (ValueTracking / computeKnownBits), not -instcombine.
+
+define <2 x i32> @PR24942(<2 x i32> %x) {
+; CHECK-LABEL: @PR24942(
+; CHECK-NEXT:    ret <2 x i32> zeroinitializer
+;
+  %lshr = lshr <2 x i32> %x, <i32 31, i32 31>
+  %and = and <2 x i32> %lshr, <i32 2, i32 2>
+  ret <2 x i32> %and
+}
+
+define i64 @test35(i32 %X) {
+; CHECK-LABEL: @test35(
+; CHECK-NEXT:  %[[sub:.*]] = sub i32 0, %X
+; CHECK-NEXT:  %[[and:.*]] = and i32 %[[sub]], 240
+; CHECK-NEXT:  %[[cst:.*]] = zext i32 %[[and]] to i64
+; CHECK-NEXT:  ret i64 %[[cst]]
+  %zext = zext i32 %X to i64
+  %zsub = sub i64 0, %zext
+  %res = and i64 %zsub, 240
+  ret i64 %res
+}
+
+define i64 @test36(i32 %X) {
+; CHECK-LABEL: @test36(
+; CHECK-NEXT:  %[[sub:.*]] = add i32 %X, 7
+; CHECK-NEXT:  %[[and:.*]] = and i32 %[[sub]], 240
+; CHECK-NEXT:  %[[cst:.*]] = zext i32 %[[and]] to i64
+; CHECK-NEXT:  ret i64 %[[cst]]
+  %zext = zext i32 %X to i64
+  %zsub = add i64 %zext, 7
+  %res = and i64 %zsub, 240
+  ret i64 %res
+}
+
+define i64 @test37(i32 %X) {
+; CHECK-LABEL: @test37(
+; CHECK-NEXT:  %[[sub:.*]] = mul i32 %X, 7
+; CHECK-NEXT:  %[[and:.*]] = and i32 %[[sub]], 240
+; CHECK-NEXT:  %[[cst:.*]] = zext i32 %[[and]] to i64
+; CHECK-NEXT:  ret i64 %[[cst]]
+  %zext = zext i32 %X to i64
+  %zsub = mul i64 %zext, 7
+  %res = and i64 %zsub, 240
+  ret i64 %res
+}
+
+define i64 @test38(i32 %X) {
+; CHECK-LABEL: @test38(
+; CHECK-NEXT:  %[[and:.*]] = and i32 %X, 240
+; CHECK-NEXT:  %[[cst:.*]] = zext i32 %[[and]] to i64
+; CHECK-NEXT:  ret i64 %[[cst]]
+  %zext = zext i32 %X to i64
+  %zsub = xor i64 %zext, 7
+  %res = and i64 %zsub, 240
+  ret i64 %res
+}
+
+define i64 @test39(i32 %X) {
+; CHECK-LABEL: @test39(
+; CHECK-NEXT:  %[[and:.*]] = and i32 %X, 240
+; CHECK-NEXT:  %[[cst:.*]] = zext i32 %[[and]] to i64
+; CHECK-NEXT:  ret i64 %[[cst]]
+  %zext = zext i32 %X to i64
+  %zsub = or i64 %zext, 7
+  %res = and i64 %zsub, 240
+  ret i64 %res
+}
+
+define i32 @test40(i1 %C) {
+; CHECK-LABEL: @test40(
+; CHECK-NEXT:    [[A:%.*]] = select i1 [[C:%.*]], i32 104, i32 10
+; CHECK-NEXT:    ret i32 [[A]]
+;
+  %A = select i1 %C, i32 1000, i32 10
+  %V = and i32 %A, 123
+  ret i32 %V
+}
+
+define <2 x i32> @test40vec(i1 %C) {
+; CHECK-LABEL: @test40vec(
+; CHECK-NEXT:    [[A:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 104, i32 104>, <2 x i32> <i32 10, i32 10>
+; CHECK-NEXT:    ret <2 x i32> [[A]]
+;
+  %A = select i1 %C, <2 x i32> <i32 1000, i32 1000>, <2 x i32> <i32 10, i32 10>
+  %V = and <2 x i32> %A, <i32 123, i32 123>
+  ret <2 x i32> %V
+}
+
+define <2 x i32> @test40vec2(i1 %C) {
+; CHECK-LABEL: @test40vec2(
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 104, i32 324>, <2 x i32> <i32 10, i32 12>
+; CHECK-NEXT:    ret <2 x i32> [[V]]
+;
+  %A = select i1 %C, <2 x i32> <i32 1000, i32 2500>, <2 x i32> <i32 10, i32 30>
+  %V = and <2 x i32> %A, <i32 123, i32 333>
+  ret <2 x i32> %V
+}
+
+define i32 @test41(i1 %which) {
+; CHECK-LABEL: @test41(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[A:%.*]] = phi i32 [ 104, [[ENTRY:%.*]] ], [ 10, [[DELAY]] ]
+; CHECK-NEXT:    ret i32 [[A]]
+;
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi i32 [ 1000, %entry ], [ 10, %delay ]
+  %value = and i32 %A, 123
+  ret i32 %value
+}
+
+define <2 x i32> @test41vec(i1 %which) {
+; CHECK-LABEL: @test41vec(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ <i32 104, i32 104>, [[ENTRY:%.*]] ], [ <i32 10, i32 10>, [[DELAY]] ]
+; CHECK-NEXT:    ret <2 x i32> [[A]]
+;
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi <2 x i32> [ <i32 1000, i32 1000>, %entry ], [ <i32 10, i32 10>, %delay ]
+  %value = and <2 x i32> %A, <i32 123, i32 123>
+  ret <2 x i32> %value
+}
+
+define <2 x i32> @test41vec2(i1 %which) {
+; CHECK-LABEL: @test41vec2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
+; CHECK:       delay:
+; CHECK-NEXT:    br label [[FINAL]]
+; CHECK:       final:
+; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ <i32 104, i32 324>, [[ENTRY:%.*]] ], [ <i32 10, i32 12>, [[DELAY]] ]
+; CHECK-NEXT:    ret <2 x i32> [[A]]
+;
+entry:
+  br i1 %which, label %final, label %delay
+
+delay:
+  br label %final
+
+final:
+  %A = phi <2 x i32> [ <i32 1000, i32 2500>, %entry ], [ <i32 10, i32 30>, %delay ]
+  %value = and <2 x i32> %A, <i32 123, i32 333>
+  ret <2 x i32> %value
+}
+
+define i32 @test42(i32 %a, i32 %c, i32 %d) {
+; CHECK-LABEL: @test42(
+; CHECK-NEXT:    [[FORCE:%.*]] = mul i32 [[C:%.*]], [[D:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[FORCE]], [[A:%.*]]
+; CHECK-NEXT:    ret i32 [[AND]]
+;
+  %force = mul i32 %c, %d ; forces the complexity sorting
+  %or = or i32 %a, %force
+  %nota = xor i32 %a, -1
+  %xor = xor i32 %nota, %force
+  %and = and i32 %xor, %or
+  ret i32 %and
+}
+
+define i32 @test43(i32 %a, i32 %c, i32 %d) {
+; CHECK-LABEL: @test43(
+; CHECK-NEXT:    [[FORCE:%.*]] = mul i32 [[C:%.*]], [[D:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[FORCE]], [[A:%.*]]
+; CHECK-NEXT:    ret i32 [[AND]]
+;
+  %force = mul i32 %c, %d ; forces the complexity sorting
+  %or = or i32 %a, %force
+  %nota = xor i32 %a, -1
+  %xor = xor i32 %nota, %force
+  %and = and i32 %or, %xor
+  ret i32 %and
+}
+
+; (~y | x) & y -> x & y
+define i32 @test44(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @test44(
+; CHECK-NEXT:    [[A:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[A]]
+;
+  %n = xor i32 %y, -1
+  %o = or i32 %n, %x
+  %a = and i32 %o, %y
+  ret i32 %a
+}
+
+; (x | ~y) & y -> x & y
+define i32 @test45(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @test45(
+; CHECK-NEXT:    [[A:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[A]]
+;
+  %n = xor i32 %y, -1
+  %o = or i32 %x, %n
+  %a = and i32 %o, %y
+  ret i32 %a
+}
+
+; y & (~y | x) -> x & y
+define i32 @test46(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @test46(
+; CHECK-NEXT:    [[A:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[A]]
+;
+  %n = xor i32 %y, -1
+  %o = or i32 %n, %x
+  %a = and i32 %y, %o
+  ret i32 %a
+}
+
+; y & (x | ~y) -> x & y
+define i32 @test47(i32 %x, i32 %y) nounwind {
+; CHECK-LABEL: @test47(
+; CHECK-NEXT:    [[A:%.*]] = and i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    ret i32 [[A]]
+;
+  %n = xor i32 %y, -1
+  %o = or i32 %x, %n
+  %a = and i32 %y, %o
+  ret i32 %a
+}
+
+; In the next 4 tests, vary the types and predicates for extra coverage.
+; (X & (Y | ~X)) -> (X & Y), where 'not' is an inverted cmp
+
+define i1 @and_orn_cmp_1(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @and_orn_cmp_1(
+; CHECK-NEXT:    [[X:%.*]] = icmp sgt i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[Y:%.*]] = icmp ugt i32 [[C:%.*]], 42
+; CHECK-NEXT:    [[AND:%.*]] = and i1 [[X]], [[Y]]
+; CHECK-NEXT:    ret i1 [[AND]]
+;
+  %x = icmp sgt i32 %a, %b
+  %x_inv = icmp sle i32 %a, %b
+  %y = icmp ugt i32 %c, 42      ; thwart complexity-based ordering
+  %or = or i1 %y, %x_inv
+  %and = and i1 %x, %or
+  ret i1 %and
+}
+
+; Commute the 'and':
+; ((Y | ~X) & X) -> (X & Y), where 'not' is an inverted cmp
+
+define <2 x i1> @and_orn_cmp_2(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) {
+; CHECK-LABEL: @and_orn_cmp_2(
+; CHECK-NEXT:    [[X:%.*]] = icmp sge <2 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[Y:%.*]] = icmp ugt <2 x i32> [[C:%.*]], <i32 42, i32 47>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i1> [[Y]], [[X]]
+; CHECK-NEXT:    ret <2 x i1> [[AND]]
+;
+  %x = icmp sge <2 x i32> %a, %b
+  %x_inv = icmp slt <2 x i32> %a, %b
+  %y = icmp ugt <2 x i32> %c, <i32 42, i32 47>      ; thwart complexity-based ordering
+  %or = or <2 x i1> %y, %x_inv
+  %and = and <2 x i1> %or, %x
+  ret <2 x i1> %and
+}
+
+; Commute the 'or':
+; (X & (~X | Y)) -> (X & Y), where 'not' is an inverted cmp
+
+define i1 @and_orn_cmp_3(i72 %a, i72 %b, i72 %c) {
+; CHECK-LABEL: @and_orn_cmp_3(
+; CHECK-NEXT:    [[X:%.*]] = icmp ugt i72 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[Y:%.*]] = icmp ugt i72 [[C:%.*]], 42
+; CHECK-NEXT:    [[AND:%.*]] = and i1 [[X]], [[Y]]
+; CHECK-NEXT:    ret i1 [[AND]]
+;
+  %x = icmp ugt i72 %a, %b
+  %x_inv = icmp ule i72 %a, %b
+  %y = icmp ugt i72 %c, 42      ; thwart complexity-based ordering
+  %or = or i1 %x_inv, %y
+  %and = and i1 %x, %or
+  ret i1 %and
+}
+
+; Commute the 'and':
+; ((~X | Y) & X) -> (X & Y), where 'not' is an inverted cmp
+
+define <3 x i1> @or_andn_cmp_4(<3 x i32> %a, <3 x i32> %b, <3 x i32> %c) {
+; CHECK-LABEL: @or_andn_cmp_4(
+; CHECK-NEXT:    [[X:%.*]] = icmp eq <3 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[Y:%.*]] = icmp ugt <3 x i32> [[C:%.*]], <i32 42, i32 43, i32 -1>
+; CHECK-NEXT:    [[AND:%.*]] = and <3 x i1> [[Y]], [[X]]
+; CHECK-NEXT:    ret <3 x i1> [[AND]]
+;
+  %x = icmp eq <3 x i32> %a, %b
+  %x_inv = icmp ne <3 x i32> %a, %b
+  %y = icmp ugt <3 x i32> %c, <i32 42, i32 43, i32 -1>      ; thwart complexity-based ordering
+  %or = or <3 x i1> %x_inv, %y
+  %and = and <3 x i1> %or, %x
+  ret <3 x i1> %and
+}
+
+; In the next 4 tests, vary the types and predicates for extra coverage.
+; (~X & (Y | X)) -> (~X & Y), where 'not' is an inverted cmp
+
+define i1 @andn_or_cmp_1(i37 %a, i37 %b, i37 %c) {
+; CHECK-LABEL: @andn_or_cmp_1(
+; CHECK-NEXT:    [[X_INV:%.*]] = icmp sle i37 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[Y:%.*]] = icmp ugt i37 [[C:%.*]], 42
+; CHECK-NEXT:    [[AND:%.*]] = and i1 [[X_INV]], [[Y]]
+; CHECK-NEXT:    ret i1 [[AND]]
+;
+  %x = icmp sgt i37 %a, %b
+  %x_inv = icmp sle i37 %a, %b
+  %y = icmp ugt i37 %c, 42      ; thwart complexity-based ordering
+  %or = or i1 %y, %x
+  %and = and i1 %x_inv, %or
+  ret i1 %and
+}
+
+; Commute the 'and':
+; ((Y | X) & ~X) -> (~X & Y), where 'not' is an inverted cmp
+
+define i1 @andn_or_cmp_2(i16 %a, i16 %b, i16 %c) {
+; CHECK-LABEL: @andn_or_cmp_2(
+; CHECK-NEXT:    [[X_INV:%.*]] = icmp slt i16 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[Y:%.*]] = icmp ugt i16 [[C:%.*]], 42
+; CHECK-NEXT:    [[AND:%.*]] = and i1 [[Y]], [[X_INV]]
+; CHECK-NEXT:    ret i1 [[AND]]
+;
+  %x = icmp sge i16 %a, %b
+  %x_inv = icmp slt i16 %a, %b
+  %y = icmp ugt i16 %c, 42      ; thwart complexity-based ordering
+  %or = or i1 %y, %x
+  %and = and i1 %or, %x_inv
+  ret i1 %and
+}
+
+; Commute the 'or':
+; (~X & (X | Y)) -> (~X & Y), where 'not' is an inverted cmp
+
+define <4 x i1> @andn_or_cmp_3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: @andn_or_cmp_3(
+; CHECK-NEXT:    [[X_INV:%.*]] = icmp ule <4 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[Y:%.*]] = icmp ugt <4 x i32> [[C:%.*]], <i32 42, i32 0, i32 1, i32 -1>
+; CHECK-NEXT:    [[AND:%.*]] = and <4 x i1> [[X_INV]], [[Y]]
+; CHECK-NEXT:    ret <4 x i1> [[AND]]
+;
+  %x = icmp ugt <4 x i32> %a, %b
+  %x_inv = icmp ule <4 x i32> %a, %b
+  %y = icmp ugt <4 x i32> %c, <i32 42, i32 0, i32 1, i32 -1>      ; thwart complexity-based ordering
+  %or = or <4 x i1> %x, %y
+  %and = and <4 x i1> %x_inv, %or
+  ret <4 x i1> %and
+}
+
+; Commute the 'and':
+; ((X | Y) & ~X) -> (~X & Y), where 'not' is an inverted cmp
+
+define i1 @andn_or_cmp_4(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @andn_or_cmp_4(
+; CHECK-NEXT:    [[X_INV:%.*]] = icmp ne i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[Y:%.*]] = icmp ugt i32 [[C:%.*]], 42
+; CHECK-NEXT:    [[AND:%.*]] = and i1 [[Y]], [[X_INV]]
+; CHECK-NEXT:    ret i1 [[AND]]
+;
+  %x = icmp eq i32 %a, %b
+  %x_inv = icmp ne i32 %a, %b
+  %y = icmp ugt i32 %c, 42      ; thwart complexity-based ordering
+  %or = or i1 %x, %y
+  %and = and i1 %or, %x_inv
+  ret i1 %and
+}
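Several of the scalar folds in and.ll above are likewise simple bit identities; a short C brute-force check over 8-bit values (illustrative only, not part of the patch) covers a few representative ones:

#include <assert.h>
#include <stdio.h>

int main(void) {
  for (unsigned x = 0; x < 256; ++x) {
    for (unsigned y = 0; y < 256; ++y) {
      unsigned ny = ~y & 0xff;
      /* test44..test47: (~y | x) & y == x & y */
      assert(((ny | x) & y) == (x & y));
      /* test33: ((b & 1) ^ 1) | (b & -2) == b ^ 1 */
      assert((((y & 1) ^ 1) | (y & 0xfe)) == (y ^ 1));
      /* and_demanded_bits_splat_vec: (x + 8) & 7 == x & 7 */
      assert(((x + 8) & 7) == (x & 7));
    }
  }
  puts("and.ll identities hold for every 8-bit x and y");
  return 0;
}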

Added: llvm/trunk/test/Transforms/InstCombine/and2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/and2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/and2.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/and2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,238 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i1 @test2(i1 %X, i1 %Y) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[B:%.*]] = and i1 %X, %Y
+; CHECK-NEXT:    ret i1 [[B]]
+;
+  %a = and i1 %X, %Y
+  %b = and i1 %a, %X
+  ret i1 %b
+}
+
+define i32 @test3(i32 %X, i32 %Y) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[B:%.*]] = and i32 %X, %Y
+; CHECK-NEXT:    ret i32 [[B]]
+;
+  %a = and i32 %X, %Y
+  %b = and i32 %Y, %a
+  ret i32 %b
+}
+
+define i1 @test7(i32 %i, i1 %b) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 %i, 0
+; CHECK-NEXT:    [[TMP2:%.*]] = and i1 [[TMP1]], %b
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %cmp1 = icmp slt i32 %i, 1
+  %cmp2 = icmp sgt i32 %i, -1
+  %and1 = and i1 %cmp1, %b
+  %and2 = and i1 %and1, %cmp2
+  ret i1 %and2
+}
+
+define i1 @test8(i32 %i) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:    [[I_OFF:%.*]] = add i32 %i, -1
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[I_OFF]], 13
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %cmp1 = icmp ne i32 %i, 0
+  %cmp2 = icmp ult i32 %i, 14
+  %cond = and i1 %cmp1, %cmp2
+  ret i1 %cond
+}
+
+; FIXME: Vectors should fold too.
+define <2 x i1> @test8vec(<2 x i32> %i) {
+; CHECK-LABEL: @test8vec(
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne <2 x i32> %i, zeroinitializer
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult <2 x i32> %i, <i32 14, i32 14>
+; CHECK-NEXT:    [[COND:%.*]] = and <2 x i1> [[CMP1]], [[CMP2]]
+; CHECK-NEXT:    ret <2 x i1> [[COND]]
+;
+  %cmp1 = icmp ne <2 x i32> %i, zeroinitializer
+  %cmp2 = icmp ult <2 x i32> %i, <i32 14, i32 14>
+  %cond = and <2 x i1> %cmp1, %cmp2
+  ret <2 x i1> %cond
+}
+
+; combine -x & 1 into x & 1
+define i64 @test9(i64 %x) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:    [[AND:%.*]] = and i64 %x, 1
+; CHECK-NEXT:    ret i64 [[AND]]
+;
+  %sub = sub nsw i64 0, %x
+  %and = and i64 %sub, 1
+  ret i64 %and
+}
+
+; combine -x & 1 into x & 1
+define <2 x i64> @test9vec(<2 x i64> %x) {
+; CHECK-LABEL: @test9vec(
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i64> %x, <i64 1, i64 1>
+; CHECK-NEXT:    ret <2 x i64> [[AND]]
+;
+  %sub = sub nsw <2 x i64> <i64 0, i64 0>, %x
+  %and = and <2 x i64> %sub, <i64 1, i64 1>
+  ret <2 x i64> %and
+}
+
+define i64 @test10(i64 %x) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT:    [[AND:%.*]] = and i64 %x, 1
+; CHECK-NEXT:    [[ADD:%.*]] = sub i64 [[AND]], %x
+; CHECK-NEXT:    ret i64 [[ADD]]
+;
+  %sub = sub nsw i64 0, %x
+  %and = and i64 %sub, 1
+  %add = add i64 %sub, %and
+  ret i64 %add
+}
+
+; (1 << x) & 1 --> zext(x == 0)
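+; The low bit of (1 << x) is set only when x == 0, so the mask collapses to a compare plus zext.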
+
+define i8 @and1_shl1_is_cmp_eq_0(i8 %x) {
+; CHECK-LABEL: @and1_shl1_is_cmp_eq_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i8 %x, 0
+; CHECK-NEXT:    [[AND:%.*]] = zext i1 [[TMP1]] to i8
+; CHECK-NEXT:    ret i8 [[AND]]
+;
+  %sh = shl i8 1, %x
+  %and = and i8 %sh, 1
+  ret i8 %and
+}
+
+; Don't do it if the shift has another use.
+
+define i8 @and1_shl1_is_cmp_eq_0_multiuse(i8 %x) {
+; CHECK-LABEL: @and1_shl1_is_cmp_eq_0_multiuse(
+; CHECK-NEXT:    [[SH:%.*]] = shl i8 1, %x
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[SH]], 1
+; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[SH]], [[AND]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %sh = shl i8 1, %x
+  %and = and i8 %sh, 1
+  %add = add i8 %sh, %and
+  ret i8 %add
+}
+
+; (1 << x) & 1 --> zext(x == 0)
+
+define <2 x i8> @and1_shl1_is_cmp_eq_0_vec(<2 x i8> %x) {
+; CHECK-LABEL: @and1_shl1_is_cmp_eq_0_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> %x, zeroinitializer
+; CHECK-NEXT:    [[AND:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8>
+; CHECK-NEXT:    ret <2 x i8> [[AND]]
+;
+  %sh = shl <2 x i8> <i8 1, i8 1>, %x
+  %and = and <2 x i8> %sh, <i8 1, i8 1>
+  ret <2 x i8> %and
+}
+
+; (1 >> x) & 1 --> zext(x == 0)
+
+define i8 @and1_lshr1_is_cmp_eq_0(i8 %x) {
+; CHECK-LABEL: @and1_lshr1_is_cmp_eq_0(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i8 %x, 0
+; CHECK-NEXT:    [[AND:%.*]] = zext i1 [[TMP1]] to i8
+; CHECK-NEXT:    ret i8 [[AND]]
+;
+  %sh = lshr i8 1, %x
+  %and = and i8 %sh, 1
+  ret i8 %and
+}
+
+; Don't do it if the shift has another use.
+
+define i8 @and1_lshr1_is_cmp_eq_0_multiuse(i8 %x) {
+; CHECK-LABEL: @and1_lshr1_is_cmp_eq_0_multiuse(
+; CHECK-NEXT:    [[SH:%.*]] = lshr i8 1, %x
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[SH]], 1
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i8 [[SH]], [[AND]]
+; CHECK-NEXT:    ret i8 [[ADD]]
+;
+  %sh = lshr i8 1, %x
+  %and = and i8 %sh, 1
+  %add = add i8 %sh, %and
+  ret i8 %add
+}
+
+; (1 >> x) & 1 --> zext(x == 0)
+
+define <2 x i8> @and1_lshr1_is_cmp_eq_0_vec(<2 x i8> %x) {
+; CHECK-LABEL: @and1_lshr1_is_cmp_eq_0_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> %x, zeroinitializer
+; CHECK-NEXT:    [[AND:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8>
+; CHECK-NEXT:    ret <2 x i8> [[AND]]
+;
+  %sh = lshr <2 x i8> <i8 1, i8 1>, %x
+  %and = and <2 x i8> %sh, <i8 1, i8 1>
+  ret <2 x i8> %and
+}
+
+; The add in this test is unnecessary because the LSBs of the LHS are 0 and the 'and' only consumes bits from those LSBs. It doesn't matter what happens to the upper bits.
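+; Here %x = %a << 8 has its low 8 bits clear, so bit 7 of (%x + %b) equals bit 7 of %b.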
+define i32 @test11(i32 %a, i32 %b) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT:    [[X:%.*]] = shl i32 [[A:%.*]], 8
+; CHECK-NEXT:    [[Z:%.*]] = and i32 [[B:%.*]], 128
+; CHECK-NEXT:    [[W:%.*]] = mul i32 [[Z]], [[X]]
+; CHECK-NEXT:    ret i32 [[W]]
+;
+  %x = shl i32 %a, 8
+  %y = add i32 %x, %b
+  %z = and i32 %y, 128
+  %w = mul i32 %z, %x ; to keep the shift from being removed
+  ret i32 %w
+}
+
+; The add in this test is unnecessary because the LSBs of the RHS are 0 and the 'and' only consumes bits from those LSBs. It doesn't matter what happens to the upper bits.
+define i32 @test12(i32 %a, i32 %b) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:    [[X:%.*]] = shl i32 [[A:%.*]], 8
+; CHECK-NEXT:    [[Z:%.*]] = and i32 [[B:%.*]], 128
+; CHECK-NEXT:    [[W:%.*]] = mul i32 [[Z]], [[X]]
+; CHECK-NEXT:    ret i32 [[W]]
+;
+  %x = shl i32 %a, 8
+  %y = add i32 %b, %x
+  %z = and i32 %y, 128
+  %w = mul i32 %z, %x ; to keep the shift from being removed
+  ret i32 %w
+}
+
+; The sub in this test is unnecessary because the LSBs of the RHS are 0 and the 'and' only consumes bits from those LSBs. It doesn't matter what happens to the upper bits.
+define i32 @test13(i32 %a, i32 %b) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT:    [[X:%.*]] = shl i32 [[A:%.*]], 8
+; CHECK-NEXT:    [[Z:%.*]] = and i32 [[B:%.*]], 128
+; CHECK-NEXT:    [[W:%.*]] = mul i32 [[Z]], [[X]]
+; CHECK-NEXT:    ret i32 [[W]]
+;
+  %x = shl i32 %a, 8
+  %y = sub i32 %b, %x
+  %z = and i32 %y, 128
+  %w = mul i32 %z, %x ; to keep the shift from being removed
+  ret i32 %w
+}
+
+; The sub in this test cannot be removed because we need to keep the negation of %b. TODO: But we should be able to replace the LHS of it with a 0.
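+; Since the low 8 bits of %x are zero, (%x - %b) and (0 - %b) agree in bit 7, but the negation of %b itself must stay.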
+define i32 @test14(i32 %a, i32 %b) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT:    [[X:%.*]] = shl i32 [[A:%.*]], 8
+; CHECK-NEXT:    [[Y:%.*]] = sub i32 0, [[B:%.*]]
+; CHECK-NEXT:    [[Z:%.*]] = and i32 [[Y]], 128
+; CHECK-NEXT:    [[W:%.*]] = mul i32 [[Z]], [[X]]
+; CHECK-NEXT:    ret i32 [[W]]
+;
+  %x = shl i32 %a, 8
+  %y = sub i32 %x, %b
+  %z = and i32 %y, 128
+  %w = mul i32 %z, %x ; to keep the shift from being removed
+  ret i32 %w
+}

Added: llvm/trunk/test/Transforms/InstCombine/apint-add.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/apint-add.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/apint-add.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/apint-add.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,159 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Tests for Integer BitWidth <= 64 && BitWidth % 8 != 0.
+
+;; Flip sign bit then add INT_MIN -> nop.
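+;; For i1 the sign bit is the only bit and 'add' is just xor, so the two flips cancel.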
+define i1 @test1(i1 %x) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    ret i1 %x
+;
+  %tmp.2 = xor i1 %x, 1
+  %tmp.4 = add i1 %tmp.2, 1
+  ret i1 %tmp.4
+}
+
+;; Flip sign bit then add INT_MIN -> nop.
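+;; 70368744177664 = 2^46, the sign bit of i47.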
+define i47 @test2(i47 %x) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    ret i47 %x
+;
+  %tmp.2 = xor i47 %x, 70368744177664
+  %tmp.4 = add i47 %tmp.2, 70368744177664
+  ret i47 %tmp.4
+}
+
+;; Flip sign bit then add INT_MIN -> nop.
+define i15 @test3(i15 %x) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    ret i15 %x
+;
+  %tmp.2 = xor i15 %x, 16384
+  %tmp.4 = add i15 %tmp.2, 16384
+  ret i15 %tmp.4
+}
+
+; X + signbit --> X ^ signbit
+define <2 x i5> @test3vec(<2 x i5> %x) {
+; CHECK-LABEL: @test3vec(
+; CHECK-NEXT:    [[Y:%.*]] = xor <2 x i5> %x, <i5 -16, i5 -16>
+; CHECK-NEXT:    ret <2 x i5> [[Y]]
+;
+  %y = add <2 x i5> %x, <i5 16, i5 16>
+  ret <2 x i5> %y
+}
+
+;; (x & 0b1111..0) + 1 -> x | 1
+define i49 @test4(i49 %x) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[TMP_4:%.*]] = or i49 %x, 1
+; CHECK-NEXT:    ret i49 [[TMP_4]]
+;
+  %tmp.2 = and i49 %x, 562949953421310
+  %tmp.4 = add i49 %tmp.2, 1
+  ret i49 %tmp.4
+}
+
+define i7 @sext(i4 %x) {
+; CHECK-LABEL: @sext(
+; CHECK-NEXT:    [[ADD:%.*]] = sext i4 %x to i7
+; CHECK-NEXT:    ret i7 [[ADD]]
+;
+  %xor = xor i4 %x, -8
+  %zext = zext i4 %xor to i7
+  %add = add nsw i7 %zext, -8
+  ret i7 %add
+}
+
+define <2 x i10> @sext_vec(<2 x i3> %x) {
+; CHECK-LABEL: @sext_vec(
+; CHECK-NEXT:    [[ADD:%.*]] = sext <2 x i3> %x to <2 x i10>
+; CHECK-NEXT:    ret <2 x i10> [[ADD]]
+;
+  %xor = xor <2 x i3> %x, <i3 -4, i3 -4>
+  %zext = zext <2 x i3> %xor to <2 x i10>
+  %add = add nsw <2 x i10> %zext, <i10 -4, i10 -4>
+  ret <2 x i10> %add
+}
+
+; Multiple uses of the operands don't prevent the fold.
+
+define i4 @sext_multiuse(i4 %x) {
+; CHECK-LABEL: @sext_multiuse(
+; CHECK-NEXT:    [[XOR:%.*]] = xor i4 %x, -8
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i4 [[XOR]] to i7
+; CHECK-NEXT:    [[ADD:%.*]] = sext i4 %x to i7
+; CHECK-NEXT:    [[MUL:%.*]] = sdiv i7 [[ZEXT]], [[ADD]]
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i7 [[MUL]] to i4
+; CHECK-NEXT:    [[DIV:%.*]] = sdiv i4 [[TRUNC]], [[XOR]]
+; CHECK-NEXT:    ret i4 [[DIV]]
+;
+  %xor = xor i4 %x, -8
+  %zext = zext i4 %xor to i7
+  %add = add nsw i7 %zext, -8
+  %mul = sdiv i7 %zext, %add
+  %trunc = trunc i7 %mul to i4
+  %div = sdiv i4 %trunc, %xor
+  ret i4 %div
+}
+
+; Tests for Integer BitWidth > 64 && BitWidth <= 1024.
+
+;; Flip sign bit then add INT_MIN -> nop.
+define i111 @test5(i111 %x) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    ret i111 %x
+;
+  %tmp.2 = shl i111 1, 110
+  %tmp.4 = xor i111 %x, %tmp.2
+  %tmp.6 = add i111 %tmp.4, %tmp.2
+  ret i111 %tmp.6
+}
+
+;; Flip sign bit then add INT_MIN -> nop.
+define i65 @test6(i65 %x) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    ret i65 %x
+;
+  %tmp.0 = shl i65 1, 64
+  %tmp.2 = xor i65 %x, %tmp.0
+  %tmp.4 = add i65 %tmp.2, %tmp.0
+  ret i65 %tmp.4
+}
+
+;; Flip sign bit then add INT_MIN -> nop.
+define i1024 @test7(i1024 %x) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    ret i1024 %x
+;
+  %tmp.0 = shl i1024 1, 1023
+  %tmp.2 = xor i1024 %x, %tmp.0
+  %tmp.4 = add i1024 %tmp.2, %tmp.0
+  ret i1024 %tmp.4
+}
+
+;; If we have add(xor(X, 0xF..F80..), 0x80..), it's an xor.
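+;; The ashr produces 0xFF...F80; adding the sign bit back cancels the top-bit flip, leaving a single xor with 2^127 - 128.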
+define i128 @test8(i128 %x) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:    [[TMP_4:%.*]] = xor i128 %x, 170141183460469231731687303715884105600
+; CHECK-NEXT:    ret i128 [[TMP_4]]
+;
+  %tmp.5 = shl i128 1, 127
+  %tmp.1 = ashr i128 %tmp.5, 120
+  %tmp.2 = xor i128 %x, %tmp.1
+  %tmp.4 = add i128 %tmp.2, %tmp.5
+  ret i128 %tmp.4
+}
+
+;; (x & C)+1 -> (x & C)|1 when the low bit of C is clear (here C = 562949953421310)
+define i77 @test9(i77 %x) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:    [[TMP_2:%.*]] = and i77 %x, 562949953421310
+; CHECK-NEXT:    [[TMP_4:%.*]] = or i77 [[TMP_2]], 1
+; CHECK-NEXT:    ret i77 [[TMP_4]]
+;
+  %tmp.2 = and i77 %x, 562949953421310
+  %tmp.4 = add i77 %tmp.2, 1
+  ret i77 %tmp.4
+}
+

Added: llvm/trunk/test/Transforms/InstCombine/apint-and-compare.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/apint-and-compare.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/apint-and-compare.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/apint-and-compare.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,16 @@
+; RUN: opt < %s -instcombine -S | grep and | count 2
+
+; Should be optimized to one and.
+define i1 @test1(i33 %a, i33 %b) {
+        %tmp1 = and i33 %a, 65280
+        %tmp3 = and i33 %b, 65280
+        %tmp = icmp ne i33 %tmp1, %tmp3
+        ret i1 %tmp
+}
+
+define i1 @test2(i999 %a, i999 %b) {
+        %tmp1 = and i999 %a, 65280
+        %tmp3 = and i999 %b, 65280
+        %tmp = icmp ne i999 %tmp1, %tmp3
+        ret i1 %tmp
+}

Added: llvm/trunk/test/Transforms/InstCombine/apint-and-or-and.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/apint-and-or-and.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/apint-and-or-and.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/apint-and-or-and.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,50 @@
+; If we have an 'and' of the result of an 'or', and one of the 'or' operands
+; cannot have contributed any of the resultant bits, delete the or.  This
+; occurs for very common C/C++ code like this:
+;
+; struct foo { int A : 16; int B : 16; };
+; void test(struct foo *F, int X, int Y) {
+;        F->A = X; F->B = Y;
+; }
+;
+; Which corresponds to test1.
+; 
+; This tests arbitrary precision integers.
+
+; RUN: opt < %s -instcombine -S | not grep "or "
+; END.
+
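+; For example, in test1: ((X & 7) | (Y & 8)) & 7 --> X & 7, because (Y & 8) & 7 == 0.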
+define i17 @test1(i17 %X, i17 %Y) {
+	%A = and i17 %X, 7
+	%B = and i17 %Y, 8
+	%C = or i17 %A, %B
+	%D = and i17 %C, 7  ;; This cannot include any bits from %Y!
+	ret i17 %D
+}
+
+define i49 @test3(i49 %X, i49 %Y) {
+	%B = shl i49 %Y, 1
+	%C = or i49 %X, %B
+	%D = and i49 %C, 1  ;; This cannot include any bits from %Y!
+	ret i49 %D
+}
+
+define i67 @test4(i67 %X, i67 %Y) {
+	%B = lshr i67 %Y, 66
+	%C = or i67 %X, %B
+	%D = and i67 %C, 2  ;; This cannot include any bits from %Y!
+	ret i67 %D
+}
+
+define i231 @or_test1(i231 %X, i231 %Y) {
+	%A = and i231 %X, 1
+	%B = or i231 %A, 1     ;; This cannot include any bits from X!
+	ret i231 %B
+}
+
+define i7 @or_test2(i7 %X, i7 %Y) {
+	%A = shl i7 %X, 6
+	%B = or i7 %A, 64     ;; This cannot include any bits from X!
+	ret i7 %B
+}
+

Added: llvm/trunk/test/Transforms/InstCombine/apint-and-xor-merge.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/apint-and-xor-merge.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/apint-and-xor-merge.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/apint-and-xor-merge.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
+; This test case checks that the merge of and/xor can work on arbitrary
+; precision integers.
+
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; (x & z) ^ (y & z) -> (x ^ y) & z
+define i57 @test1(i57 %x, i57 %y, i57 %z) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[TMP61:%.*]] = xor i57 %x, %y
+; CHECK-NEXT:    [[TMP7:%.*]] = and i57 [[TMP61]], %z
+; CHECK-NEXT:    ret i57 [[TMP7]]
+;
+  %tmp3 = and i57 %z, %x
+  %tmp6 = and i57 %z, %y
+  %tmp7 = xor i57 %tmp3, %tmp6
+  ret i57 %tmp7
+}
+
+; (x & y) ^ (x | y) -> x ^ y
+define i23 @test2(i23 %x, i23 %y, i23 %z) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[TMP7:%.*]] = xor i23 %y, %x
+; CHECK-NEXT:    ret i23 [[TMP7]]
+;
+  %tmp3 = and i23 %y, %x
+  %tmp6 = or i23 %y, %x
+  %tmp7 = xor i23 %tmp3, %tmp6
+  ret i23 %tmp7
+}
+

Added: llvm/trunk/test/Transforms/InstCombine/apint-and.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/apint-and.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/apint-and.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/apint-and.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,126 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; FIXME: Some of these tests belong in InstSimplify.
+
+; Integer BitWidth <= 64 && BitWidth % 8 != 0.
+
+define i39 @test0(i39 %A) {
+; CHECK-LABEL: @test0(
+; CHECK-NEXT:    ret i39 0
+;
+  %B = and i39 %A, 0 ; zero result
+  ret i39 %B
+}
+
+define i15 @test2(i15 %x) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    ret i15 %x
+;
+  %tmp.2 = and i15 %x, -1 ; noop
+  ret i15 %tmp.2
+}
+
+define i23 @test3(i23 %x) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    ret i23 0
+;
+  %tmp.0 = and i23 %x, 127
+  %tmp.2 = and i23 %tmp.0, 128
+  ret i23 %tmp.2
+}
+
+define i1 @test4(i37 %x) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[B:%.*]] = icmp ugt i37 %x, 2147483647
+; CHECK-NEXT:    ret i1 [[B]]
+;
+  %A = and i37 %x, -2147483648
+  %B = icmp ne i37 %A, 0
+  ret i1 %B
+}
+
+define i7 @test5(i7 %A, i7* %P) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    [[B:%.*]] = or i7 %A, 3
+; CHECK-NEXT:    [[C:%.*]] = xor i7 [[B]], 12
+; CHECK-NEXT:    store i7 [[C]], i7* %P, align 1
+; CHECK-NEXT:    ret i7 3
+;
+  %B = or i7 %A, 3
+  %C = xor i7 %B, 12
+  store i7 %C, i7* %P
+  %r = and i7 %C, 3
+  ret i7 %r
+}
+
+define i47 @test7(i47 %A) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i47 %A, 39
+; CHECK-NEXT:    ret i47 [[TMP1]]
+;
+  %X = ashr i47 %A, 39 ;; sign extend
+  %C1 = and i47 %X, 255
+  ret i47 %C1
+}
+
+; Integer BitWidth > 64 && BitWidth <= 1024.
+
+define i999 @test8(i999 %A) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:    ret i999 0
+;
+  %B = and i999 %A, 0 ; zero result
+  ret i999 %B
+}
+
+define i1005 @test9(i1005 %x) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:    ret i1005 %x
+;
+  %tmp.2 = and i1005 %x, -1 ; noop
+  ret i1005 %tmp.2
+}
+
+define i123 @test10(i123 %x) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT:    ret i123 0
+;
+  %tmp.0 = and i123 %x, 127
+  %tmp.2 = and i123 %tmp.0, 128
+  ret i123 %tmp.2
+}
+
+define i1 @test11(i737 %x) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT:    [[B:%.*]] = icmp ugt i737 %x, 2147483647
+; CHECK-NEXT:    ret i1 [[B]]
+;
+  %A = and i737 %x, -2147483648
+  %B = icmp ne i737 %A, 0
+  ret i1 %B
+}
+
+define i117 @test12(i117 %A, i117* %P) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:    [[B:%.*]] = or i117 %A, 3
+; CHECK-NEXT:    [[C:%.*]] = xor i117 [[B]], 12
+; CHECK-NEXT:    store i117 [[C]], i117* %P, align 4
+; CHECK-NEXT:    ret i117 3
+;
+  %B = or i117 %A, 3
+  %C = xor i117 %B, 12
+  store i117 %C, i117* %P
+  %r = and i117 %C, 3
+  ret i117 %r
+}
+
+define i1024 @test13(i1024 %A) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i1024 %A, 1016
+; CHECK-NEXT:    ret i1024 [[TMP1]]
+;
+  %X = ashr i1024 %A, 1016 ;; sign extend
+  %C1 = and i1024 %X, 255
+  ret i1024 %C1
+}
+

Added: llvm/trunk/test/Transforms/InstCombine/apint-call-cast-target.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/apint-call-cast-target.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/apint-call-cast-target.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/apint-call-cast-target.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,23 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32"
+target triple = "i686-pc-linux-gnu"
+
+define i32 @main() {
+; CHECK-LABEL: @main(
+; CHECK: %[[call:.*]] = call i7* @ctime(i999* null)
+; CHECK: %[[cast:.*]] = ptrtoint i7* %[[call]] to i32
+; CHECK: ret i32 %[[cast]]
+entry:
+	%tmp = call i32 bitcast (i7* (i999*)* @ctime to i32 (i99*)*)( i99* null )
+	ret i32 %tmp
+}
+
+define i7* @ctime(i999*) {
+; CHECK-LABEL: define i7* @ctime(
+; CHECK: %[[call:.*]] = call i32 @main()
+; CHECK: %[[cast:.*]] = inttoptr i32 %[[call]] to i7*
+entry:
+	%tmp = call i7* bitcast (i32 ()* @main to i7* ()*)( )
+	ret i7* %tmp
+}

Added: llvm/trunk/test/Transforms/InstCombine/apint-cast-and-cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/apint-cast-and-cast.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/apint-cast-and-cast.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/apint-cast-and-cast.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,15 @@
+; RUN: opt < %s -instcombine -S | not grep bitcast
+
+define i19 @test1(i43 %val) {
+  %t1 = bitcast i43 %val to i43 
+  %t2 = and i43 %t1, 1
+  %t3 = trunc i43 %t2 to i19
+  ret i19 %t3
+}
+
+define i73 @test2(i677 %val) {
+  %t1 = bitcast i677 %val to i677 
+  %t2 = and i677 %t1, 1
+  %t3 = trunc i677 %t2 to i73
+  ret i73 %t3
+}

Added: llvm/trunk/test/Transforms/InstCombine/apint-cast-cast-to-and.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/apint-cast-cast-to-and.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/apint-cast-cast-to-and.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/apint-cast-cast-to-and.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,8 @@
+; RUN: opt < %s -instcombine -S | not grep i41
+
+define i61 @test1(i61 %X) {
+        %Y = trunc i61 %X to i41 ;; Turn into an AND
+        %Z = zext i41 %Y to i61
+        ret i61 %Z
+}
+

Added: llvm/trunk/test/Transforms/InstCombine/apint-cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/apint-cast.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/apint-cast.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/apint-cast.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,30 @@
+; Tests to make sure elimination of casts is working correctly
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define i17 @test1(i17 %a) {
+        %tmp = zext i17 %a to i37               ; <i37> [#uses=2]
+        %tmp21 = lshr i37 %tmp, 8               ; <i37> [#uses=1]
+; CHECK: %tmp21 = lshr i17 %a, 8
+        %tmp5 = shl i37 %tmp, 8         ; <i37> [#uses=1]
+; CHECK: %tmp5 = shl i17 %a, 8
+        %tmp.upgrd.32 = or i37 %tmp21, %tmp5            ; <i37> [#uses=1]
+; CHECK: %tmp.upgrd.32 = or i17 %tmp21, %tmp5
+        %tmp.upgrd.3 = trunc i37 %tmp.upgrd.32 to i17   ; <i17> [#uses=1]
+        ret i17 %tmp.upgrd.3
+; CHECK: ret i17 %tmp.upgrd.32
+}
+
+define i167 @test2(i167 %a) {
+        %tmp = zext i167 %a to i577               ; <i577> [#uses=2]
+        %tmp21 = lshr i577 %tmp, 9               ; <i577> [#uses=1]
+; CHECK: %tmp21 = lshr i167 %a, 9
+        %tmp5 = shl i577 %tmp, 8         ; <i577> [#uses=1]
+; CHECK: %tmp5 = shl i167 %a, 8
+        %tmp.upgrd.32 = or i577 %tmp21, %tmp5            ; <i577> [#uses=1]
+; CHECK: %tmp.upgrd.32 = or i167 %tmp21, %tmp5
+        %tmp.upgrd.3 = trunc i577 %tmp.upgrd.32 to i167  ; <i167> [#uses=1]
+        ret i167 %tmp.upgrd.3
+; CHECK: ret i167 %tmp.upgrd.32
+}

Added: llvm/trunk/test/Transforms/InstCombine/apint-div1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/apint-div1.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/apint-div1.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/apint-div1.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; This test makes sure that div instructions are properly eliminated.
+; This test is for Integer BitWidth < 64 && BitWidth % 2 != 0.
+;
+; RUN: opt < %s -instcombine -S | not grep div
+
+
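+; A udiv by a constant power of two (or by a select between two powers of two) becomes a lshr, so no div should remain.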
+define i33 @test1(i33 %X) {
+    %Y = udiv i33 %X, 4096
+    ret i33 %Y
+}
+
+define i49 @test2(i49 %X) {
+    %tmp.0 = shl i49 4096, 17
+    %Y = udiv i49 %X, %tmp.0
+    ret i49 %Y
+}
+
+define i59 @test3(i59 %X, i1 %C) {
+        %V = select i1 %C, i59 1024, i59 4096
+        %R = udiv i59 %X, %V
+        ret i59 %R
+}

Added: llvm/trunk/test/Transforms/InstCombine/apint-div2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/apint-div2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/apint-div2.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/apint-div2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; This test makes sure that div instructions are properly eliminated.
+; This test is for Integer BitWidth >= 64 && BitWidth <= 1024.
+;
+; RUN: opt < %s -instcombine -S | not grep div
+
+
+define i333 @test1(i333 %X) {
+    %Y = udiv i333 %X, 70368744177664
+    ret i333 %Y
+}
+
+define i499 @test2(i499 %X) {
+    %tmp.0 = shl i499 4096, 197
+    %Y = udiv i499 %X, %tmp.0
+    ret i499 %Y
+}
+
+define i599 @test3(i599 %X, i1 %C) {
+        %V = select i1 %C, i599 70368744177664, i599 4096
+        %R = udiv i599 %X, %V
+        ret i599 %R
+}

Added: llvm/trunk/test/Transforms/InstCombine/apint-mul1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/apint-mul1.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/apint-mul1.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/apint-mul1.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; This test makes sure that mul instructions are properly eliminated.
+; This test is for Integer BitWidth < 64 && BitWidth % 2 != 0.
+
+define i17 @test1(i17 %X) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[Y:%.*]] = shl i17 [[X:%.*]], 10
+; CHECK-NEXT:    ret i17 [[Y]]
+;
+  %Y = mul i17 %X, 1024
+  ret i17 %Y
+}
+
+define <2 x i17> @test2(<2 x i17> %X) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[Y:%.*]] = shl <2 x i17> [[X:%.*]], <i17 10, i17 10>
+; CHECK-NEXT:    ret <2 x i17> [[Y]]
+;
+  %Y = mul <2 x i17> %X, <i17 1024, i17 1024>
+  ret <2 x i17> %Y
+}
+
+define <2 x i17> @test3(<2 x i17> %X) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[Y:%.*]] = shl <2 x i17> [[X:%.*]], <i17 10, i17 8>
+; CHECK-NEXT:    ret <2 x i17> [[Y]]
+;
+  %Y = mul <2 x i17> %X, <i17 1024, i17 256>
+  ret <2 x i17> %Y
+}

Added: llvm/trunk/test/Transforms/InstCombine/apint-mul2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/apint-mul2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/apint-mul2.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/apint-mul2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; This test makes sure that mul instructions are properly eliminated.
+; This test is for Integer BitWidth >= 64 && BitWidth <= 1024.
+
+define i177 @test1(i177 %X) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[Y:%.*]] = shl i177 [[X:%.*]], 155
+; CHECK-NEXT:    ret i177 [[Y]]
+;
+  %C = shl i177 1, 155
+  %Y = mul i177 %X, %C
+  ret i177 %Y
+}
+
+define <2 x i177> @test2(<2 x i177> %X) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[Y:%.*]] = shl <2 x i177> [[X:%.*]], <i177 155, i177 155>
+; CHECK-NEXT:    ret <2 x i177> [[Y]]
+;
+  %C = shl <2 x i177> <i177 1, i177 1>, <i177 155, i177 155>
+  %Y = mul <2 x i177> %X, %C
+  ret <2 x i177> %Y
+}
+
+define <2 x i177> @test3(<2 x i177> %X) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[Y:%.*]] = shl <2 x i177> [[X:%.*]], <i177 150, i177 155>
+; CHECK-NEXT:    ret <2 x i177> [[Y]]
+;
+  %C = shl <2 x i177> <i177 1, i177 1>, <i177 150, i177 155>
+  %Y = mul <2 x i177> %X, %C
+  ret <2 x i177> %Y
+}

Added: llvm/trunk/test/Transforms/InstCombine/apint-not.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/apint-not.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/apint-not.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/apint-not.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,17 @@
+; This test makes sure that the xor instructions are properly eliminated
+; when arbitrary precision integers are used.
+
+; RUN: opt < %s -instcombine -S | not grep xor
+
+define i33 @test1(i33 %A) {
+	%B = xor i33 %A, -1
+	%C = xor i33 %B, -1
+	ret i33 %C
+}
+
+define i1 @test2(i52 %A, i52 %B) {
+	%cond = icmp ule i52 %A, %B     ; Can change into ugt
+	%Ret = xor i1 %cond, true
+	ret i1 %Ret
+}
+

Added: llvm/trunk/test/Transforms/InstCombine/apint-or.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/apint-or.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/apint-or.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/apint-or.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,56 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; These tests are for Integer BitWidth <= 64 && BitWidth % 2 != 0.
+define i23 @test1(i23 %A) {
+    ;; A | ~A == -1
+    %NotA = xor i23 -1, %A
+    %B = or i23 %A, %NotA
+    ret i23 %B
+; CHECK-LABEL: @test1
+; CHECK-NEXT: ret i23 -1
+}
+
+define i39 @test2(i39 %V, i39 %M) {
+    ;; If we have: ((V + N) & C1) | (V & C2)
+    ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
+    ;; replace with V+N.
+    %C1 = xor i39 274877906943, -1 ;; C2 = 274877906943
+    %N = and i39 %M, 274877906944
+    %A = add i39 %V, %N
+    %B = and i39 %A, %C1
+    %D = and i39 %V, 274877906943
+    %R = or i39 %B, %D
+    ret i39 %R
+; CHECK-LABEL: @test2
+; CHECK-NEXT: %N = and i39 %M, -274877906944
+; CHECK-NEXT: %A = add i39 %N, %V
+; CHECK-NEXT: ret i39 %A
+}
+
+; These tests are for Integer BitWidth > 64 && BitWidth <= 1024.
+define i1023 @test4(i1023 %A) {
+    ;; A | ~A == -1
+    %NotA = xor i1023 -1, %A
+    %B = or i1023 %A, %NotA
+    ret i1023 %B
+; CHECK-LABEL: @test4
+; CHECK-NEXT: ret i1023 -1
+}
+
+define i399 @test5(i399 %V, i399 %M) {
+    ;; If we have: ((V + N) & C1) | (V & C2)
+    ;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
+    ;; replace with V+N.
+    %C1 = xor i399 274877906943, -1 ;; C2 = 274877906943
+    %N = and i399 %M, 18446742974197923840
+    %A = add i399 %V, %N
+    %B = and i399 %A, %C1
+    %D = and i399 %V, 274877906943
+    %R = or i399 %B, %D
+    ret i399 %R
+; CHECK-LABEL: @test5
+; CHECK-NEXT: %N = and i399 %M, 18446742974197923840
+; CHECK-NEXT: %A = add i399 %N, %V
+; CHECK-NEXT: ret i399 %A
+}
+

Added: llvm/trunk/test/Transforms/InstCombine/apint-rem1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/apint-rem1.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/apint-rem1.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/apint-rem1.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; This test makes sure that urem instructions are properly eliminated.
+; This test is for Integer BitWidth < 64 && BitWidth % 2 != 0.
+;
+; RUN: opt < %s -instcombine -S | not grep rem
+
+
+define i33 @test1(i33 %A) {
+    %B = urem i33 %A, 4096
+    ret i33 %B
+}
+
+define i49 @test2(i49 %A) {
+    %B = shl i49 4096, 11
+    %Y = urem i49 %A, %B
+    ret i49 %Y
+}
+
+define i59 @test3(i59 %X, i1 %C) {
+	%V = select i1 %C, i59 70368744177664, i59 4096
+	%R = urem i59 %X, %V
+	ret i59 %R
+}

Added: llvm/trunk/test/Transforms/InstCombine/apint-rem2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/apint-rem2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/apint-rem2.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/apint-rem2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; This test makes sure that urem instructions are properly eliminated.
+; This test is for Integer BitWidth >= 64 && BitWidth <= 1024.
+;
+; RUN: opt < %s -instcombine -S | not grep rem
+
+
+define i333 @test1(i333 %A) {
+    %B = urem i333 %A, 70368744177664
+    ret i333 %B
+}
+
+define i499 @test2(i499 %A) {
+    %B = shl i499 4096, 111
+    %Y = urem i499 %A, %B
+    ret i499 %Y
+}
+
+define i599 @test3(i599 %X, i1 %C) {
+	%V = select i1 %C, i599 70368744177664, i599 4096
+	%R = urem i599 %X, %V
+	ret i599 %R
+}

Added: llvm/trunk/test/Transforms/InstCombine/apint-select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/apint-select.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/apint-select.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/apint-select.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,118 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; No selects should remain.
+
+define i41 @zext(i1 %C) {
+; CHECK-LABEL: @zext(
+; CHECK-NEXT:    [[V:%.*]] = zext i1 %C to i41
+; CHECK-NEXT:    ret i41 [[V]]
+;
+  %V = select i1 %C, i41 1, i41 0
+  ret i41 %V
+}
+
+define i41 @sext(i1 %C) {
+; CHECK-LABEL: @sext(
+; CHECK-NEXT:    [[V:%.*]] = sext i1 %C to i41
+; CHECK-NEXT:    ret i41 [[V]]
+;
+  %V = select i1 %C, i41 -1, i41 0
+  ret i41 %V
+}
+
+define i999 @not_zext(i1 %C) {
+; CHECK-LABEL: @not_zext(
+; CHECK-NEXT:    [[NOT_C:%.*]] = xor i1 %C, true
+; CHECK-NEXT:    [[V:%.*]] = zext i1 [[NOT_C]] to i999
+; CHECK-NEXT:    ret i999 [[V]]
+;
+  %V = select i1 %C, i999 0, i999 1
+  ret i999 %V
+}
+
+define i999 @not_sext(i1 %C) {
+; CHECK-LABEL: @not_sext(
+; CHECK-NEXT:    [[NOT_C:%.*]] = xor i1 %C, true
+; CHECK-NEXT:    [[V:%.*]] = sext i1 [[NOT_C]] to i999
+; CHECK-NEXT:    ret i999 [[V]]
+;
+  %V = select i1 %C, i999 0, i999 -1
+  ret i999 %V
+}
+
+; Vector selects of vector splat constants match APInt too.
+
+define <2 x i41> @zext_vec(<2 x i1> %C) {
+; CHECK-LABEL: @zext_vec(
+; CHECK-NEXT:    [[V:%.*]] = zext <2 x i1> %C to <2 x i41>
+; CHECK-NEXT:    ret <2 x i41> [[V]]
+;
+  %V = select <2 x i1> %C, <2 x i41> <i41 1, i41 1>, <2 x i41> <i41 0, i41 0>
+  ret <2 x i41> %V
+}
+
+define <2 x i32> @sext_vec(<2 x i1> %C) {
+; CHECK-LABEL: @sext_vec(
+; CHECK-NEXT:    [[V:%.*]] = sext <2 x i1> %C to <2 x i32>
+; CHECK-NEXT:    ret <2 x i32> [[V]]
+;
+  %V = select <2 x i1> %C, <2 x i32> <i32 -1, i32 -1>, <2 x i32> <i32 0, i32 0>
+  ret <2 x i32> %V
+}
+
+define <2 x i999> @not_zext_vec(<2 x i1> %C) {
+; CHECK-LABEL: @not_zext_vec(
+; CHECK-NEXT:    [[NOT_C:%.*]] = xor <2 x i1> %C, <i1 true, i1 true>
+; CHECK-NEXT:    [[V:%.*]] = zext <2 x i1> [[NOT_C]] to <2 x i999>
+; CHECK-NEXT:    ret <2 x i999> [[V]]
+;
+  %V = select <2 x i1> %C, <2 x i999> <i999 0, i999 0>, <2 x i999> <i999 1, i999 1>
+  ret <2 x i999> %V
+}
+
+define <2 x i64> @not_sext_vec(<2 x i1> %C) {
+; CHECK-LABEL: @not_sext_vec(
+; CHECK-NEXT:    [[NOT_C:%.*]] = xor <2 x i1> %C, <i1 true, i1 true>
+; CHECK-NEXT:    [[V:%.*]] = sext <2 x i1> [[NOT_C]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[V]]
+;
+  %V = select <2 x i1> %C, <2 x i64> <i64 0, i64 0>, <2 x i64> <i64 -1, i64 -1>
+  ret <2 x i64> %V
+}
+
+; But don't touch this - we would need 3 instructions to extend and splat the scalar select condition.
+
+define <2 x i32> @scalar_select_of_vectors(i1 %c) {
+; CHECK-LABEL: @scalar_select_of_vectors(
+; CHECK-NEXT:    [[V:%.*]] = select i1 %c, <2 x i32> <i32 1, i32 1>, <2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <2 x i32> [[V]]
+;
+  %V = select i1 %c, <2 x i32> <i32 1, i32 1>, <2 x i32> zeroinitializer
+  ret <2 x i32> %V
+}
+
+;; (x <s 0) ? -1 : 0 -> ashr x, bitwidth - 1
+
+define i41 @test3(i41 %X) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[X_LOBIT:%.*]] = ashr i41 %X, 40
+; CHECK-NEXT:    ret i41 [[X_LOBIT]]
+;
+  %t = icmp slt i41 %X, 0
+  %V = select i1 %t, i41 -1, i41 0
+  ret i41 %V
+}
+
+;; (x <s 0) ? -1 : 0 -> ashr x, bitwidth - 1
+
+define i1023 @test4(i1023 %X) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[X_LOBIT:%.*]] = ashr i1023 %X, 1022
+; CHECK-NEXT:    ret i1023 [[X_LOBIT]]
+;
+  %t = icmp slt i1023 %X, 0
+  %V = select i1 %t, i1023 -1, i1023 0
+  ret i1023 %V
+}
+

Added: llvm/trunk/test/Transforms/InstCombine/apint-shift-simplify.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/apint-shift-simplify.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/apint-shift-simplify.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/apint-shift-simplify.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,34 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i41 @test0(i41 %A, i41 %B, i41 %C) {
+	%X = shl i41 %A, %C
+	%Y = shl i41 %B, %C
+	%Z = and i41 %X, %Y
+	ret i41 %Z
+; CHECK-LABEL: @test0(
+; CHECK-NEXT: and i41 %A, %B
+; CHECK-NEXT: shl i41
+; CHECK-NEXT: ret
+}
+
+define i57 @test1(i57 %A, i57 %B, i57 %C) {
+	%X = lshr i57 %A, %C
+	%Y = lshr i57 %B, %C
+	%Z = or i57 %X, %Y
+	ret i57 %Z
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: or i57 %A, %B
+; CHECK-NEXT: lshr i57
+; CHECK-NEXT: ret
+}
+
+define i49 @test2(i49 %A, i49 %B, i49 %C) {
+	%X = ashr i49 %A, %C
+	%Y = ashr i49 %B, %C
+	%Z = xor i49 %X, %Y
+	ret i49 %Z
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: xor i49 %A, %B
+; CHECK-NEXT: ashr i49
+; CHECK-NEXT: ret
+}

Added: llvm/trunk/test/Transforms/InstCombine/apint-shift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/apint-shift.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/apint-shift.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/apint-shift.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,551 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i55 @test6(i55 %A) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    [[C:%.*]] = mul i55 [[A:%.*]], 6
+; CHECK-NEXT:    ret i55 [[C]]
+;
+  %B = shl i55 %A, 1
+  %C = mul i55 %B, 3
+  ret i55 %C
+}
+
+; (X * C2) << C1 --> X * (C2 << C1)
+
+define i55 @test6a(i55 %A) {
+; CHECK-LABEL: @test6a(
+; CHECK-NEXT:    [[C:%.*]] = mul i55 [[A:%.*]], 6
+; CHECK-NEXT:    ret i55 [[C]]
+;
+  %B = mul i55 %A, 3
+  %C = shl i55 %B, 1
+  ret i55 %C
+}
+
+; (X * C2) << C1 --> X * (C2 << C1)
+
+define <2 x i55> @test6a_vec(<2 x i55> %A) {
+; CHECK-LABEL: @test6a_vec(
+; CHECK-NEXT:    [[C:%.*]] = mul <2 x i55> [[A:%.*]], <i55 6, i55 48>
+; CHECK-NEXT:    ret <2 x i55> [[C]]
+;
+  %B = mul <2 x i55> %A, <i55 3, i55 12>
+  %C = shl <2 x i55> %B, <i55 1, i55 2>
+  ret <2 x i55> %C
+}
+
+define i29 @test7(i8 %X) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    ret i29 -1
+;
+  %A = zext i8 %X to i29
+  %B = ashr i29 -1, %A
+  ret i29 %B
+}
+
+define i7 @test8(i7 %A) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:    ret i7 0
+;
+  %B = shl i7 %A, 4
+  %C = shl i7 %B, 3
+  ret i7 %C
+}
+
+define i17 @test9(i17 %A) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:    [[B:%.*]] = and i17 [[A:%.*]], 1
+; CHECK-NEXT:    ret i17 [[B]]
+;
+  %B = shl i17 %A, 16
+  %C = lshr i17 %B, 16
+  ret i17 %C
+}
+
+; shl (lshr X, C), C --> and X, C'
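+; In test10 below, C = 18 on i19, so C' keeps only bit 18: 1 << 18 = 262144, printed as -262144 as a signed i19 constant.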
+
+define i19 @test10(i19 %X) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT:    [[SH1:%.*]] = and i19 [[X:%.*]], -262144
+; CHECK-NEXT:    ret i19 [[SH1]]
+;
+  %sh1 = lshr i19 %X, 18
+  %sh2 = shl i19 %sh1, 18
+  ret i19 %sh2
+}
+
+; Two right shifts in the same direction:
+; lshr (lshr X, C1), C2 --> lshr X, C1 + C2
+
+define <2 x i19> @lshr_lshr_splat_vec(<2 x i19> %X) {
+; CHECK-LABEL: @lshr_lshr_splat_vec(
+; CHECK-NEXT:    [[SH1:%.*]] = lshr <2 x i19> [[X:%.*]], <i19 5, i19 5>
+; CHECK-NEXT:    ret <2 x i19> [[SH1]]
+;
+  %sh1 = lshr <2 x i19> %X, <i19 3, i19 3>
+  %sh2 = lshr <2 x i19> %sh1, <i19 2, i19 2>
+  ret <2 x i19> %sh2
+}
+
+define i9 @multiuse_lshr_lshr(i9 %x) {
+; CHECK-LABEL: @multiuse_lshr_lshr(
+; CHECK-NEXT:    [[SH1:%.*]] = lshr i9 [[X:%.*]], 2
+; CHECK-NEXT:    [[SH2:%.*]] = lshr i9 [[X]], 5
+; CHECK-NEXT:    [[MUL:%.*]] = mul i9 [[SH1]], [[SH2]]
+; CHECK-NEXT:    ret i9 [[MUL]]
+;
+  %sh1 = lshr i9 %x, 2
+  %sh2 = lshr i9 %sh1, 3
+  %mul = mul i9 %sh1, %sh2
+  ret i9 %mul
+}
+
+define <2 x i9> @multiuse_lshr_lshr_splat(<2 x i9> %x) {
+; CHECK-LABEL: @multiuse_lshr_lshr_splat(
+; CHECK-NEXT:    [[SH1:%.*]] = lshr <2 x i9> [[X:%.*]], <i9 2, i9 2>
+; CHECK-NEXT:    [[SH2:%.*]] = lshr <2 x i9> [[X]], <i9 5, i9 5>
+; CHECK-NEXT:    [[MUL:%.*]] = mul <2 x i9> [[SH1]], [[SH2]]
+; CHECK-NEXT:    ret <2 x i9> [[MUL]]
+;
+  %sh1 = lshr <2 x i9> %x, <i9 2, i9 2>
+  %sh2 = lshr <2 x i9> %sh1, <i9 3, i9 3>
+  %mul = mul <2 x i9> %sh1, %sh2
+  ret <2 x i9> %mul
+}
+
+; Two left shifts in the same direction:
+; shl (shl X, C1), C2 -->  shl X, C1 + C2
+
+define <2 x i19> @shl_shl_splat_vec(<2 x i19> %X) {
+; CHECK-LABEL: @shl_shl_splat_vec(
+; CHECK-NEXT:    [[SH1:%.*]] = shl <2 x i19> [[X:%.*]], <i19 5, i19 5>
+; CHECK-NEXT:    ret <2 x i19> [[SH1]]
+;
+  %sh1 = shl <2 x i19> %X, <i19 3, i19 3>
+  %sh2 = shl <2 x i19> %sh1, <i19 2, i19 2>
+  ret <2 x i19> %sh2
+}
+
+define i42 @multiuse_shl_shl(i42 %x) {
+; CHECK-LABEL: @multiuse_shl_shl(
+; CHECK-NEXT:    [[SH1:%.*]] = shl i42 [[X:%.*]], 8
+; CHECK-NEXT:    [[SH2:%.*]] = shl i42 [[X]], 17
+; CHECK-NEXT:    [[MUL:%.*]] = mul i42 [[SH1]], [[SH2]]
+; CHECK-NEXT:    ret i42 [[MUL]]
+;
+  %sh1 = shl i42 %x, 8
+  %sh2 = shl i42 %sh1, 9
+  %mul = mul i42 %sh1, %sh2
+  ret i42 %mul
+}
+
+define <2 x i42> @multiuse_shl_shl_splat(<2 x i42> %x) {
+; CHECK-LABEL: @multiuse_shl_shl_splat(
+; CHECK-NEXT:    [[SH1:%.*]] = shl <2 x i42> [[X:%.*]], <i42 8, i42 8>
+; CHECK-NEXT:    [[SH2:%.*]] = shl <2 x i42> [[X]], <i42 17, i42 17>
+; CHECK-NEXT:    [[MUL:%.*]] = mul <2 x i42> [[SH1]], [[SH2]]
+; CHECK-NEXT:    ret <2 x i42> [[MUL]]
+;
+  %sh1 = shl <2 x i42> %x, <i42 8, i42 8>
+  %sh2 = shl <2 x i42> %sh1, <i42 9, i42 9>
+  %mul = mul <2 x i42> %sh1, %sh2
+  ret <2 x i42> %mul
+}
+
+; Equal shift amounts in opposite directions become bitwise 'and':
+; lshr (shl X, C), C --> and X, C'
+
+define <2 x i19> @eq_shl_lshr_splat_vec(<2 x i19> %X) {
+; CHECK-LABEL: @eq_shl_lshr_splat_vec(
+; CHECK-NEXT:    [[SH1:%.*]] = and <2 x i19> [[X:%.*]], <i19 65535, i19 65535>
+; CHECK-NEXT:    ret <2 x i19> [[SH1]]
+;
+  %sh1 = shl <2 x i19> %X, <i19 3, i19 3>
+  %sh2 = lshr <2 x i19> %sh1, <i19 3, i19 3>
+  ret <2 x i19> %sh2
+}
+
+; Equal shift amounts in opposite directions become bitwise 'and':
+; shl (lshr X, C), C --> and X, C'
+
+define <2 x i19> @eq_lshr_shl_splat_vec(<2 x i19> %X) {
+; CHECK-LABEL: @eq_lshr_shl_splat_vec(
+; CHECK-NEXT:    [[SH1:%.*]] = and <2 x i19> [[X:%.*]], <i19 -8, i19 -8>
+; CHECK-NEXT:    ret <2 x i19> [[SH1]]
+;
+  %sh1 = lshr <2 x i19> %X, <i19 3, i19 3>
+  %sh2 = shl <2 x i19> %sh1, <i19 3, i19 3>
+  ret <2 x i19> %sh2
+}
+
+; In general, we would need an 'and' for this transform, but the masked-off bits are known zero.
+; shl (lshr X, C1), C2 --> lshr X, C1 - C2
+
+define <2 x i7> @lshr_shl_splat_vec(<2 x i7> %X) {
+; CHECK-LABEL: @lshr_shl_splat_vec(
+; CHECK-NEXT:    [[MUL:%.*]] = mul <2 x i7> [[X:%.*]], <i7 -8, i7 -8>
+; CHECK-NEXT:    [[SH1:%.*]] = lshr exact <2 x i7> [[MUL]], <i7 1, i7 1>
+; CHECK-NEXT:    ret <2 x i7> [[SH1]]
+;
+  %mul = mul <2 x i7> %X, <i7 -8, i7 -8>
+  %sh1 = lshr exact <2 x i7> %mul, <i7 3, i7 3>
+  %sh2 = shl nuw nsw <2 x i7> %sh1, <i7 2, i7 2>
+  ret <2 x i7> %sh2
+}
+
+; In general, we would need an 'and' for this transform, but the masked-off bits are known zero.
+; lshr (shl X, C1), C2 -->  shl X, C1 - C2
+
+define <2 x i7> @shl_lshr_splat_vec(<2 x i7> %X) {
+; CHECK-LABEL: @shl_lshr_splat_vec(
+; CHECK-NEXT:    [[DIV:%.*]] = udiv <2 x i7> [[X:%.*]], <i7 9, i7 9>
+; CHECK-NEXT:    [[SH1:%.*]] = shl nuw nsw <2 x i7> [[DIV]], <i7 1, i7 1>
+; CHECK-NEXT:    ret <2 x i7> [[SH1]]
+;
+  %div = udiv <2 x i7> %X, <i7 9, i7 9>
+  %sh1 = shl nuw <2 x i7> %div, <i7 3, i7 3>
+  %sh2 = lshr exact <2 x i7> %sh1, <i7 2, i7 2>
+  ret <2 x i7> %sh2
+}
+
+; Don't hide the shl from scalar evolution. DAGCombine will get it.
+
+define i23 @test11(i23 %x) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT:    [[A:%.*]] = mul i23 [[X:%.*]], 3
+; CHECK-NEXT:    [[B:%.*]] = lshr i23 [[A]], 11
+; CHECK-NEXT:    [[C:%.*]] = shl i23 [[B]], 12
+; CHECK-NEXT:    ret i23 [[C]]
+;
+  %a = mul i23 %x, 3
+  %b = lshr i23 %a, 11
+  %c = shl i23 %b, 12
+  ret i23 %c
+}
+
+; shl (ashr X, C), C --> and X, C'
+
+define i47 @test12(i47 %X) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i47 [[X:%.*]], -256
+; CHECK-NEXT:    ret i47 [[TMP1]]
+;
+  %sh1 = ashr i47 %X, 8
+  %sh2 = shl i47 %sh1, 8
+  ret i47 %sh2
+}
+
+define <2 x i47> @test12_splat_vec(<2 x i47> %X) {
+; CHECK-LABEL: @test12_splat_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i47> [[X:%.*]], <i47 -256, i47 -256>
+; CHECK-NEXT:    ret <2 x i47> [[TMP1]]
+;
+  %sh1 = ashr <2 x i47> %X, <i47 8, i47 8>
+  %sh2 = shl <2 x i47> %sh1, <i47 8, i47 8>
+  ret <2 x i47> %sh2
+}
+
+; Don't hide the shl from scalar evolution. DAGCombine will get it.
+
+define i18 @test13(i18 %x) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT:    [[A:%.*]] = mul i18 [[X:%.*]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i18 [[A]], 8
+; CHECK-NEXT:    [[C:%.*]] = shl i18 [[TMP1]], 9
+; CHECK-NEXT:    ret i18 [[C]]
+;
+  %a = mul i18 %x, 3
+  %b = ashr i18 %a, 8
+  %c = shl i18 %b, 9
+  ret i18 %c
+}
+
+define i35 @test14(i35 %A) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT:    [[B:%.*]] = and i35 [[A:%.*]], -19760
+; CHECK-NEXT:    [[C:%.*]] = or i35 [[B]], 19744
+; CHECK-NEXT:    ret i35 [[C]]
+;
+  %B = lshr i35 %A, 4
+  %C = or i35 %B, 1234
+  %D = shl i35 %C, 4
+  ret i35 %D
+}
+
+define i79 @test14a(i79 %A) {
+; CHECK-LABEL: @test14a(
+; CHECK-NEXT:    [[C:%.*]] = and i79 [[A:%.*]], 77
+; CHECK-NEXT:    ret i79 [[C]]
+;
+  %B = shl i79 %A, 4
+  %C = and i79 %B, 1234
+  %D = lshr i79 %C, 4
+  ret i79 %D
+}
+
+define i45 @test15(i1 %C) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT:    [[A:%.*]] = select i1 [[C:%.*]], i45 12, i45 4
+; CHECK-NEXT:    ret i45 [[A]]
+;
+  %A = select i1 %C, i45 3, i45 1
+  %V = shl i45 %A, 2
+  ret i45 %V
+}
+
+define i53 @test15a(i1 %X) {
+; CHECK-LABEL: @test15a(
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[X:%.*]], i53 512, i53 128
+; CHECK-NEXT:    ret i53 [[V]]
+;
+  %A = select i1 %X, i8 3, i8 1
+  %B = zext i8 %A to i53
+  %V = shl i53 64, %B
+  ret i53 %V
+}
+
+define i1 @test16(i84 %X) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT:    [[AND:%.*]] = and i84 [[X:%.*]], 16
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i84 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %shr = ashr i84 %X, 4
+  %and = and i84 %shr, 1
+  %cmp = icmp ne i84 %and, 0
+  ret i1 %cmp
+}
+
+define <2 x i1> @test16vec(<2 x i84> %X) {
+; CHECK-LABEL: @test16vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i84> [[X:%.*]], <i84 16, i84 16>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i84> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %shr = ashr <2 x i84> %X, <i84 4, i84 4>
+  %and = and <2 x i84> %shr, <i84 1, i84 1>
+  %cmp = icmp ne <2 x i84> %and, zeroinitializer
+  ret <2 x i1> %cmp
+}
+
+define i1 @test17(i106 %A) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT:    [[B_MASK:%.*]] = and i106 [[A:%.*]], -8
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i106 [[B_MASK]], 9872
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %B = lshr i106 %A, 3
+  %C = icmp eq i106 %B, 1234
+  ret i1 %C
+}
+
+define <2 x i1> @test17vec(<2 x i106> %A) {
+; CHECK-LABEL: @test17vec(
+; CHECK-NEXT:    [[B_MASK:%.*]] = and <2 x i106> [[A:%.*]], <i106 -8, i106 -8>
+; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i106> [[B_MASK]], <i106 9872, i106 9872>
+; CHECK-NEXT:    ret <2 x i1> [[C]]
+;
+  %B = lshr <2 x i106> %A, <i106 3, i106 3>
+  %C = icmp eq <2 x i106> %B, <i106 1234, i106 1234>
+  ret <2 x i1> %C
+}
+
+define i1 @test18(i11 %A) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT:    ret i1 false
+;
+  %B = lshr i11 %A, 10
+  %C = icmp eq i11 %B, 123
+  ret i1 %C
+}
+
+define i1 @test19(i37 %A) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i37 [[A:%.*]], 4
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %B = ashr i37 %A, 2
+  %C = icmp eq i37 %B, 0
+  ret i1 %C
+}
+
+define <2 x i1> @test19vec(<2 x i37> %A) {
+; CHECK-LABEL: @test19vec(
+; CHECK-NEXT:    [[C:%.*]] = icmp ult <2 x i37> [[A:%.*]], <i37 4, i37 4>
+; CHECK-NEXT:    ret <2 x i1> [[C]]
+;
+  %B = ashr <2 x i37> %A, <i37 2, i37 2>
+  %C = icmp eq <2 x i37> %B, zeroinitializer
+  ret <2 x i1> %C
+}
+
+define i1 @test19a(i39 %A) {
+; CHECK-LABEL: @test19a(
+; CHECK-NEXT:    [[C:%.*]] = icmp ugt i39 [[A:%.*]], -5
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %B = ashr i39 %A, 2
+  %C = icmp eq i39 %B, -1
+  ret i1 %C
+}
+
+define <2 x i1> @test19a_vec(<2 x i39> %A) {
+; CHECK-LABEL: @test19a_vec(
+; CHECK-NEXT:    [[C:%.*]] = icmp ugt <2 x i39> [[A:%.*]], <i39 -5, i39 -5>
+; CHECK-NEXT:    ret <2 x i1> [[C]]
+;
+  %B = ashr <2 x i39> %A, <i39 2, i39 2>
+  %C = icmp eq <2 x i39> %B, <i39 -1, i39 -1>
+  ret <2 x i1> %C
+}
+
+define i1 @test20(i13 %A) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT:    ret i1 false
+;
+  %B = ashr i13 %A, 12
+  %C = icmp eq i13 %B, 123
+  ret i1 %C
+}
+
+define i1 @test21(i12 %A) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT:    [[B_MASK:%.*]] = and i12 [[A:%.*]], 63
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i12 [[B_MASK]], 62
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %B = shl i12 %A, 6
+  %C = icmp eq i12 %B, -128
+  ret i1 %C
+}
+
+define i1 @test22(i14 %A) {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT:    [[B_MASK:%.*]] = and i14 [[A:%.*]], 127
+; CHECK-NEXT:    [[C:%.*]] = icmp eq i14 [[B_MASK]], 0
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %B = shl i14 %A, 7
+  %C = icmp eq i14 %B, 0
+  ret i1 %C
+}
+
+define i11 @test23(i44 %A) {
+; CHECK-LABEL: @test23(
+; CHECK-NEXT:    [[D:%.*]] = trunc i44 [[A:%.*]] to i11
+; CHECK-NEXT:    ret i11 [[D]]
+;
+  %B = shl i44 %A, 33
+  %C = ashr i44 %B, 33
+  %D = trunc i44 %C to i11
+  ret i11 %D
+}
+
+; Fold lshr (shl X, C), C -> and X, C' regardless of the number of uses of the shl.
+
+define i44 @shl_lshr_eq_amt_multi_use(i44 %A) {
+; CHECK-LABEL: @shl_lshr_eq_amt_multi_use(
+; CHECK-NEXT:    [[B:%.*]] = shl i44 [[A:%.*]], 33
+; CHECK-NEXT:    [[C:%.*]] = and i44 [[A]], 2047
+; CHECK-NEXT:    [[D:%.*]] = or i44 [[B]], [[C]]
+; CHECK-NEXT:    ret i44 [[D]]
+;
+  %B = shl i44 %A, 33
+  %C = lshr i44 %B, 33
+  %D = add i44 %B, %C
+  ret i44 %D
+}
+
+; Fold vector lshr (shl X, C), C -> and X, C' regardless of the number of uses of the shl.
+
+define <2 x i44> @shl_lshr_eq_amt_multi_use_splat_vec(<2 x i44> %A) {
+; CHECK-LABEL: @shl_lshr_eq_amt_multi_use_splat_vec(
+; CHECK-NEXT:    [[B:%.*]] = shl <2 x i44> [[A:%.*]], <i44 33, i44 33>
+; CHECK-NEXT:    [[C:%.*]] = and <2 x i44> [[A]], <i44 2047, i44 2047>
+; CHECK-NEXT:    [[D:%.*]] = or <2 x i44> [[B]], [[C]]
+; CHECK-NEXT:    ret <2 x i44> [[D]]
+;
+  %B = shl <2 x i44> %A, <i44 33, i44 33>
+  %C = lshr <2 x i44> %B, <i44 33, i44 33>
+  %D = add <2 x i44> %B, %C
+  ret <2 x i44> %D
+}
+
+; Fold shl (lshr X, C), C -> and X, C' regardless of the number of uses of the lshr.
+
+define i43 @lshr_shl_eq_amt_multi_use(i43 %A) {
+; CHECK-LABEL: @lshr_shl_eq_amt_multi_use(
+; CHECK-NEXT:    [[B:%.*]] = lshr i43 [[A:%.*]], 23
+; CHECK-NEXT:    [[C:%.*]] = and i43 [[A]], -8388608
+; CHECK-NEXT:    [[D:%.*]] = mul i43 [[B]], [[C]]
+; CHECK-NEXT:    ret i43 [[D]]
+;
+  %B = lshr i43 %A, 23
+  %C = shl i43 %B, 23
+  %D = mul i43 %B, %C
+  ret i43 %D
+}
+
+; Fold vector shl (lshr X, C), C -> and X, C' regardless of the number of uses of the lshr.
+
+define <2 x i43> @lshr_shl_eq_amt_multi_use_splat_vec(<2 x i43> %A) {
+; CHECK-LABEL: @lshr_shl_eq_amt_multi_use_splat_vec(
+; CHECK-NEXT:    [[B:%.*]] = lshr <2 x i43> [[A:%.*]], <i43 23, i43 23>
+; CHECK-NEXT:    [[C:%.*]] = and <2 x i43> [[A]], <i43 -8388608, i43 -8388608>
+; CHECK-NEXT:    [[D:%.*]] = mul <2 x i43> [[B]], [[C]]
+; CHECK-NEXT:    ret <2 x i43> [[D]]
+;
+  %B = lshr <2 x i43> %A, <i43 23, i43 23>
+  %C = shl <2 x i43> %B, <i43 23, i43 23>
+  %D = mul <2 x i43> %B, %C
+  ret <2 x i43> %D
+}
+
+define i37 @test25(i37 %tmp.2, i37 %AA) {
+; CHECK-LABEL: @test25(
+; CHECK-NEXT:    [[TMP_3:%.*]] = and i37 [[TMP_2:%.*]], -131072
+; CHECK-NEXT:    [[X2:%.*]] = add i37 [[TMP_3]], [[AA:%.*]]
+; CHECK-NEXT:    [[TMP_6:%.*]] = and i37 [[X2]], -131072
+; CHECK-NEXT:    ret i37 [[TMP_6]]
+;
+  %x = lshr i37 %AA, 17
+  %tmp.3 = lshr i37 %tmp.2, 17
+  %tmp.5 = add i37 %tmp.3, %x
+  %tmp.6 = shl i37 %tmp.5, 17
+  ret i37 %tmp.6
+}
+
+define i40 @test26(i40 %A) {
+; CHECK-LABEL: @test26(
+; CHECK-NEXT:    [[B:%.*]] = and i40 [[A:%.*]], -2
+; CHECK-NEXT:    ret i40 [[B]]
+;
+  %B = lshr i40 %A, 1
+  %C = bitcast i40 %B to i40
+  %D = shl i40 %C, 1
+  ret i40 %D
+}
+
+; OSS-Fuzz #9880
+; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=9880
+define i177 @ossfuzz_9880(i177 %X) {
+; CHECK-LABEL: @ossfuzz_9880(
+; CHECK-NEXT:    [[A:%.*]] = alloca i177, align 8
+; CHECK-NEXT:    [[L1:%.*]] = load i177, i177* [[A]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i177 [[L1]], 0
+; CHECK-NEXT:    [[B1:%.*]] = zext i1 [[TMP1]] to i177
+; CHECK-NEXT:    ret i177 [[B1]]
+;
+  %A = alloca i177
+  %L1 = load i177, i177* %A
+  %B = or i177 0, -1
+  %B5 = udiv i177 %L1, %B
+  %B4 = add i177 %B5, %B
+  %B2 = add i177 %B, %B4
+  %B6 = mul i177 %B5, %B2
+  %B20 = shl i177 %L1, %B6
+  %B14 = sub i177 %B20, %B5
+  %B1 = udiv i177 %B14, %B6
+  ret i177 %B1
+}

Added: llvm/trunk/test/Transforms/InstCombine/apint-shl-trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/apint-shl-trunc.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/apint-shl-trunc.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/apint-shl-trunc.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
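+; Truncating (X >> A) to i1 reads bit A of X, so the scalar cases become ((1 << A) & X) != 0.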
+define i1 @test0(i39 %X, i39 %A) {
+; CHECK-LABEL: @test0(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i39 1, [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i39 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[D:%.*]] = icmp ne i39 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[D]]
+;
+  %B = lshr i39 %X, %A
+  %D = trunc i39 %B to i1
+  ret i1 %D
+}
+
+define i1 @test1(i799 %X, i799 %A) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i799 1, [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i799 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[D:%.*]] = icmp ne i799 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[D]]
+;
+  %B = lshr i799 %X, %A
+  %D = trunc i799 %B to i1
+  ret i1 %D
+}
+
+define <2 x i1> @test0vec(<2 x i39> %X, <2 x i39> %A) {
+; CHECK-LABEL: @test0vec(
+; CHECK-NEXT:    [[B:%.*]] = lshr <2 x i39> [[X:%.*]], [[A:%.*]]
+; CHECK-NEXT:    [[D:%.*]] = trunc <2 x i39> [[B]] to <2 x i1>
+; CHECK-NEXT:    ret <2 x i1> [[D]]
+;
+  %B = lshr <2 x i39> %X, %A
+  %D = trunc <2 x i39> %B to <2 x i1>
+  ret <2 x i1> %D
+}
+

Added: llvm/trunk/test/Transforms/InstCombine/apint-sub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/apint-sub.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/apint-sub.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/apint-sub.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,191 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i23 @test1(i23 %A) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    ret i23 0
+;
+  %B = sub i23 %A, %A
+  ret i23 %B
+}
+
+define i47 @test2(i47 %A) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    ret i47 %A
+;
+  %B = sub i47 %A, 0
+  ret i47 %B
+}
+
+define i97 @test3(i97 %A) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    ret i97 %A
+;
+  %B = sub i97 0, %A
+  %C = sub i97 0, %B
+  ret i97 %C
+}
+
+define i108 @test4(i108 %A, i108 %x) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[C:%.*]] = add i108 %x, %A
+; CHECK-NEXT:    ret i108 [[C]]
+;
+  %B = sub i108 0, %A
+  %C = sub i108 %x, %B
+  ret i108 %C
+}
+
+define i19 @test5(i19 %A, i19 %Bok, i19 %Cok) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    [[D1:%.*]] = sub i19 %Cok, %Bok
+; CHECK-NEXT:    [[E:%.*]] = add i19 [[D1]], %A
+; CHECK-NEXT:    ret i19 [[E]]
+;
+  %D = sub i19 %Bok, %Cok
+  %E = sub i19 %A, %D
+  ret i19 %E
+}
+
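+; A - (A & B) --> A & ~B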
+define i57 @test6(i57 %A, i57 %B) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    [[B_NOT:%.*]] = xor i57 %B, -1
+; CHECK-NEXT:    [[D:%.*]] = and i57 [[B_NOT]], %A
+; CHECK-NEXT:    ret i57 [[D]]
+;
+  %C = and i57 %A, %B
+  %D = sub i57 %A, %C
+  ret i57 %D
+}
+
+define i77 @test7(i77 %A) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    [[B:%.*]] = xor i77 %A, -1
+; CHECK-NEXT:    ret i77 [[B]]
+;
+  %B = sub i77 -1, %A
+  ret i77 %B
+}
+
+define i27 @test8(i27 %A) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:    [[C:%.*]] = shl i27 %A, 3
+; CHECK-NEXT:    ret i27 [[C]]
+;
+  %B = mul i27 9, %A
+  %C = sub i27 %B, %A
+  ret i27 %C
+}
+
+define i42 @test9(i42 %A) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:    [[C:%.*]] = mul i42 %A, -2
+; CHECK-NEXT:    ret i42 [[C]]
+;
+  %B = mul i42 3, %A
+  %C = sub i42 %A, %B
+  ret i42 %C
+}
+
+define i1 @test11(i9 %A, i9 %B) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT:    [[CD:%.*]] = icmp ne i9 %A, %B
+; CHECK-NEXT:    ret i1 [[CD]]
+;
+  %C = sub i9 %A, %B
+  %cD = icmp ne i9 %C, 0
+  ret i1 %cD
+}
+
+define i43 @test12(i43 %A) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:    [[C:%.*]] = lshr i43 %A, 42
+; CHECK-NEXT:    ret i43 [[C]]
+;
+  %B = ashr i43 %A, 42
+  %C = sub i43 0, %B
+  ret i43 %C
+}
+
+define i79 @test13(i79 %A) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT:    [[C:%.*]] = ashr i79 %A, 78
+; CHECK-NEXT:    ret i79 [[C]]
+;
+  %B = lshr i79 %A, 78
+  %C = sub i79 0, %B
+  ret i79 %C
+}
+
+define i1024 @test14(i1024 %A) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT:    [[D:%.*]] = ashr i1024 %A, 1023
+; CHECK-NEXT:    ret i1024 [[D]]
+;
+  %B = lshr i1024 %A, 1023
+  %C = bitcast i1024 %B to i1024
+  %D = sub i1024 0, %C
+  ret i1024 %D
+}
+
+define i51 @test16(i51 %A) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT:    [[Y:%.*]] = sdiv i51 %A, -1123
+; CHECK-NEXT:    ret i51 [[Y]]
+;
+  %X = sdiv i51 %A, 1123
+  %Y = sub i51 0, %X
+  ret i51 %Y
+}
+
+; Can't fold subtract here because negation might overflow.
+; PR3142
+define i25 @test17(i25 %Aok) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT:    [[B:%.*]] = sub i25 0, %Aok
+; CHECK-NEXT:    [[C:%.*]] = sdiv i25 [[B]], 1234
+; CHECK-NEXT:    ret i25 [[C]]
+;
+  %B = sub i25 0, %Aok
+  %C = sdiv i25 %B, 1234
+  ret i25 %C
+}
+
+define i128 @test18(i128 %Y) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT:    ret i128 0
+;
+  %t1 = shl i128 %Y, 2
+  %t2 = shl i128 %Y, 2
+  %t3 = sub i128 %t1, %t2
+  ret i128 %t3
+}
+
+define i39 @test19(i39 %X, i39 %Y) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT:    ret i39 %X
+;
+  %Z = sub i39 %X, %Y
+  %Q = add i39 %Z, %Y
+  ret i39 %Q
+}
+
+define i1 @test20(i33 %g, i33 %h) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT:    [[T4:%.*]] = icmp ne i33 %h, 0
+; CHECK-NEXT:    ret i1 [[T4]]
+;
+  %t2 = sub i33 %g, %h
+  %t4 = icmp ne i33 %t2, %g
+  ret i1 %t4
+}
+
+define i1 @test21(i256 %g, i256 %h) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT:    [[T4:%.*]] = icmp ne i256 %h, 0
+; CHECK-NEXT:    ret i1 [[T4]]
+;
+  %t2 = sub i256 %g, %h
+  %t4 = icmp ne i256 %t2, %g
+  ret i1 %t4
+}

Added: llvm/trunk/test/Transforms/InstCombine/apint-xor1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/apint-xor1.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/apint-xor1.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/apint-xor1.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,50 @@
+; This test makes sure that xor instructions are properly eliminated.
+; This test is for Integer BitWidth <= 64 && BitWidth % 8 != 0.
+
+; RUN: opt < %s -instcombine -S | not grep "xor "
+
+
+define i47 @test1(i47 %A, i47 %B) {
+        ;; (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+        %A1 = and i47 %A, 70368744177664
+        %B1 = and i47 %B, 70368744177661
+        %C1 = xor i47 %A1, %B1
+        ret i47 %C1
+}
+
+define i15 @test2(i15 %x) {
+        %tmp.2 = xor i15 %x, 0
+        ret i15 %tmp.2
+}
+
+define i23 @test3(i23 %x) {
+        %tmp.2 = xor i23 %x, %x
+        ret i23 %tmp.2
+}
+
+define i37 @test4(i37 %x) {
+        ; x ^ ~x == -1
+        %NotX = xor i37 -1, %x
+        %B = xor i37 %x, %NotX
+        ret i37 %B
+}
+
+define i7 @test5(i7 %A) {
+        ;; (A|B)^B == A & (~B)
+        %t1 = or i7 %A, 23
+        %r = xor i7 %t1, 23
+        ret i7 %r
+}
+
+define i7 @test6(i7 %A) {
+        %t1 = xor i7 %A, 23
+        %r = xor i7 %t1, 23
+        ret i7 %r
+}
+
+define i47 @test7(i47 %A) {
+        ;; (A | C1) ^ C2 -> (A | C1) & ~C2 iff (C1&C2) == C2
+        %B1 = or i47 %A,   70368744177663
+        %C1 = xor i47 %B1, 703687463
+        ret i47 %C1
+}

Added: llvm/trunk/test/Transforms/InstCombine/apint-xor2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/apint-xor2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/apint-xor2.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/apint-xor2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,51 @@
+; This test makes sure that xor instructions are properly eliminated.
+; This test is for Integer BitWidth > 64 && BitWidth <= 1024.
+
+; RUN: opt < %s -instcombine -S | not grep "xor "
+; END.
+
+
+define i447 @test1(i447 %A, i447 %B) {
+        ;; (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
+        %A1 = and i447 %A, 70368744177664
+        %B1 = and i447 %B, 70368744177663
+        %C1 = xor i447 %A1, %B1
+        ret i447 %C1
+}
+
+define i1005 @test2(i1005 %x) {
+        %tmp.2 = xor i1005 %x, 0
+        ret i1005 %tmp.2
+}
+
+define i123 @test3(i123 %x) {
+        %tmp.2 = xor i123 %x, %x
+        ret i123 %tmp.2
+}
+
+define i737 @test4(i737 %x) {
+        ; x ^ ~x == -1
+        %NotX = xor i737 -1, %x
+        %B = xor i737 %x, %NotX
+        ret i737 %B
+}
+
+define i700 @test5(i700 %A) {
+        ;; (A|B)^B == A & (~B)
+        %t1 = or i700 %A, 288230376151711743 
+        %r = xor i700 %t1, 288230376151711743 
+        ret i700 %r
+}
+
+define i77 @test6(i77 %A) {
+        %t1 = xor i77 %A, 23
+        %r = xor i77 %t1, 23
+        ret i77 %r
+}
+
+define i1023 @test7(i1023 %A) {
+        ;; (A | C1) ^ C2 -> (A | C1) & ~C2 iff (C1&C2) == C2
+        %B1 = or i1023 %A,   70368744177663
+        %C1 = xor i1023 %B1, 703687463
+        ret i1023 %C1
+}

Added: llvm/trunk/test/Transforms/InstCombine/assoc-cast-assoc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/assoc-cast-assoc.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/assoc-cast-assoc.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/assoc-cast-assoc.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i5 @XorZextXor(i3 %a) {
+; CHECK-LABEL: @XorZextXor(
+; CHECK-NEXT:    [[CAST:%.*]] = zext i3 %a to i5
+; CHECK-NEXT:    [[OP2:%.*]] = xor i5 [[CAST]], 15
+; CHECK-NEXT:    ret i5 [[OP2]]
+;
+  %op1 = xor i3 %a, 3
+  %cast = zext i3 %op1 to i5
+  %op2 = xor i5 %cast, 12
+  ret i5 %op2
+}
+
+define <2 x i32> @XorZextXorVec(<2 x i1> %a) {
+; CHECK-LABEL: @XorZextXorVec(
+; CHECK-NEXT:    [[CAST:%.*]] = zext <2 x i1> %a to <2 x i32>
+; CHECK-NEXT:    [[OP2:%.*]] = xor <2 x i32> [[CAST]], <i32 2, i32 1>
+; CHECK-NEXT:    ret <2 x i32> [[OP2]]
+;
+  %op1 = xor <2 x i1> %a, <i1 true, i1 false>
+  %cast = zext <2 x i1> %op1 to <2 x i32>
+  %op2 = xor <2 x i32> %cast, <i32 3, i32 1>
+  ret <2 x i32> %op2
+}
+
+define i5 @OrZextOr(i3 %a) {
+; CHECK-LABEL: @OrZextOr(
+; CHECK-NEXT:    [[CAST:%.*]] = zext i3 %a to i5
+; CHECK-NEXT:    [[OP2:%.*]] = or i5 [[CAST]], 11
+; CHECK-NEXT:    ret i5 [[OP2]]
+;
+  %op1 = or i3 %a, 3
+  %cast = zext i3 %op1 to i5
+  %op2 = or i5 %cast, 8
+  ret i5 %op2
+}
+
+define <2 x i32> @OrZextOrVec(<2 x i2> %a) {
+; CHECK-LABEL: @OrZextOrVec(
+; CHECK-NEXT:    [[CAST:%.*]] = zext <2 x i2> %a to <2 x i32>
+; CHECK-NEXT:    [[OP2:%.*]] = or <2 x i32> [[CAST]], <i32 3, i32 5>
+; CHECK-NEXT:    ret <2 x i32> [[OP2]]
+;
+  %op1 = or <2 x i2> %a, <i2 2, i2 0>
+  %cast = zext <2 x i2> %op1 to <2 x i32>
+  %op2 = or <2 x i32> %cast, <i32 1, i32 5>
+  ret <2 x i32> %op2
+}
+
+; Unlike the rest, this case is handled by SimplifyDemandedBits / ShrinkDemandedConstant.
+
+define i5 @AndZextAnd(i3 %a) {
+; CHECK-LABEL: @AndZextAnd(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i3 %a, 2
+; CHECK-NEXT:    [[OP2:%.*]] = zext i3 [[TMP1]] to i5
+; CHECK-NEXT:    ret i5 [[OP2]]
+;
+  %op1 = and i3 %a, 3
+  %cast = zext i3 %op1 to i5
+  %op2 = and i5 %cast, 14
+  ret i5 %op2
+}
+
+define <2 x i32> @AndZextAndVec(<2 x i8> %a) {
+; CHECK-LABEL: @AndZextAndVec(
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> %a, <i8 5, i8 0>
+; CHECK-NEXT:    [[OP2:%.*]] = zext <2 x i8> [[TMP1]] to <2 x i32>
+; CHECK-NEXT:    ret <2 x i32> [[OP2]]
+;
+  %op1 = and <2 x i8> %a, <i8 7, i8 0>
+  %cast = zext <2 x i8> %op1 to <2 x i32>
+  %op2 = and <2 x i32> %cast, <i32 261, i32 1>
+  ret <2 x i32> %op2
+}
+

Added: llvm/trunk/test/Transforms/InstCombine/assume-loop-align.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/assume-loop-align.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/assume-loop-align.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/assume-loop-align.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,47 @@
+; RUN: opt -domtree -instcombine -loops -S < %s | FileCheck %s
+; Note: The -loops above can be anything that requires the domtree, and is
+; necessary to work around a pass-manager bug.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define void @foo(i32* %a, i32* %b) #0 {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 63
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %ptrint1 = ptrtoint i32* %b to i64
+  %maskedptr2 = and i64 %ptrint1, 63
+  %maskcond3 = icmp eq i64 %maskedptr2, 0
+  tail call void @llvm.assume(i1 %maskcond3)
+  br label %for.body
+
+; CHECK-LABEL: @foo
+; CHECK: load i32, i32* {{.*}} align 64
+; CHECK: store i32 {{.*}}  align 64
+; CHECK: ret
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %0, 1
+  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  store i32 %add, i32* %arrayidx5, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 16
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 1648
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #1
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
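For reference, the ptrtoint / and / icmp eq 0 / llvm.assume sequence in @foo above is the usual IR shape of a source-level alignment assumption. A rough, hypothetical C analogue (assuming Clang's __builtin_assume_aligned builtin; not taken from this commit) is sketched below; InstCombine uses the assume to raise the load/store alignment to 64, which is what the CHECK lines verify.

/* Hypothetical C sketch (not from the commit): both pointers are promised
   to be 64-byte aligned via __builtin_assume_aligned, which lowers to the
   ptrtoint / and / icmp / llvm.assume pattern tested above. */
void foo(int *a, int *b) {
  int *pa = __builtin_assume_aligned(a, 64);
  int *pb = __builtin_assume_aligned(b, 64);
  for (long i = 0; i < 1648; i += 16)
    pa[i] = pb[i] + 1;   /* accesses InstCombine can mark align 64 */
}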

Added: llvm/trunk/test/Transforms/InstCombine/assume-redundant.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/assume-redundant.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/assume-redundant.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/assume-redundant.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,81 @@
+; RUN: opt -domtree -instcombine -loops -S < %s | FileCheck %s
+; Note: The -loops above can be anything that requires the domtree, and is
+; necessary to work around a pass-manager bug.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.s = type { double* }
+
+; Function Attrs: nounwind uwtable
+define void @_Z3fooR1s(%struct.s* nocapture readonly dereferenceable(8) %x) #0 {
+
+; CHECK-LABEL: @_Z3fooR1s
+; CHECK: call void @llvm.assume
+; CHECK-NOT: call void @llvm.assume
+
+entry:
+  %a = getelementptr inbounds %struct.s, %struct.s* %x, i64 0, i32 0
+  %0 = load double*, double** %a, align 8
+  %ptrint = ptrtoint double* %0 to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next.1, %for.body ]
+  tail call void @llvm.assume(i1 %maskcond)
+  %arrayidx = getelementptr inbounds double, double* %0, i64 %indvars.iv
+  %1 = load double, double* %arrayidx, align 16
+  %add = fadd double %1, 1.000000e+00
+  tail call void @llvm.assume(i1 %maskcond)
+  %mul = fmul double %add, 2.000000e+00
+  store double %mul, double* %arrayidx, align 16
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  tail call void @llvm.assume(i1 %maskcond)
+  %arrayidx.1 = getelementptr inbounds double, double* %0, i64 %indvars.iv.next
+  %2 = load double, double* %arrayidx.1, align 8
+  %add.1 = fadd double %2, 1.000000e+00
+  tail call void @llvm.assume(i1 %maskcond)
+  %mul.1 = fmul double %add.1, 2.000000e+00
+  store double %mul.1, double* %arrayidx.1, align 8
+  %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1
+  %exitcond.1 = icmp eq i64 %indvars.iv.next, 1599
+  br i1 %exitcond.1, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+declare align 8 i8* @get()
+
+; Check that redundant align assume is removed
+; CHECK-LABEL: @test
+; CHECK-NOT: call void @llvm.assume
+define void @test1() {
+  %p = call align 8 i8* @get()
+  %ptrint = ptrtoint i8* %p to i64
+  %maskedptr = and i64 %ptrint, 7
+  %maskcond = icmp eq i64 %maskedptr, 0
+  call void @llvm.assume(i1 %maskcond)
+  ret void
+}
+
+; Check that redundant align assume is removed
+; CHECK-LABEL: @test
+; CHECK-NOT: call void @llvm.assume
+define void @test3() {
+  %p = alloca i8, align 8
+  %ptrint = ptrtoint i8* %p to i64
+  %maskedptr = and i64 %ptrint, 7
+  %maskcond = icmp eq i64 %maskedptr, 0
+  call void @llvm.assume(i1 %maskcond)
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #1
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+

Added: llvm/trunk/test/Transforms/InstCombine/assume.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/assume.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/assume.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/assume.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,358 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @foo1(i32* %a) #0 {
+entry:
+  %0 = load i32, i32* %a, align 4
+
+; Check that the alignment has been upgraded and that the assume has not
+; been removed:
+; CHECK-LABEL: @foo1
+; CHECK-DAG: load i32, i32* %a, align 32
+; CHECK-DAG: call void @llvm.assume
+; CHECK: ret i32
+
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+
+  ret i32 %0
+}
+
+define i32 @foo2(i32* %a) #0 {
+entry:
+; Same check as in @foo1, but make sure it works if the assume is first too.
+; CHECK-LABEL: @foo2
+; CHECK-DAG: load i32, i32* %a, align 32
+; CHECK-DAG: call void @llvm.assume
+; CHECK: ret i32
+
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+
+  %0 = load i32, i32* %a, align 4
+  ret i32 %0
+}
+
+declare void @llvm.assume(i1) #1
+
+define i32 @simple(i32 %a) #1 {
+entry:
+
+; CHECK-LABEL: @simple
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 4
+
+  %cmp = icmp eq i32 %a, 4
+  tail call void @llvm.assume(i1 %cmp)
+  ret i32 %a
+}
+
+define i32 @can1(i1 %a, i1 %b, i1 %c) {
+entry:
+  %and1 = and i1 %a, %b
+  %and  = and i1 %and1, %c
+  tail call void @llvm.assume(i1 %and)
+
+; CHECK-LABEL: @can1
+; CHECK: call void @llvm.assume(i1 %a)
+; CHECK: call void @llvm.assume(i1 %b)
+; CHECK: call void @llvm.assume(i1 %c)
+; CHECK: ret i32
+
+  ret i32 5
+}
+
+define i32 @can2(i1 %a, i1 %b, i1 %c) {
+entry:
+  %v = or i1 %a, %b
+  %w = xor i1 %v, 1
+  tail call void @llvm.assume(i1 %w)
+
+; CHECK-LABEL: @can2
+; CHECK: %[[V1:[^ ]+]] = xor i1 %a, true
+; CHECK: call void @llvm.assume(i1 %[[V1]])
+; CHECK: %[[V2:[^ ]+]] = xor i1 %b, true
+; CHECK: call void @llvm.assume(i1 %[[V2]])
+; CHECK: ret i32
+
+  ret i32 5
+}
+
+define i32 @bar1(i32 %a) #0 {
+entry:
+  %and1 = and i32 %a, 3
+
+; CHECK-LABEL: @bar1
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 1
+
+  %and = and i32 %a, 7
+  %cmp = icmp eq i32 %and, 1
+  tail call void @llvm.assume(i1 %cmp)
+
+  ret i32 %and1
+}
+
+define i32 @bar2(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @bar2
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 1
+
+  %and = and i32 %a, 7
+  %cmp = icmp eq i32 %and, 1
+  tail call void @llvm.assume(i1 %cmp)
+
+  %and1 = and i32 %a, 3
+  ret i32 %and1
+}
+
+define i32 @bar3(i32 %a, i1 %x, i1 %y) #0 {
+entry:
+  %and1 = and i32 %a, 3
+
+; Don't be fooled by other assumes around.
+; CHECK-LABEL: @bar3
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 1
+
+  tail call void @llvm.assume(i1 %x)
+
+  %and = and i32 %a, 7
+  %cmp = icmp eq i32 %and, 1
+  tail call void @llvm.assume(i1 %cmp)
+
+  tail call void @llvm.assume(i1 %y)
+
+  ret i32 %and1
+}
+
+define i32 @bar4(i32 %a, i32 %b) {
+entry:
+  %and1 = and i32 %b, 3
+
+; CHECK-LABEL: @bar4
+; CHECK: call void @llvm.assume
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 1
+
+  %and = and i32 %a, 7
+  %cmp = icmp eq i32 %and, 1
+  tail call void @llvm.assume(i1 %cmp)
+
+  %cmp2 = icmp eq i32 %a, %b
+  tail call void @llvm.assume(i1 %cmp2)
+
+  ret i32 %and1
+}
+
+define i32 @icmp1(i32 %a) #0 {
+; CHECK-LABEL: @icmp1(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], 5
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret i32 1
+;
+  %cmp = icmp sgt i32 %a, 5
+  tail call void @llvm.assume(i1 %cmp)
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @icmp2(i32 %a) #0 {
+; CHECK-LABEL: @icmp2(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], 5
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret i32 0
+;
+  %cmp = icmp sgt i32 %a, 5
+  tail call void @llvm.assume(i1 %cmp)
+  %t0 = zext i1 %cmp to i32
+  %lnot.ext = xor i32 %t0, 1
+  ret i32 %lnot.ext
+}
+
+; If the 'not' of a condition is known true, then the condition must be false.
+
+define i1 @assume_not(i1 %cond) {
+; CHECK-LABEL: @assume_not(
+; CHECK-NEXT:    [[NOTCOND:%.*]] = xor i1 [[COND:%.*]], true
+; CHECK-NEXT:    call void @llvm.assume(i1 [[NOTCOND]])
+; CHECK-NEXT:    ret i1 false
+;
+  %notcond = xor i1 %cond, true
+  call void @llvm.assume(i1 %notcond)
+  ret i1 %cond
+}
+
+declare void @escape(i32* %a)
+
+; Canonicalize a nonnull assumption on a load into metadata form.
+
+define i1 @nonnull1(i32** %a) {
+; CHECK-LABEL: @nonnull1(
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32*, i32** %a, align 8, !nonnull !6
+; CHECK-NEXT:    tail call void @escape(i32* nonnull [[LOAD]])
+; CHECK-NEXT:    ret i1 false
+;
+  %load = load i32*, i32** %a
+  %cmp = icmp ne i32* %load, null
+  tail call void @llvm.assume(i1 %cmp)
+  tail call void @escape(i32* %load)
+  %rval = icmp eq i32* %load, null
+  ret i1 %rval
+}
+
+; Make sure the above canonicalization applies only
+; to pointer types.  Doing otherwise would be illegal.
+
+define i1 @nonnull2(i32* %a) {
+; CHECK-LABEL: @nonnull2(
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* %a, align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[LOAD]], 0
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    [[RVAL:%.*]] = icmp eq i32 [[LOAD]], 0
+; CHECK-NEXT:    ret i1 [[RVAL]]
+;
+  %load = load i32, i32* %a
+  %cmp = icmp ne i32 %load, 0
+  tail call void @llvm.assume(i1 %cmp)
+  %rval = icmp eq i32 %load, 0
+  ret i1 %rval
+}
+
+; Make sure the above canonicalization does not trigger
+; if the assume is control dependent on something else
+
+define i1 @nonnull3(i32** %a, i1 %control) {
+; CHECK-LABEL: @nonnull3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32*, i32** %a, align 8
+; CHECK-NEXT:    br i1 %control, label %taken, label %not_taken
+; CHECK:       taken:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32* [[LOAD]], null
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    [[RVAL:%.*]] = icmp eq i32* [[LOAD]], null
+; CHECK-NEXT:    ret i1 [[RVAL]]
+; CHECK:       not_taken:
+; CHECK-NEXT:    ret i1 true
+;
+entry:
+  %load = load i32*, i32** %a
+  %cmp = icmp ne i32* %load, null
+  br i1 %control, label %taken, label %not_taken
+taken:
+  tail call void @llvm.assume(i1 %cmp)
+  %rval = icmp eq i32* %load, null
+  ret i1 %rval
+not_taken:
+  ret i1 true
+}
+
+; Make sure the above canonicalization does not trigger
+; if the path from the load to the assume is potentially
+; interrupted by an exception being thrown
+
+define i1 @nonnull4(i32** %a) {
+; CHECK-LABEL: @nonnull4(
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32*, i32** %a, align 8
+; CHECK-NEXT:    tail call void @escape(i32* [[LOAD]])
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32* [[LOAD]], null
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    [[RVAL:%.*]] = icmp eq i32* [[LOAD]], null
+; CHECK-NEXT:    ret i1 [[RVAL]]
+;
+  %load = load i32*, i32** %a
+  ;; This call may throw!
+  tail call void @escape(i32* %load)
+  %cmp = icmp ne i32* %load, null
+  tail call void @llvm.assume(i1 %cmp)
+  %rval = icmp eq i32* %load, null
+  ret i1 %rval
+}
+
+; PR35846 - https://bugs.llvm.org/show_bug.cgi?id=35846
+
+define i32 @assumption_conflicts_with_known_bits(i32 %a, i32 %b) {
+; CHECK-LABEL: @assumption_conflicts_with_known_bits(
+; CHECK-NEXT:    tail call void @llvm.assume(i1 false)
+; CHECK-NEXT:    ret i32 0
+;
+  %and1 = and i32 %b, 3
+  %B1 = lshr i32 %and1, %and1
+  %B3 = shl nuw nsw i32 %and1, %B1
+  %cmp = icmp eq i32 %B3, 1
+  tail call void @llvm.assume(i1 %cmp)
+  %cmp2 = icmp eq i32 %B1, %B3
+  tail call void @llvm.assume(i1 %cmp2)
+  ret i32 %and1
+}
+
+; PR37726 - https://bugs.llvm.org/show_bug.cgi?id=37726
+; There's a loophole in eliminating a redundant assumption when
+; we have conflicting assumptions. Verify that debuginfo doesn't
+; get in the way of the fold.
+
+define void @debug_interference(i8 %x) {
+; CHECK-LABEL: @debug_interference(
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i8 [[X:%.*]], 0
+; CHECK-NEXT:    tail call void @llvm.dbg.value(metadata i32 5, metadata !7, metadata !DIExpression()), !dbg !9
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP1]])
+; CHECK-NEXT:    tail call void @llvm.dbg.value(metadata i32 5, metadata !7, metadata !DIExpression()), !dbg !9
+; CHECK-NEXT:    tail call void @llvm.dbg.value(metadata i32 5, metadata !7, metadata !DIExpression()), !dbg !9
+; CHECK-NEXT:    tail call void @llvm.assume(i1 false)
+; CHECK-NEXT:    ret void
+;
+  %cmp1 = icmp eq i8 %x, 0
+  %cmp2 = icmp ne i8 %x, 0
+  tail call void @llvm.assume(i1 %cmp1)
+  tail call void @llvm.dbg.value(metadata i32 5, metadata !1, metadata !DIExpression()), !dbg !9
+  tail call void @llvm.assume(i1 %cmp1)
+  tail call void @llvm.dbg.value(metadata i32 5, metadata !1, metadata !DIExpression()), !dbg !9
+  tail call void @llvm.assume(i1 %cmp2)
+  tail call void @llvm.dbg.value(metadata i32 5, metadata !1, metadata !DIExpression()), !dbg !9
+  tail call void @llvm.assume(i1 %cmp2)
+  ret void
+}
+
+; This would crash.
+; Does it ever make sense to peek through a bitcast of the icmp operand?
+
+define i32 @PR40940(<4 x i8> %x) {
+; CHECK-LABEL: @PR40940(
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[T2:%.*]] = bitcast <4 x i8> [[SHUF]] to i32
+; CHECK-NEXT:    [[T3:%.*]] = icmp ult i32 [[T2]], 65536
+; CHECK-NEXT:    call void @llvm.assume(i1 [[T3]])
+; CHECK-NEXT:    ret i32 [[T2]]
+;
+  %shuf = shufflevector <4 x i8> %x, <4 x i8> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
+  %t2 = bitcast <4 x i8> %shuf to i32
+  %t3 = icmp ult i32 %t2, 65536
+  call void @llvm.assume(i1 %t3)
+  ret i32 %t2
+}
+
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!5, !6, !7, !8}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !3, producer: "Me", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: null, retainedTypes: null, imports: null)
+!1 = !DILocalVariable(name: "", arg: 1, scope: !2, file: null, line: 1, type: null)
+!2 = distinct !DISubprogram(name: "debug", linkageName: "debug", scope: null, file: null, line: 0, type: null, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0)
+!3 = !DIFile(filename: "consecutive-fences.ll", directory: "")
+!5 = !{i32 2, !"Dwarf Version", i32 4}
+!6 = !{i32 2, !"Debug Info Version", i32 3}
+!7 = !{i32 1, !"wchar_size", i32 4}
+!8 = !{i32 7, !"PIC Level", i32 2}
+!9 = !DILocation(line: 0, column: 0, scope: !2)
+
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+

Added: llvm/trunk/test/Transforms/InstCombine/assume2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/assume2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/assume2.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/assume2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,159 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @llvm.assume(i1) #1
+
+define i32 @test1(i32 %a) #0 {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[A:%.*]], 15
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], 5
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret i32 5
+;
+  %and = and i32 %a, 15
+  %cmp = icmp eq i32 %and, 5
+  tail call void @llvm.assume(i1 %cmp)
+  %and1 = and i32 %a, 7
+  ret i32 %and1
+}
+
+define i32 @test2(i32 %a) #0 {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[A:%.*]], 15
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], 10
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret i32 2
+;
+  %and = and i32 %a, 15
+  %nand = xor i32 %and, -1
+  %cmp = icmp eq i32 %nand, 4294967285
+  tail call void @llvm.assume(i1 %cmp)
+  %and1 = and i32 %a, 7
+  ret i32 %and1
+}
+
+define i32 @test3(i32 %a) #0 {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[V:%.*]] = or i32 [[A:%.*]], -16
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[V]], -11
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret i32 5
+;
+  %v = or i32 %a, 4294967280
+  %cmp = icmp eq i32 %v, 4294967285
+  tail call void @llvm.assume(i1 %cmp)
+  %and1 = and i32 %a, 7
+  ret i32 %and1
+}
+
+define i32 @test4(i32 %a) #0 {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[V:%.*]] = or i32 [[A:%.*]], -16
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[V]], -6
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret i32 2
+;
+  %v = or i32 %a, 4294967280
+  %nv = xor i32 %v, -1
+  %cmp = icmp eq i32 %nv, 5
+  tail call void @llvm.assume(i1 %cmp)
+  %and1 = and i32 %a, 7
+  ret i32 %and1
+}
+
+define i32 @test5(i32 %a) #0 {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A:%.*]], 4
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret i32 4
+;
+  %v = xor i32 %a, 1
+  %cmp = icmp eq i32 %v, 5
+  tail call void @llvm.assume(i1 %cmp)
+  %and1 = and i32 %a, 7
+  ret i32 %and1
+}
+
+define i32 @test6(i32 %a) #0 {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    [[V_MASK:%.*]] = and i32 [[A:%.*]], 1073741823
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[V_MASK]], 5
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret i32 5
+;
+  %v = shl i32 %a, 2
+  %cmp = icmp eq i32 %v, 20
+  tail call void @llvm.assume(i1 %cmp)
+  %and1 = and i32 %a, 63
+  ret i32 %and1
+}
+
+define i32 @test7(i32 %a) #0 {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    [[V_MASK:%.*]] = and i32 [[A:%.*]], -4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[V_MASK]], 20
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret i32 20
+;
+  %v = lshr i32 %a, 2
+  %cmp = icmp eq i32 %v, 5
+  tail call void @llvm.assume(i1 %cmp)
+  %and1 = and i32 %a, 252
+  ret i32 %and1
+}
+
+define i32 @test8(i32 %a) #0 {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:    [[V_MASK:%.*]] = and i32 [[A:%.*]], -4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[V_MASK]], 20
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret i32 20
+;
+  %v = lshr i32 %a, 2
+  %cmp = icmp eq i32 %v, 5
+  tail call void @llvm.assume(i1 %cmp)
+  %and1 = and i32 %a, 252
+  ret i32 %and1
+}
+
+define i32 @test9(i32 %a) #0 {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[A:%.*]], 5
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret i32 0
+;
+  %cmp = icmp sgt i32 %a, 5
+  tail call void @llvm.assume(i1 %cmp)
+  %and1 = and i32 %a, 2147483648
+  ret i32 %and1
+}
+
+define i32 @test10(i32 %a) #0 {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[A:%.*]], -1
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret i32 -2147483648
+;
+  %cmp = icmp sle i32 %a, -2
+  tail call void @llvm.assume(i1 %cmp)
+  %and1 = and i32 %a, 2147483648
+  ret i32 %and1
+}
+
+define i32 @test11(i32 %a) #0 {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[A:%.*]], 257
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP]])
+; CHECK-NEXT:    ret i32 0
+;
+  %cmp = icmp ule i32 %a, 256
+  tail call void @llvm.assume(i1 %cmp)
+  %and1 = and i32 %a, 3072
+  ret i32 %and1
+}
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+

Added: llvm/trunk/test/Transforms/InstCombine/atomic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/atomic.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/atomic.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/atomic.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,333 @@
+; RUN: opt -S < %s -instcombine | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.0"
+
+; Check transforms involving atomic operations
+
+define i32 @test1(i32* %p) {
+; CHECK-LABEL: define i32 @test1(
+; CHECK: %x = load atomic i32, i32* %p seq_cst, align 4
+; CHECK: shl i32 %x, 1
+  %x = load atomic i32, i32* %p seq_cst, align 4
+  %y = load i32, i32* %p, align 4
+  %z = add i32 %x, %y
+  ret i32 %z
+}
+
+define i32 @test2(i32* %p) {
+; CHECK-LABEL: define i32 @test2(
+; CHECK: %x = load volatile i32, i32* %p, align 4
+; CHECK: %y = load volatile i32, i32* %p, align 4
+  %x = load volatile i32, i32* %p, align 4
+  %y = load volatile i32, i32* %p, align 4
+  %z = add i32 %x, %y
+  ret i32 %z
+}
+
+; The exact semantics of mixing volatile and non-volatile on the same
+; memory location are a bit unclear, but conservatively, we know we don't
+; want to remove the volatile.
+define i32 @test3(i32* %p) {
+; CHECK-LABEL: define i32 @test3(
+; CHECK: %x = load volatile i32, i32* %p, align 4
+  %x = load volatile i32, i32* %p, align 4
+  %y = load i32, i32* %p, align 4
+  %z = add i32 %x, %y
+  ret i32 %z
+}
+
+; Forwarding from a stronger ordered atomic is fine
+define i32 @test4(i32* %p) {
+; CHECK-LABEL: define i32 @test4(
+; CHECK: %x = load atomic i32, i32* %p seq_cst, align 4
+; CHECK: shl i32 %x, 1
+  %x = load atomic i32, i32* %p seq_cst, align 4
+  %y = load atomic i32, i32* %p unordered, align 4
+  %z = add i32 %x, %y
+  ret i32 %z
+}
+
+; Forwarding from a non-atomic is not.  (The earlier load
+; could in principle be promoted to atomic and then forwarded,
+; but we can't just drop the atomic from the load.)
+define i32 @test5(i32* %p) {
+; CHECK-LABEL: define i32 @test5(
+; CHECK: %x = load atomic i32, i32* %p unordered, align 4
+  %x = load atomic i32, i32* %p unordered, align 4
+  %y = load i32, i32* %p, align 4
+  %z = add i32 %x, %y
+  ret i32 %z
+}
+
+; Forwarding atomic to atomic is fine
+define i32 @test6(i32* %p) {
+; CHECK-LABEL: define i32 @test6(
+; CHECK: %x = load atomic i32, i32* %p unordered, align 4
+; CHECK: shl i32 %x, 1
+  %x = load atomic i32, i32* %p unordered, align 4
+  %y = load atomic i32, i32* %p unordered, align 4
+  %z = add i32 %x, %y
+  ret i32 %z
+}
+
+; FIXME: we currently don't do anything for monotonic
+define i32 @test7(i32* %p) {
+; CHECK-LABEL: define i32 @test7(
+; CHECK: %x = load atomic i32, i32* %p seq_cst, align 4
+; CHECK: %y = load atomic i32, i32* %p monotonic, align 4
+  %x = load atomic i32, i32* %p seq_cst, align 4
+  %y = load atomic i32, i32* %p monotonic, align 4
+  %z = add i32 %x, %y
+  ret i32 %z
+}
+
+; FIXME: We could forward in racy code
+define i32 @test8(i32* %p) {
+; CHECK-LABEL: define i32 @test8(
+; CHECK: %x = load atomic i32, i32* %p seq_cst, align 4
+; CHECK: %y = load atomic i32, i32* %p acquire, align 4
+  %x = load atomic i32, i32* %p seq_cst, align 4
+  %y = load atomic i32, i32* %p acquire, align 4
+  %z = add i32 %x, %y
+  ret i32 %z
+}
+
+; An unordered access to null is still unreachable.  There's no
+; ordering imposed.
+define i32 @test9() {
+; CHECK-LABEL: define i32 @test9(
+; CHECK: store i32 undef, i32* null
+  %x = load atomic i32, i32* null unordered, align 4
+  ret i32 %x
+}
+
+define i32 @test9_no_null_opt() #0 {
+; CHECK-LABEL: define i32 @test9_no_null_opt(
+; CHECK: load atomic i32, i32* null unordered
+  %x = load atomic i32, i32* null unordered, align 4
+  ret i32 %x
+}
+
+; FIXME: Could also fold
+define i32 @test10() {
+; CHECK-LABEL: define i32 @test10(
+; CHECK: load atomic i32, i32* null monotonic
+  %x = load atomic i32, i32* null monotonic, align 4
+  ret i32 %x
+}
+
+define i32 @test10_no_null_opt() #0 {
+; CHECK-LABEL: define i32 @test10_no_null_opt(
+; CHECK: load atomic i32, i32* null monotonic
+  %x = load atomic i32, i32* null monotonic, align 4
+  ret i32 %x
+}
+
+; Would this be legal to fold?  Probably?
+define i32 @test11() {
+; CHECK-LABEL: define i32 @test11(
+; CHECK: load atomic i32, i32* null seq_cst
+  %x = load atomic i32, i32* null seq_cst, align 4
+  ret i32 %x
+}
+
+define i32 @test11_no_null_opt() #0 {
+; CHECK-LABEL: define i32 @test11_no_null_opt(
+; CHECK: load atomic i32, i32* null seq_cst
+  %x = load atomic i32, i32* null seq_cst, align 4
+  ret i32 %x
+}
+
+; An unordered access to null is still unreachable.  There's no
+; ordering imposed.
+define i32 @test12() {
+; CHECK-LABEL: define i32 @test12(
+; CHECK: store atomic i32 undef, i32* null
+  store atomic i32 0, i32* null unordered, align 4
+  ret i32 0
+}
+
+define i32 @test12_no_null_opt() #0 {
+; CHECK-LABEL: define i32 @test12_no_null_opt(
+; CHECK: store atomic i32 0, i32* null unordered
+  store atomic i32 0, i32* null unordered, align 4
+  ret i32 0
+}
+
+; FIXME: Could also fold
+define i32 @test13() {
+; CHECK-LABEL: define i32 @test13(
+; CHECK: store atomic i32 0, i32* null monotonic
+  store atomic i32 0, i32* null monotonic, align 4
+  ret i32 0
+}
+
+define i32 @test13_no_null_opt() #0 {
+; CHECK-LABEL: define i32 @test13_no_null_opt(
+; CHECK: store atomic i32 0, i32* null monotonic
+  store atomic i32 0, i32* null monotonic, align 4
+  ret i32 0
+}
+
+; Would this be legal to fold?  Probably?
+define i32 @test14() {
+; CHECK-LABEL: define i32 @test14(
+; CHECK: store atomic i32 0, i32* null seq_cst
+  store atomic i32 0, i32* null seq_cst, align 4
+  ret i32 0
+}
+
+define i32 @test14_no_null_opt() #0 {
+; CHECK-LABEL: define i32 @test14_no_null_opt(
+; CHECK: store atomic i32 0, i32* null seq_cst
+  store atomic i32 0, i32* null seq_cst, align 4
+  ret i32 0
+}
+
+@a = external global i32
+@b = external global i32
+
+define i32 @test15(i1 %cnd) {
+; CHECK-LABEL: define i32 @test15(
+; CHECK: load atomic i32, i32* @a unordered, align 4
+; CHECK: load atomic i32, i32* @b unordered, align 4
+  %addr = select i1 %cnd, i32* @a, i32* @b
+  %x = load atomic i32, i32* %addr unordered, align 4
+  ret i32 %x
+}
+
+; FIXME: This would be legal to transform
+define i32 @test16(i1 %cnd) {
+; CHECK-LABEL: define i32 @test16(
+; CHECK: load atomic i32, i32* %addr monotonic, align 4
+  %addr = select i1 %cnd, i32* @a, i32* @b
+  %x = load atomic i32, i32* %addr monotonic, align 4
+  ret i32 %x
+}
+
+; FIXME: This would be legal to transform
+define i32 @test17(i1 %cnd) {
+; CHECK-LABEL: define i32 @test17(
+; CHECK: load atomic i32, i32* %addr seq_cst, align 4
+  %addr = select i1 %cnd, i32* @a, i32* @b
+  %x = load atomic i32, i32* %addr seq_cst, align 4
+  ret i32 %x
+}
+
+define i32 @test22(i1 %cnd) {
+; CHECK-LABEL: define i32 @test22(
+; CHECK: [[PHI:%.*]] = phi i32
+; CHECK: store atomic i32 [[PHI]], i32* @a unordered, align 4
+  br i1 %cnd, label %block1, label %block2
+
+block1:
+  store atomic i32 1, i32* @a unordered, align 4
+  br label %merge
+block2:
+  store atomic i32 2, i32* @a unordered, align 4
+  br label %merge
+
+merge:
+  ret i32 0
+}
+
+; TODO: probably also legal here
+define i32 @test23(i1 %cnd) {
+; CHECK-LABEL: define i32 @test23(
+; CHECK: br i1 %cnd, label %block1, label %block2
+  br i1 %cnd, label %block1, label %block2
+
+block1:
+  store atomic i32 1, i32* @a monotonic, align 4
+  br label %merge
+block2:
+  store atomic i32 2, i32* @a monotonic, align 4
+  br label %merge
+
+merge:
+  ret i32 0
+}
+
+declare void @clobber()
+
+define i32 @test18(float* %p) {
+; CHECK-LABEL: define i32 @test18(
+; CHECK: load atomic i32, i32* [[A:%.*]] unordered, align 4
+; CHECK: store atomic i32 [[B:%.*]], i32* [[C:%.*]] unordered, align 4
+  %x = load atomic float, float* %p unordered, align 4
+  call void @clobber() ;; keep the load around
+  store atomic float %x, float* %p unordered, align 4
+  ret i32 0
+}
+
+; TODO: probably also legal in this case
+define i32 @test19(float* %p) {
+; CHECK-LABEL: define i32 @test19(
+; CHECK: load atomic float, float* %p seq_cst, align 4
+; CHECK: store atomic float %x, float* %p seq_cst, align 4
+  %x = load atomic float, float* %p seq_cst, align 4
+  call void @clobber() ;; keep the load around
+  store atomic float %x, float* %p seq_cst, align 4
+  ret i32 0
+}
+
+define i32 @test20(i32** %p, i8* %v) {
+; CHECK-LABEL: define i32 @test20(
+; CHECK: store atomic i8* %v, i8** [[D:%.*]] unordered, align 4
+  %cast = bitcast i8* %v to i32*
+  store atomic i32* %cast, i32** %p unordered, align 4
+  ret i32 0
+}
+
+define i32 @test21(i32** %p, i8* %v) {
+; CHECK-LABEL: define i32 @test21(
+; CHECK: store atomic i32* %cast, i32** %p monotonic, align 4
+  %cast = bitcast i8* %v to i32*
+  store atomic i32* %cast, i32** %p monotonic, align 4
+  ret i32 0
+}
+
+define void @pr27490a(i8** %p1, i8** %p2) {
+; CHECK-LABEL: define void @pr27490
+; CHECK: %1 = bitcast i8** %p1 to i64*
+; CHECK: %l1 = load i64, i64* %1, align 8
+; CHECK: %2 = bitcast i8** %p2 to i64*
+; CHECK: store volatile i64 %l1, i64* %2, align 8
+  %l = load i8*, i8** %p1
+  store volatile i8* %l, i8** %p2
+  ret void
+}
+
+define void @pr27490b(i8** %p1, i8** %p2) {
+; CHECK-LABEL: define void @pr27490
+; CHECK: %1 = bitcast i8** %p1 to i64*
+; CHECK: %l1 = load i64, i64* %1, align 8
+; CHECK: %2 = bitcast i8** %p2 to i64*
+; CHECK: store atomic i64 %l1, i64* %2 seq_cst, align 8
+  %l = load i8*, i8** %p1
+  store atomic i8* %l, i8** %p2 seq_cst, align 8
+  ret void
+}
+
+;; At the moment, we can't form atomic vectors by folding since these are
+;; not representable in the IR.  This was PR29121.  The right long-term
+;; solution is to extend the IR to handle this case.
+define <2 x float> @no_atomic_vector_load(i64* %p) {
+; CHECK-LABEL: @no_atomic_vector_load
+; CHECK: load atomic i64, i64* %p unordered, align 8
+  %load = load atomic i64, i64* %p unordered, align 8
+  %.cast = bitcast i64 %load to <2 x float>
+  ret <2 x float> %.cast
+}
+
+define void @no_atomic_vector_store(<2 x float> %p, i8* %p2) {
+; CHECK-LABEL: @no_atomic_vector_store
+; CHECK: store atomic i64 %1, i64* %2 unordered, align 8
+  %1 = bitcast <2 x float> %p to i64
+  %2 = bitcast i8* %p2 to i64*
+  store atomic i64 %1, i64* %2 unordered, align 8
+  ret void
+}
+
+attributes #0 = { "null-pointer-is-valid"="true" }

Added: llvm/trunk/test/Transforms/InstCombine/atomicrmw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/atomicrmw.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/atomicrmw.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/atomicrmw.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,298 @@
+; RUN: opt -instcombine -S -o - %s | FileCheck %s
+; Check that we can replace `atomicrmw <op> LHS, 0` with `load atomic LHS`.
+; This is possible when:
+; - <op> LHS, 0 == LHS
+; - the ordering of atomicrmw is compatible with a load (i.e., no release semantic)
+
+; CHECK-LABEL: atomic_add_zero
+; CHECK-NEXT: %res = load atomic i32, i32* %addr monotonic, align 4
+; CHECK-NEXT: ret i32 %res
+define i32 @atomic_add_zero(i32* %addr) {
+  %res = atomicrmw add i32* %addr, i32 0 monotonic
+  ret i32 %res
+}
+
+; CHECK-LABEL: atomic_or_zero
+; CHECK-NEXT: %res = load atomic i32, i32* %addr monotonic, align 4
+; CHECK-NEXT: ret i32 %res
+define i32 @atomic_or_zero(i32* %addr) {
+  %res = atomicrmw add i32* %addr, i32 0 monotonic
+  ret i32 %res
+}
+
+; CHECK-LABEL: atomic_sub_zero
+; CHECK-NEXT: %res = load atomic i32, i32* %addr monotonic, align 4
+; CHECK-NEXT: ret i32 %res
+define i32 @atomic_sub_zero(i32* %addr) {
+  %res = atomicrmw sub i32* %addr, i32 0 monotonic
+  ret i32 %res
+}
+
+; CHECK-LABEL: atomic_and_allones
+; CHECK-NEXT: %res = load atomic i32, i32* %addr monotonic, align 4
+; CHECK-NEXT: ret i32 %res
+define i32 @atomic_and_allones(i32* %addr) {
+  %res = atomicrmw and i32* %addr, i32 -1 monotonic
+  ret i32 %res
+}
+; CHECK-LABEL: atomic_umin_uint_max
+; CHECK-NEXT: %res = load atomic i32, i32* %addr monotonic, align 4
+; CHECK-NEXT: ret i32 %res
+define i32 @atomic_umin_uint_max(i32* %addr) {
+  %res = atomicrmw umin i32* %addr, i32 -1 monotonic
+  ret i32 %res
+}
+
+; CHECK-LABEL: atomic_umax_zero
+; CHECK-NEXT: %res = load atomic i32, i32* %addr monotonic, align 4
+; CHECK-NEXT: ret i32 %res
+define i32 @atomic_umax_zero(i32* %addr) {
+  %res = atomicrmw umax i32* %addr, i32 0 monotonic
+  ret i32 %res
+}
+
+; CHECK-LABEL: atomic_min_smax_char
+; CHECK-NEXT: %res = load atomic i8, i8* %addr monotonic, align 1
+; CHECK-NEXT: ret i8 %res
+define i8 @atomic_min_smax_char(i8* %addr) {
+  %res = atomicrmw min i8* %addr, i8 127 monotonic
+  ret i8 %res
+}
+
+; CHECK-LABEL: atomic_max_smin_char
+; CHECK-NEXT: %res = load atomic i8, i8* %addr monotonic, align 1
+; CHECK-NEXT: ret i8 %res
+define i8 @atomic_max_smin_char(i8* %addr) {
+  %res = atomicrmw max i8* %addr, i8 -128 monotonic
+  ret i8 %res
+}
+
+; CHECK-LABEL: atomic_fsub
+; CHECK-NEXT: %res = load atomic float, float* %addr monotonic, align 4
+; CHECK-NEXT: ret float %res
+define float @atomic_fsub_zero(float* %addr) {
+  %res = atomicrmw fsub float* %addr, float 0.0 monotonic
+  ret float %res
+}
+
+; CHECK-LABEL: atomic_fadd
+; CHECK-NEXT: %res = load atomic float, float* %addr monotonic, align 4
+; CHECK-NEXT: ret float %res
+define float @atomic_fadd_zero(float* %addr) {
+  %res = atomicrmw fadd float* %addr, float -0.0 monotonic
+  ret float %res
+}
+
+; CHECK-LABEL: atomic_fsub_canon
+; CHECK-NEXT: %res = atomicrmw fadd float* %addr, float -0.000000e+00 release
+; CHECK-NEXT: ret float %res
+define float @atomic_fsub_canon(float* %addr) {
+  %res = atomicrmw fsub float* %addr, float 0.0 release
+  ret float %res
+}
+; CHECK-LABEL: atomic_fadd_canon
+; CHECK-NEXT: %res = atomicrmw fadd float* %addr, float -0.000000e+00 release
+; CHECK-NEXT: ret float %res
+define float @atomic_fadd_canon(float* %addr) {
+  %res = atomicrmw fadd float* %addr, float -0.0 release
+  ret float %res
+}
+
+; Can't replace a volatile with a load; this would eliminate a volatile store.
+; CHECK-LABEL: atomic_sub_zero_volatile
+; CHECK-NEXT: %res = atomicrmw volatile sub i64* %addr, i64 0 acquire
+; CHECK-NEXT: ret i64 %res
+define i64 @atomic_sub_zero_volatile(i64* %addr) {
+  %res = atomicrmw volatile sub i64* %addr, i64 0 acquire
+  ret i64 %res
+}
+
+
+; Check that the transformation properly preserves the syncscope.
+; CHECK-LABEL: atomic_syncscope
+; CHECK-NEXT: %res = load atomic i16, i16* %addr syncscope("some_syncscope") acquire, align 2
+; CHECK-NEXT: ret i16 %res
+define i16 @atomic_syncscope(i16* %addr) {
+  %res = atomicrmw or i16* %addr, i16 0 syncscope("some_syncscope") acquire
+  ret i16 %res
+}
+
+; By eliminating the store part of the atomicrmw, we would get rid of the
+; release semantic, which is incorrect.  We can canonicalize the operation.
+; CHECK-LABEL: atomic_seq_cst
+; CHECK-NEXT: %res = atomicrmw or i16* %addr, i16 0 seq_cst
+; CHECK-NEXT: ret i16 %res
+define i16 @atomic_seq_cst(i16* %addr) {
+  %res = atomicrmw add i16* %addr, i16 0 seq_cst
+  ret i16 %res
+}
+
+; Check that the transformation does not apply when the value is changed by
+; the atomic operation (non zero constant).
+; CHECK-LABEL: atomic_add_non_zero
+; CHECK-NEXT: %res = atomicrmw add i16* %addr, i16 2 monotonic
+; CHECK-NEXT: ret i16 %res
+define i16 @atomic_add_non_zero(i16* %addr) {
+  %res = atomicrmw add i16* %addr, i16 2 monotonic
+  ret i16 %res
+}
+
+; CHECK-LABEL: atomic_xor_zero
+; CHECK-NEXT: %res = load atomic i16, i16* %addr monotonic, align 2
+; CHECK-NEXT: ret i16 %res
+define i16 @atomic_xor_zero(i16* %addr) {
+  %res = atomicrmw xor i16* %addr, i16 0 monotonic
+  ret i16 %res
+}
+
+; Check that the transformation does not apply when the ordering is
+; incompatible with a load (release).  Do canonicalize.
+; CHECK-LABEL: atomic_release
+; CHECK-NEXT: %res = atomicrmw or i16* %addr, i16 0 release
+; CHECK-NEXT: ret i16 %res
+define i16 @atomic_release(i16* %addr) {
+  %res = atomicrmw sub i16* %addr, i16 0 release
+  ret i16 %res
+}
+
+; Check that the transformation does not apply when the ordering is
+; incompatible with a load (acquire, release).  Do canonicalize.
+; CHECK-LABEL: atomic_acq_rel
+; CHECK-NEXT: %res = atomicrmw or i16* %addr, i16 0 acq_rel
+; CHECK-NEXT: ret i16 %res
+define i16 @atomic_acq_rel(i16* %addr) {
+  %res = atomicrmw xor i16* %addr, i16 0 acq_rel
+  ret i16 %res
+}
+
+
+; CHECK-LABEL: sat_or_allones
+; CHECK-NEXT: %res = atomicrmw xchg i32* %addr, i32 -1 monotonic
+; CHECK-NEXT: ret i32 %res
+define i32 @sat_or_allones(i32* %addr) {
+  %res = atomicrmw or i32* %addr, i32 -1 monotonic
+  ret i32 %res
+}
+
+; CHECK-LABEL: sat_and_zero
+; CHECK-NEXT: %res = atomicrmw xchg i32* %addr, i32 0 monotonic
+; CHECK-NEXT: ret i32 %res
+define i32 @sat_and_zero(i32* %addr) {
+  %res = atomicrmw and i32* %addr, i32 0 monotonic
+  ret i32 %res
+}
+; CHECK-LABEL: sat_umin_uint_min
+; CHECK-NEXT: %res = atomicrmw xchg i32* %addr, i32 0 monotonic
+; CHECK-NEXT: ret i32 %res
+define i32 @sat_umin_uint_min(i32* %addr) {
+  %res = atomicrmw umin i32* %addr, i32 0 monotonic
+  ret i32 %res
+}
+
+; CHECK-LABEL: sat_umax_uint_max
+; CHECK-NEXT: %res = atomicrmw xchg i32* %addr, i32 -1 monotonic
+; CHECK-NEXT: ret i32 %res
+define i32 @sat_umax_uint_max(i32* %addr) {
+  %res = atomicrmw umax i32* %addr, i32 -1 monotonic
+  ret i32 %res
+}
+
+; CHECK-LABEL: sat_min_smin_char
+; CHECK-NEXT: %res = atomicrmw xchg i8* %addr, i8 -128 monotonic
+; CHECK-NEXT: ret i8 %res
+define i8 @sat_min_smin_char(i8* %addr) {
+  %res = atomicrmw min i8* %addr, i8 -128 monotonic
+  ret i8 %res
+}
+
+; CHECK-LABEL: sat_max_smax_char
+; CHECK-NEXT: %res = atomicrmw xchg i8* %addr, i8 127 monotonic
+; CHECK-NEXT: ret i8 %res
+define i8 @sat_max_smax_char(i8* %addr) {
+  %res = atomicrmw max i8* %addr, i8 127 monotonic
+  ret i8 %res
+}
+
+; CHECK-LABEL: sat_fadd_nan
+; CHECK-NEXT: %res = atomicrmw xchg double* %addr, double 0x7FF00000FFFFFFFF release
+; CHECK-NEXT: ret double %res
+define double @sat_fadd_nan(double* %addr) {
+  %res = atomicrmw fadd double* %addr, double 0x7FF00000FFFFFFFF release
+  ret double %res
+}
+
+; CHECK-LABEL: sat_fsub_nan
+; CHECK-NEXT: %res = atomicrmw xchg double* %addr, double 0x7FF00000FFFFFFFF release
+; CHECK-NEXT: ret double %res
+define double @sat_fsub_nan(double* %addr) {
+  %res = atomicrmw fsub double* %addr, double 0x7FF00000FFFFFFFF release
+  ret double %res
+}
+
+; CHECK-LABEL: sat_fsub_nan_unused
+; CHECK-NEXT: store atomic double 0x7FF00000FFFFFFFF, double* %addr monotonic, align 8
+; CHECK-NEXT: ret void
+define void @sat_fsub_nan_unused(double* %addr) {
+  atomicrmw fsub double* %addr, double 0x7FF00000FFFFFFFF monotonic
+  ret void
+}
+
+; CHECK-LABEL: xchg_unused_monotonic
+; CHECK-NEXT: store atomic i32 0, i32* %addr monotonic, align 4
+; CHECK-NEXT: ret void
+define void @xchg_unused_monotonic(i32* %addr) {
+  atomicrmw xchg i32* %addr, i32 0 monotonic
+  ret void
+}
+
+; CHECK-LABEL: xchg_unused_release
+; CHECK-NEXT: store atomic i32 -1, i32* %addr release, align 4
+; CHECK-NEXT: ret void
+define void @xchg_unused_release(i32* %addr) {
+  atomicrmw xchg i32* %addr, i32 -1 release
+  ret void
+}
+
+; CHECK-LABEL: xchg_unused_seq_cst
+; CHECK-NEXT: atomicrmw xchg i32* %addr, i32 0 seq_cst
+; CHECK-NEXT: ret void
+define void @xchg_unused_seq_cst(i32* %addr) {
+  atomicrmw xchg i32* %addr, i32 0 seq_cst
+  ret void
+}
+
+; CHECK-LABEL: xchg_unused_volatile
+; CHECK-NEXT: atomicrmw volatile xchg i32* %addr, i32 0 monotonic
+; CHECK-NEXT: ret void
+define void @xchg_unused_volatile(i32* %addr) {
+  atomicrmw volatile xchg i32* %addr, i32 0 monotonic
+  ret void
+}
+
+; CHECK-LABEL: sat_or_allones_unused
+; CHECK-NEXT: store atomic i32 -1, i32* %addr monotonic, align 4
+; CHECK-NEXT: ret void
+define void @sat_or_allones_unused(i32* %addr) {
+  atomicrmw or i32* %addr, i32 -1 monotonic
+  ret void
+}
+
+
+; CHECK-LABEL: undef_operand_unused
+; CHECK-NEXT: atomicrmw or i32* %addr, i32 undef monotonic
+; CHECK-NEXT: ret void
+define void @undef_operand_unused(i32* %addr) {
+  atomicrmw or i32* %addr, i32 undef monotonic
+  ret void
+}
+
+; CHECK-LABEL: undef_operand_used
+; CHECK-NEXT: %res = atomicrmw or i32* %addr, i32 undef monotonic
+; CHECK-NEXT: ret i32 %res
+define i32 @undef_operand_used(i32* %addr) {
+  %res = atomicrmw or i32* %addr, i32 undef monotonic
+  ret i32 %res
+}
+
+
+
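The folds above rely on an RMW with an identity operand being read-only. A hypothetical C-level analogue (assuming the usual GCC/Clang __atomic builtins; not taken from this commit) is sketched below: a relaxed fetch-add of zero is just an atomic read, while a seq_cst one must keep its store side and is only canonicalized.

#include <stdint.h>

/* Hypothetical sketch (not from the commit).  An atomicrmw whose operand is
   the identity for its operation only observes memory, so for orderings
   without a release component it can become a plain atomic load. */
int32_t read_relaxed(int32_t *p) {
  /* lowers to: atomicrmw add i32* %p, i32 0 monotonic
     which InstCombine turns into: load atomic i32, i32* %p monotonic */
  return __atomic_fetch_add(p, 0, __ATOMIC_RELAXED);
}

int32_t read_seq_cst(int32_t *p) {
  /* seq_cst includes release semantics, so the store side must stay;
     the RMW is only canonicalized (e.g. add 0 -> or 0), not removed. */
  return __atomic_fetch_add(p, 0, __ATOMIC_SEQ_CST);
}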

Added: llvm/trunk/test/Transforms/InstCombine/badmalloc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/badmalloc.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/badmalloc.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/badmalloc.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,41 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-apple-darwin10.0"
+
+declare noalias i8* @malloc(i64) nounwind
+declare void @free(i8*)
+
+; PR5130
+define i1 @test1() {
+  %A = call noalias i8* @malloc(i64 4) nounwind
+  %B = icmp eq i8* %A, null
+  store i8 0, i8* %A
+
+  call void @free(i8* %A)
+  ret i1 %B
+
+; CHECK-LABEL: @test1(
+; CHECK: ret i1 false
+}
+
+; CHECK-LABEL: @test2(
+define noalias i8* @test2() nounwind {
+entry:
+; CHECK: @malloc
+  %A = call noalias i8* @malloc(i64 4) nounwind
+; CHECK: icmp eq
+  %tobool = icmp eq i8* %A, null
+; CHECK: br i1
+  br i1 %tobool, label %return, label %if.end
+
+if.end:
+; CHECK: store
+  store i8 7, i8* %A
+  br label %return
+
+return:
+; CHECK: phi
+  %retval.0 = phi i8* [ %A, %if.end ], [ null, %entry ]
+  ret i8* %retval.0
+}

Added: llvm/trunk/test/Transforms/InstCombine/binop-cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/binop-cast.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/binop-cast.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/binop-cast.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,9 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @testAdd(i32 %X, i32 %Y) {
+	%tmp = add i32 %X, %Y
+; CHECK: %tmp = add i32 %X, %Y
+	%tmp.l = bitcast i32 %tmp to i32
+	ret i32 %tmp.l
+; CHECK: ret i32 %tmp
+}

Added: llvm/trunk/test/Transforms/InstCombine/bit-checks.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/bit-checks.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/bit-checks.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/bit-checks.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,647 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @main1(i32 %argc) {
+; CHECK-LABEL: @main1(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 %argc, 3
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 2, i32 1
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
+;
+  %and = and i32 %argc, 1
+  %tobool = icmp ne i32 %and, 0
+  %and2 = and i32 %argc, 2
+  %tobool3 = icmp ne i32 %and2, 0
+  %or.cond = and i1 %tobool, %tobool3
+  %retval.0 = select i1 %or.cond, i32 2, i32 1
+  ret i32 %retval.0
+}
+
+define i32 @main2(i32 %argc) {
+; CHECK-LABEL: @main2(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 %argc, 3
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp eq i32 [[TMP1]], 3
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %and = and i32 %argc, 1
+  %tobool = icmp eq i32 %and, 0
+  %and2 = and i32 %argc, 2
+  %tobool3 = icmp eq i32 %and2, 0
+  %or.cond = or i1 %tobool, %tobool3
+  %storemerge = select i1 %or.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+; tests to check combining (icmp eq (A & B), C) & (icmp eq (A & D), E)
+; tests to check if (icmp eq (A & B), 0) is treated like (icmp eq (A & B), B)
+; if B is a single bit constant
+
+; (icmp eq (A & B), 0) & (icmp eq (A & D), 0) -> (icmp eq (A & (B|D)), 0)
+define i32 @main3(i32 %argc) {
+; CHECK-LABEL: @main3(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 %argc, 55
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %and = and i32 %argc, 7
+  %tobool = icmp eq i32 %and, 0
+  %and2 = and i32 %argc, 48
+  %tobool3 = icmp eq i32 %and2, 0
+  %and.cond = and i1 %tobool, %tobool3
+  %storemerge = select i1 %and.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+define i32 @main3b(i32 %argc) {
+; CHECK-LABEL: @main3b(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 %argc, 23
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %and = and i32 %argc, 7
+  %tobool = icmp eq i32 %and, 0
+  %and2 = and i32 %argc, 16
+  %tobool3 = icmp ne i32 %and2, 16
+  %and.cond = and i1 %tobool, %tobool3
+  %storemerge = select i1 %and.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+define i32 @main3e_like(i32 %argc, i32 %argc2, i32 %argc3) {
+; CHECK-LABEL: @main3e_like(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %argc2, %argc3
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], %argc
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %and = and i32 %argc, %argc2
+  %tobool = icmp eq i32 %and, 0
+  %and2 = and i32 %argc, %argc3
+  %tobool3 = icmp eq i32 %and2, 0
+  %and.cond = and i1 %tobool, %tobool3
+  %storemerge = select i1 %and.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+; (icmp ne (A & B), 0) | (icmp ne (A & D), 0) -> (icmp ne (A & (B|D)), 0)
+define i32 @main3c(i32 %argc) {
+; CHECK-LABEL: @main3c(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 %argc, 55
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %and = and i32 %argc, 7
+  %tobool = icmp ne i32 %and, 0
+  %and2 = and i32 %argc, 48
+  %tobool3 = icmp ne i32 %and2, 0
+  %or.cond = or i1 %tobool, %tobool3
+  %storemerge = select i1 %or.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+define i32 @main3d(i32 %argc) {
+; CHECK-LABEL: @main3d(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 %argc, 23
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %and = and i32 %argc, 7
+  %tobool = icmp ne i32 %and, 0
+  %and2 = and i32 %argc, 16
+  %tobool3 = icmp eq i32 %and2, 16
+  %or.cond = or i1 %tobool, %tobool3
+  %storemerge = select i1 %or.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+define i32 @main3f_like(i32 %argc, i32 %argc2, i32 %argc3) {
+; CHECK-LABEL: @main3f_like(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %argc2, %argc3
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], %argc
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %and = and i32 %argc, %argc2
+  %tobool = icmp ne i32 %and, 0
+  %and2 = and i32 %argc, %argc3
+  %tobool3 = icmp ne i32 %and2, 0
+  %or.cond = or i1 %tobool, %tobool3
+  %storemerge = select i1 %or.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+; (icmp eq (A & B), B) & (icmp eq (A & D), D) -> (icmp eq (A & (B|D)), (B|D))
+define i32 @main4(i32 %argc) {
+; CHECK-LABEL: @main4(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 %argc, 55
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp ne i32 [[TMP1]], 55
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %and = and i32 %argc, 7
+  %tobool = icmp eq i32 %and, 7
+  %and2 = and i32 %argc, 48
+  %tobool3 = icmp eq i32 %and2, 48
+  %and.cond = and i1 %tobool, %tobool3
+  %storemerge = select i1 %and.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+define i32 @main4b(i32 %argc) {
+; CHECK-LABEL: @main4b(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 %argc, 23
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp ne i32 [[TMP1]], 23
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %and = and i32 %argc, 7
+  %tobool = icmp eq i32 %and, 7
+  %and2 = and i32 %argc, 16
+  %tobool3 = icmp ne i32 %and2, 0
+  %and.cond = and i1 %tobool, %tobool3
+  %storemerge = select i1 %and.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+define i32 @main4e_like(i32 %argc, i32 %argc2, i32 %argc3) {
+; CHECK-LABEL: @main4e_like(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %argc2, %argc3
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], %argc
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %and = and i32 %argc, %argc2
+  %tobool = icmp eq i32 %and, %argc2
+  %and2 = and i32 %argc, %argc3
+  %tobool3 = icmp eq i32 %and2, %argc3
+  %and.cond = and i1 %tobool, %tobool3
+  %storemerge = select i1 %and.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+; (icmp ne (A & B), B) | (icmp ne (A & D), D) -> (icmp ne (A & (B|D)), (B|D))
+define i32 @main4c(i32 %argc) {
+; CHECK-LABEL: @main4c(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 %argc, 55
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp eq i32 [[TMP1]], 55
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %and = and i32 %argc, 7
+  %tobool = icmp ne i32 %and, 7
+  %and2 = and i32 %argc, 48
+  %tobool3 = icmp ne i32 %and2, 48
+  %or.cond = or i1 %tobool, %tobool3
+  %storemerge = select i1 %or.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+define i32 @main4d(i32 %argc) {
+; CHECK-LABEL: @main4d(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 %argc, 23
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp eq i32 [[TMP1]], 23
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %and = and i32 %argc, 7
+  %tobool = icmp ne i32 %and, 7
+  %and2 = and i32 %argc, 16
+  %tobool3 = icmp eq i32 %and2, 0
+  %or.cond = or i1 %tobool, %tobool3
+  %storemerge = select i1 %or.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+define i32 @main4f_like(i32 %argc, i32 %argc2, i32 %argc3) {
+; CHECK-LABEL: @main4f_like(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %argc2, %argc3
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], %argc
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp eq i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %and = and i32 %argc, %argc2
+  %tobool = icmp ne i32 %and, %argc2
+  %and2 = and i32 %argc, %argc3
+  %tobool3 = icmp ne i32 %and2, %argc3
+  %or.cond = or i1 %tobool, %tobool3
+  %storemerge = select i1 %or.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+; (icmp eq (A & B), A) & (icmp eq (A & D), A) -> (icmp eq (A & (B&D)), A)
+define i32 @main5_like(i32 %argc, i32 %argc2) {
+; CHECK-LABEL: @main5_like(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 %argc, %argc2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 7
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp ne i32 [[TMP2]], 7
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %and = and i32 %argc, 7
+  %tobool = icmp eq i32 %and, 7
+  %and2 = and i32 %argc2, 7
+  %tobool3 = icmp eq i32 %and2, 7
+  %and.cond = and i1 %tobool, %tobool3
+  %storemerge = select i1 %and.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+define i32 @main5e_like(i32 %argc, i32 %argc2, i32 %argc3) {
+; CHECK-LABEL: @main5e_like(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 %argc2, %argc3
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], %argc
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp ne i32 [[TMP2]], %argc
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %and = and i32 %argc, %argc2
+  %tobool = icmp eq i32 %and, %argc
+  %and2 = and i32 %argc, %argc3
+  %tobool3 = icmp eq i32 %and2, %argc
+  %and.cond = and i1 %tobool, %tobool3
+  %storemerge = select i1 %and.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+; (icmp ne (A & B), A) | (icmp ne (A & D), A) -> (icmp ne (A & (B&D)), A)
+define i32 @main5c_like(i32 %argc, i32 %argc2) {
+; CHECK-LABEL: @main5c_like(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 %argc, %argc2
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 7
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp eq i32 [[TMP2]], 7
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %and = and i32 %argc, 7
+  %tobool = icmp ne i32 %and, 7
+  %and2 = and i32 %argc2, 7
+  %tobool3 = icmp ne i32 %and2, 7
+  %or.cond = or i1 %tobool, %tobool3
+  %storemerge = select i1 %or.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+define i32 @main5f_like(i32 %argc, i32 %argc2, i32 %argc3) {
+; CHECK-LABEL: @main5f_like(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 %argc2, %argc3
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], %argc
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp eq i32 [[TMP2]], %argc
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %and = and i32 %argc, %argc2
+  %tobool = icmp ne i32 %and, %argc
+  %and2 = and i32 %argc, %argc3
+  %tobool3 = icmp ne i32 %and2, %argc
+  %or.cond = or i1 %tobool, %tobool3
+  %storemerge = select i1 %or.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+; (icmp eq (A & B), C) & (icmp eq (A & D), E) -> (icmp eq (A & (B|D)), (C|E))
+; if B, C, D and E are constants and the two mask/value pairs can be combined
+define i32 @main6(i32 %argc) {
+; CHECK-LABEL: @main6(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 %argc, 55
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp ne i32 [[TMP1]], 19
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %and = and i32 %argc, 7
+  %tobool = icmp eq i32 %and, 3
+  %and2 = and i32 %argc, 48
+  %tobool3 = icmp eq i32 %and2, 16
+  %and.cond = and i1 %tobool, %tobool3
+  %storemerge = select i1 %and.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+define i32 @main6b(i32 %argc) {
+; CHECK-LABEL: @main6b(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 %argc, 23
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp ne i32 [[TMP1]], 19
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %and = and i32 %argc, 7
+  %tobool = icmp eq i32 %and, 3
+  %and2 = and i32 %argc, 16
+  %tobool3 = icmp ne i32 %and2, 0
+  %and.cond = and i1 %tobool, %tobool3
+  %storemerge = select i1 %and.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+; (icmp ne (A & B), C) | (icmp ne (A & D), E) -> (icmp ne (A & (B|D)), (C|E))
+; if B, C, D and E are constants and the two mask/value pairs can be combined
+define i32 @main6c(i32 %argc) {
+; CHECK-LABEL: @main6c(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 %argc, 55
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp eq i32 [[TMP1]], 19
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %and = and i32 %argc, 7
+  %tobool = icmp ne i32 %and, 3
+  %and2 = and i32 %argc, 48
+  %tobool3 = icmp ne i32 %and2, 16
+  %or.cond = or i1 %tobool, %tobool3
+  %storemerge = select i1 %or.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+define i32 @main6d(i32 %argc) {
+; CHECK-LABEL: @main6d(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 %argc, 23
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp eq i32 [[TMP1]], 19
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %and = and i32 %argc, 7
+  %tobool = icmp ne i32 %and, 3
+  %and2 = and i32 %argc, 16
+  %tobool3 = icmp eq i32 %and2, 0
+  %or.cond = or i1 %tobool, %tobool3
+  %storemerge = select i1 %or.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+; test parameter permutations
+; (B & A) == B & (D & A) == D
+define i32 @main7a(i32 %argc, i32 %argc2, i32 %argc3) {
+; CHECK-LABEL: @main7a(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %argc2, %argc3
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], %argc
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %and1 = and i32 %argc2, %argc
+  %tobool = icmp eq i32 %and1, %argc2
+  %and2 = and i32 %argc3, %argc
+  %tobool3 = icmp eq i32 %and2, %argc3
+  %and.cond = and i1 %tobool, %tobool3
+  %storemerge = select i1 %and.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+; B == (A & B) & D == (A & D)
+define i32 @main7b(i32 %argc, i32 %argc2, i32 %argc3) {
+; CHECK-LABEL: @main7b(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %argc2, %argc3
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], %argc
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %and1 = and i32 %argc, %argc2
+  %tobool = icmp eq i32 %argc2, %and1
+  %and2 = and i32 %argc, %argc3
+  %tobool3 = icmp eq i32 %argc3, %and2
+  %and.cond = and i1 %tobool, %tobool3
+  %storemerge = select i1 %and.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+; B == (B & A) & D == (D & A)
+define i32 @main7c(i32 %argc, i32 %argc2, i32 %argc3) {
+; CHECK-LABEL: @main7c(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %argc2, %argc3
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], %argc
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %and1 = and i32 %argc2, %argc
+  %tobool = icmp eq i32 %argc2, %and1
+  %and2 = and i32 %argc3, %argc
+  %tobool3 = icmp eq i32 %argc3, %and2
+  %and.cond = and i1 %tobool, %tobool3
+  %storemerge = select i1 %and.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+; (A & (B & C)) == (B & C) & (A & (D & E)) == (D & E)
+define i32 @main7d(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %argc5) {
+; CHECK-LABEL: @main7d(
+; CHECK-NEXT:    [[BC:%.*]] = and i32 %argc2, %argc4
+; CHECK-NEXT:    [[DE:%.*]] = and i32 %argc3, %argc5
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[BC]], [[DE]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], %argc
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %bc = and i32 %argc2, %argc4
+  %de = and i32 %argc3, %argc5
+  %and1 = and i32 %argc, %bc
+  %tobool = icmp eq i32 %and1, %bc
+  %and2 = and i32 %argc, %de
+  %tobool3 = icmp eq i32 %and2, %de
+  %and.cond = and i1 %tobool, %tobool3
+  %storemerge = select i1 %and.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+; ((B & C) & A) == (B & C) & ((D & E) & A) == (D & E)
+define i32 @main7e(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %argc5) {
+; CHECK-LABEL: @main7e(
+; CHECK-NEXT:    [[BC:%.*]] = and i32 %argc2, %argc4
+; CHECK-NEXT:    [[DE:%.*]] = and i32 %argc3, %argc5
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[BC]], [[DE]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], %argc
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %bc = and i32 %argc2, %argc4
+  %de = and i32 %argc3, %argc5
+  %and1 = and i32 %bc, %argc
+  %tobool = icmp eq i32 %and1, %bc
+  %and2 = and i32 %de, %argc
+  %tobool3 = icmp eq i32 %and2, %de
+  %and.cond = and i1 %tobool, %tobool3
+  %storemerge = select i1 %and.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+; (B & C) == (A & (B & C)) & (D & E) == (A & (D & E))
+define i32 @main7f(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %argc5) {
+; CHECK-LABEL: @main7f(
+; CHECK-NEXT:    [[BC:%.*]] = and i32 %argc2, %argc4
+; CHECK-NEXT:    [[DE:%.*]] = and i32 %argc3, %argc5
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[BC]], [[DE]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], %argc
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %bc = and i32 %argc2, %argc4
+  %de = and i32 %argc3, %argc5
+  %and1 = and i32 %argc, %bc
+  %tobool = icmp eq i32 %bc, %and1
+  %and2 = and i32 %argc, %de
+  %tobool3 = icmp eq i32 %de, %and2
+  %and.cond = and i1 %tobool, %tobool3
+  %storemerge = select i1 %and.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+; (B & C) == ((B & C) & A) & (D & E) == ((D & E) & A)
+define i32 @main7g(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %argc5) {
+; CHECK-LABEL: @main7g(
+; CHECK-NEXT:    [[BC:%.*]] = and i32 %argc2, %argc4
+; CHECK-NEXT:    [[DE:%.*]] = and i32 %argc3, %argc5
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[BC]], [[DE]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], %argc
+; CHECK-NEXT:    [[NOT_:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext i1 [[NOT_]] to i32
+; CHECK-NEXT:    ret i32 [[STOREMERGE]]
+;
+  %bc = and i32 %argc2, %argc4
+  %de = and i32 %argc3, %argc5
+  %and1 = and i32 %bc, %argc
+  %tobool = icmp eq i32 %bc, %and1
+  %and2 = and i32 %de, %argc
+  %tobool3 = icmp eq i32 %de, %and2
+  %and.cond = and i1 %tobool, %tobool3
+  %storemerge = select i1 %and.cond, i32 0, i32 1
+  ret i32 %storemerge
+}
+
+define i32 @main8(i32 %argc) {
+; CHECK-LABEL: @main8(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 192
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 1, i32 2
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
+;
+  %and = and i32 %argc, 64
+  %tobool = icmp ne i32 %and, 0
+  %trunc2 = trunc i32 %argc to i8
+  %tobool3 = icmp slt i8 %trunc2, 0
+  %or.cond = or i1 %tobool, %tobool3
+  %retval.0 = select i1 %or.cond, i32 2, i32 1
+  ret i32 %retval.0
+}
+
+define i32 @main9(i32 %argc) {
+; CHECK-LABEL: @main9(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 192
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 192
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 2, i32 1
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
+;
+  %and = and i32 %argc, 64
+  %tobool = icmp ne i32 %and, 0
+  %trunc2 = trunc i32 %argc to i8
+  %tobool3 = icmp slt i8 %trunc2, 0
+  %or.cond = and i1 %tobool, %tobool3
+  %retval.0 = select i1 %or.cond, i32 2, i32 1
+  ret i32 %retval.0
+}
+
+define i32 @main10(i32 %argc) {
+; CHECK-LABEL: @main10(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 192
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 2, i32 1
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
+;
+  %and = and i32 %argc, 64
+  %tobool = icmp eq i32 %and, 0
+  %trunc2 = trunc i32 %argc to i8
+  %tobool3 = icmp sge i8 %trunc2, 0
+  %or.cond = and i1 %tobool, %tobool3
+  %retval.0 = select i1 %or.cond, i32 2, i32 1
+  ret i32 %retval.0
+}
+
+define i32 @main11(i32 %argc) {
+; CHECK-LABEL: @main11(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 192
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 192
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 1, i32 2
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
+;
+  %and = and i32 %argc, 64
+  %tobool = icmp eq i32 %and, 0
+  %trunc2 = trunc i32 %argc to i8
+  %tobool3 = icmp sge i8 %trunc2, 0
+  %or.cond = or i1 %tobool, %tobool3
+  %retval.0 = select i1 %or.cond, i32 2, i32 1
+  ret i32 %retval.0
+}
+
+define i32 @main12(i32 %argc) {
+; CHECK-LABEL: @main12(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 32896
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 1, i32 2
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
+;
+  %trunc = trunc i32 %argc to i16
+  %tobool = icmp slt i16 %trunc, 0
+  %trunc2 = trunc i32 %argc to i8
+  %tobool3 = icmp slt i8 %trunc2, 0
+  %or.cond = or i1 %tobool, %tobool3
+  %retval.0 = select i1 %or.cond, i32 2, i32 1
+  ret i32 %retval.0
+}
+
+define i32 @main13(i32 %argc) {
+; CHECK-LABEL: @main13(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 32896
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 32896
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 2, i32 1
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
+;
+  %trunc = trunc i32 %argc to i16
+  %tobool = icmp slt i16 %trunc, 0
+  %trunc2 = trunc i32 %argc to i8
+  %tobool3 = icmp slt i8 %trunc2, 0
+  %or.cond = and i1 %tobool, %tobool3
+  %retval.0 = select i1 %or.cond, i32 2, i32 1
+  ret i32 %retval.0
+}
+
+define i32 @main14(i32 %argc) {
+; CHECK-LABEL: @main14(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 32896
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 2, i32 1
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
+;
+  %trunc = trunc i32 %argc to i16
+  %tobool = icmp sge i16 %trunc, 0
+  %trunc2 = trunc i32 %argc to i8
+  %tobool3 = icmp sge i8 %trunc2, 0
+  %or.cond = and i1 %tobool, %tobool3
+  %retval.0 = select i1 %or.cond, i32 2, i32 1
+  ret i32 %retval.0
+}
+
+define i32 @main15(i32 %argc) {
+; CHECK-LABEL: @main15(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 32896
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 32896
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 1, i32 2
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
+;
+  %trunc = trunc i32 %argc to i16
+  %tobool = icmp sge i16 %trunc, 0
+  %trunc2 = trunc i32 %argc to i8
+  %tobool3 = icmp sge i8 %trunc2, 0
+  %or.cond = or i1 %tobool, %tobool3
+  %retval.0 = select i1 %or.cond, i32 2, i32 1
+  ret i32 %retval.0
+}

Added: llvm/trunk/test/Transforms/InstCombine/bitcast-alias-function.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/bitcast-alias-function.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/bitcast-alias-function.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/bitcast-alias-function.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,239 @@
+; RUN: opt -S -instcombine -o - %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v64:64:64-v128:128:128-a0:0:64"
+
+
+
+; Cases that should be bitcast
+
+; Test cast between scalars with same bit sizes
+@alias_i32_to_f32 = alias float (float), bitcast (i32 (i32)* @func_i32 to float (float)*)
+
+; Test cast between vectors with same number of elements and bit sizes
+@alias_v2i32_to_v2f32 = alias <2 x float> (<2 x float>), bitcast (<2 x i32> (<2 x i32>)* @func_v2i32 to <2 x float> (<2 x float>)*)
+
+; Test cast from vector to scalar with same number of bits
+@alias_v2f32_to_i64 = alias <2 x float> (<2 x float>), bitcast (i64 (i64)* @func_i64 to <2 x float> (<2 x float>)*)
+
+; Test cast from scalar to vector with same number of bits
+@alias_i64_to_v2f32 = alias i64 (i64), bitcast (<2 x float> (<2 x float>)* @func_v2f32 to i64 (i64)*)
+
+; Test cast between vectors of pointers
+@alias_v2i32p_to_v2i64p = alias <2 x i64*> (<2 x i64*>), bitcast (<2 x i32*> (<2 x i32*>)* @func_v2i32p to <2 x i64*> (<2 x i64*>)*)
+
+
+; Cases that should be invalid and unchanged
+
+; Test cast between scalars with different bit sizes
+@alias_i64_to_f32 = alias float (float), bitcast (i64 (i64)* @func_i64 to float (float)*)
+
+; Test cast between vectors with different bit sizes but the
+; same number of elements
+@alias_v2i64_to_v2f32 = alias <2 x float> (<2 x float>), bitcast (<2 x i64> (<2 x i64>)* @func_v2i64 to <2 x float> (<2 x float>)*)
+
+; Test cast between vectors with same number of bits and different
+; numbers of elements
+@alias_v2i32_to_v4f32 = alias <4 x float> (<4 x float>), bitcast (<2 x i32> (<2 x i32>)* @func_v2i32 to <4 x float> (<4 x float>)*)
+
+; Test cast between scalar and vector with different number of bits
+@alias_i64_to_v4f32 = alias i64 (i64), bitcast (<4 x float> (<4 x float>)* @func_v4f32 to i64 (i64)*)
+
+; Test cast between vector and scalar with different number of bits
+@alias_v4f32_to_i64 = alias <4 x float> (<4 x float>), bitcast (i64 (i64)* @func_i64 to <4 x float> (<4 x float>)*)
+
+; Test cast from scalar to vector of pointers with same number of bits
+; We don't know the pointer size at this point, so this can't be done
+@alias_i64_to_v2i32p = alias i64 (i64), bitcast (<2 x i32*> (<2 x i32*>)* @func_v2i32p to i64 (i64)*)
+
+; Test cast between vector of pointers and scalar with different number of bits
+@alias_v4i32p_to_i64 = alias <4 x i32*> (<4 x i32*>), bitcast (i64 (i64)* @func_i64 to <4 x i32*> (<4 x i32*>)*)
+
+
+
+define internal <2 x i32> @func_v2i32(<2 x i32> %v) noinline nounwind {
+entry:
+  ret <2 x i32> %v
+}
+
+define internal <2 x float> @func_v2f32(<2 x float> %v) noinline nounwind {
+entry:
+  ret <2 x float> %v
+}
+
+define internal <4 x float> @func_v4f32(<4 x float> %v) noinline nounwind {
+entry:
+  ret <4 x float> %v
+}
+
+define internal i32 @func_i32(i32 %v) noinline nounwind {
+entry:
+  ret i32 %v
+}
+
+define internal i64 @func_i64(i64 %v) noinline nounwind {
+entry:
+  ret i64 %v
+}
+
+define internal <2 x i64> @func_v2i64(<2 x i64> %v) noinline nounwind {
+entry:
+  ret <2 x i64> %v
+}
+
+define internal <2 x i32*> @func_v2i32p(<2 x i32*> %v) noinline nounwind {
+entry:
+  ret <2 x i32*> %v
+}
+
+; Valid cases: only bitcast the argument / return types and call the underlying function
+
+; Sizes match, should only bitcast
+define void @bitcast_alias_scalar(float* noalias %source, float* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_scalar
+; CHECK: bitcast float* %source to i32*
+; CHECK: load i32, i32*
+; CHECK-NOT: fptoui
+; CHECK-NOT: uitofp
+; CHECK: bitcast float* %dest to i32*
+; CHECK: store i32
+  %tmp = load float, float* %source, align 8
+  %call = call float @alias_i32_to_f32(float %tmp) nounwind
+  store float %call, float* %dest, align 8
+  ret void
+}
+
+; Sizes match, should only bitcast
+define void @bitcast_alias_vector(<2 x float>* noalias %source, <2 x float>* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_vector
+; CHECK: bitcast <2 x float>* %source to <2 x i32>*
+; CHECK: load <2 x i32>, <2 x i32>*
+; CHECK-NOT: fptoui
+; CHECK-NOT: uitofp
+; CHECK: bitcast <2 x float>* %dest to <2 x i32>*
+; CHECK: store <2 x i32>
+  %tmp = load <2 x float>, <2 x float>* %source, align 8
+  %call = call <2 x float> @alias_v2i32_to_v2f32(<2 x float> %tmp) nounwind
+  store <2 x float> %call, <2 x float>* %dest, align 8
+  ret void
+}
+
+; Sizes match, should only bitcast
+define void @bitcast_alias_vector_scalar_same_size(<2 x float>* noalias %source, <2 x float>* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_vector_scalar_same_size
+; CHECK: bitcast <2 x float>* %source to i64*
+; CHECK: load i64, i64*
+; CHECK: %call = call i64 @func_i64
+; CHECK: bitcast <2 x float>* %dest to i64*
+; CHECK: store i64
+  %tmp = load <2 x float>, <2 x float>* %source, align 8
+  %call = call <2 x float> @alias_v2f32_to_i64(<2 x float> %tmp) nounwind
+  store <2 x float> %call, <2 x float>* %dest, align 8
+  ret void
+}
+
+define void @bitcast_alias_scalar_vector_same_size(i64* noalias %source, i64* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_scalar_vector_same_size
+; CHECK: bitcast i64* %source to <2 x float>*
+; CHECK: load <2 x float>, <2 x float>*
+; CHECK: call <2 x float> @func_v2f32
+; CHECK: bitcast i64* %dest to <2 x float>*
+; CHECK: store <2 x float>
+  %tmp = load i64, i64* %source, align 8
+  %call = call i64 @alias_i64_to_v2f32(i64 %tmp) nounwind
+  store i64 %call, i64* %dest, align 8
+  ret void
+}
+
+define void @bitcast_alias_vector_ptrs_same_size(<2 x i64*>* noalias %source, <2 x i64*>* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_vector_ptrs_same_size
+; CHECK: bitcast <2 x i64*>* %source to <2 x i32*>*
+; CHECK: load <2 x i32*>, <2 x i32*>*
+; CHECK: call <2 x i32*> @func_v2i32p
+; CHECK: bitcast <2 x i64*>* %dest to <2 x i32*>*
+; CHECK: store <2 x i32*>
+  %tmp = load <2 x i64*>, <2 x i64*>* %source, align 8
+  %call = call <2 x i64*> @alias_v2i32p_to_v2i64p(<2 x i64*> %tmp) nounwind
+  store <2 x i64*> %call, <2 x i64*>* %dest, align 8
+  ret void
+}
+
+; Invalid cases:
+
+define void @bitcast_alias_mismatch_scalar_size(float* noalias %source, float* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_mismatch_scalar_size
+; CHECK-NOT: fptoui
+; CHECK: @alias_i64_to_f32
+; CHECK-NOT: uitofp
+  %tmp = load float, float* %source, align 8
+  %call = call float @alias_i64_to_f32(float %tmp) nounwind
+  store float %call, float* %dest, align 8
+  ret void
+}
+
+define void @bitcast_alias_mismatch_vector_element_and_bit_size(<2 x float>* noalias %source, <2 x float>* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_mismatch_vector_element_and_bit_size
+; CHECK-NOT: fptoui <2 x float> %tmp to <2 x i64>
+; CHECK: @alias_v2i64_to_v2f32
+; CHECK-NOT: uitofp <2 x i64> %call to <2 x float>
+  %tmp = load <2 x float>, <2 x float>* %source, align 8
+  %call = call <2 x float> @alias_v2i64_to_v2f32(<2 x float> %tmp) nounwind
+  store <2 x float> %call, <2 x float>* %dest, align 8
+  ret void
+}
+
+define void @bitcast_alias_vector_mismatched_number_elements(<4 x float>* noalias %source, <4 x float>* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_vector_mismatched_number_elements
+; CHECK:  %call = call <4 x float> @alias_v2i32_to_v4f32
+  %tmp = load <4 x float>, <4 x float>* %source, align 8
+  %call = call <4 x float> @alias_v2i32_to_v4f32(<4 x float> %tmp) nounwind
+  store <4 x float> %call, <4 x float>* %dest, align 8
+  ret void
+}
+
+define void @bitcast_alias_vector_scalar_mismatched_bit_size(<4 x float>* noalias %source, <4 x float>* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_vector_scalar_mismatched_bit_size
+; CHECK:  %call = call <4 x float> @alias_v4f32_to_i64
+  %tmp = load <4 x float>, <4 x float>* %source, align 8
+  %call = call <4 x float> @alias_v4f32_to_i64(<4 x float> %tmp) nounwind
+  store <4 x float> %call, <4 x float>* %dest, align 8
+  ret void
+}
+
+define void @bitcast_alias_vector_ptrs_scalar_mismatched_bit_size(<4 x i32*>* noalias %source, <4 x i32*>* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_vector_ptrs_scalar_mismatched_bit_size
+; CHECK: @alias_v4i32p_to_i64
+  %tmp = load <4 x i32*>, <4 x i32*>* %source, align 8
+  %call = call <4 x i32*> @alias_v4i32p_to_i64(<4 x i32*> %tmp) nounwind
+  store <4 x i32*> %call, <4 x i32*>* %dest, align 8
+  ret void
+}
+
+define void @bitcast_alias_scalar_vector_ptrs_same_size(i64* noalias %source, i64* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_scalar_vector_ptrs_same_size
+; CHECK: @alias_i64_to_v2i32p
+  %tmp = load i64, i64* %source, align 8
+  %call = call i64 @alias_i64_to_v2i32p(i64 %tmp) nounwind
+  store i64 %call, i64* %dest, align 8
+  ret void
+}
+
+define void @bitcast_alias_scalar_vector_mismatched_bit_size(i64* noalias %source, i64* noalias %dest) nounwind {
+entry:
+; CHECK-LABEL: @bitcast_alias_scalar_vector_mismatched_bit_size
+; CHECK: call i64 @alias_i64_to_v4f32
+  %tmp = load i64, i64* %source, align 8
+  %call = call i64 @alias_i64_to_v4f32(i64 %tmp) nounwind
+  store i64 %call, i64* %dest, align 8
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/InstCombine/bitcast-bigendian.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/bitcast-bigendian.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/bitcast-bigendian.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/bitcast-bigendian.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,133 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; These tests are extracted from bitcast.ll.
+; Verify that they also work correctly on big-endian targets.
+
+define float @test2(<2 x float> %A, <2 x i32> %B) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <2 x float> [[A:%.*]], i32 1
+; CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i32> [[B:%.*]] to <2 x float>
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[BC]], i32 1
+; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP24]], [[TMP4]]
+; CHECK-NEXT:    ret float [[ADD]]
+;
+  %tmp28 = bitcast <2 x float> %A to i64
+  %tmp23 = trunc i64 %tmp28 to i32
+  %tmp24 = bitcast i32 %tmp23 to float
+
+  %tmp = bitcast <2 x i32> %B to i64
+  %tmp2 = trunc i64 %tmp to i32
+  %tmp4 = bitcast i32 %tmp2 to float
+
+  %add = fadd float %tmp24, %tmp4
+  ret float %add
+}
+
+define float @test3(<2 x float> %A, <2 x i64> %B) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <2 x float> [[A:%.*]], i32 0
+; CHECK-NEXT:    [[BC2:%.*]] = bitcast <2 x i64> [[B:%.*]] to <4 x float>
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[BC2]], i32 1
+; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP24]], [[TMP4]]
+; CHECK-NEXT:    ret float [[ADD]]
+;
+  %tmp28 = bitcast <2 x float> %A to i64
+  %tmp29 = lshr i64 %tmp28, 32
+  %tmp23 = trunc i64 %tmp29 to i32
+  %tmp24 = bitcast i32 %tmp23 to float
+
+  %tmp = bitcast <2 x i64> %B to i128
+  %tmp1 = lshr i128 %tmp, 64
+  %tmp2 = trunc i128 %tmp1 to i32
+  %tmp4 = bitcast i32 %tmp2 to float
+
+  %add = fadd float %tmp24, %tmp4
+  ret float %add
+}
+
+define <2 x i32> @test4(i32 %A, i32 %B){
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[B:%.*]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[A:%.*]], i32 1
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+;
+  %tmp38 = zext i32 %A to i64
+  %tmp32 = zext i32 %B to i64
+  %tmp33 = shl i64 %tmp32, 32
+  %ins35 = or i64 %tmp33, %tmp38
+  %tmp43 = bitcast i64 %ins35 to <2 x i32>
+  ret <2 x i32> %tmp43
+}
+
+define <2 x float> @test5(float %A, float %B) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> undef, float [[B:%.*]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[A:%.*]], i32 1
+; CHECK-NEXT:    ret <2 x float> [[TMP2]]
+;
+  %tmp37 = bitcast float %A to i32
+  %tmp38 = zext i32 %tmp37 to i64
+  %tmp31 = bitcast float %B to i32
+  %tmp32 = zext i32 %tmp31 to i64
+  %tmp33 = shl i64 %tmp32, 32
+  %ins35 = or i64 %tmp33, %tmp38
+  %tmp43 = bitcast i64 %ins35 to <2 x float>
+  ret <2 x float> %tmp43
+}
+
+define <2 x float> @test6(float %A){
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> <float undef, float 4.200000e+01>, float [[A:%.*]], i32 0
+; CHECK-NEXT:    ret <2 x float> [[TMP1]]
+;
+  %tmp23 = bitcast float %A to i32
+  %tmp24 = zext i32 %tmp23 to i64
+  %tmp25 = shl i64 %tmp24, 32
+  %mask20 = or i64 %tmp25, 1109917696
+  %tmp35 = bitcast i64 %mask20 to <2 x float>
+  ret <2 x float> %tmp35
+}
+
+; No change. Bitcasts are canonicalized above bitwise logic.
+
+define <2 x i32> @xor_bitcast_vec_to_vec(<1 x i64> %a) {
+; CHECK-LABEL: @xor_bitcast_vec_to_vec(
+; CHECK-NEXT:    [[T1:%.*]] = bitcast <1 x i64> [[A:%.*]] to <2 x i32>
+; CHECK-NEXT:    [[T2:%.*]] = xor <2 x i32> [[T1]], <i32 1, i32 2>
+; CHECK-NEXT:    ret <2 x i32> [[T2]]
+;
+  %t1 = bitcast <1 x i64> %a to <2 x i32>
+  %t2 = xor <2 x i32> <i32 1, i32 2>, %t1
+  ret <2 x i32> %t2
+}
+
+; No change. Bitcasts are canonicalized above bitwise logic.
+
+define i64 @and_bitcast_vec_to_int(<2 x i32> %a) {
+; CHECK-LABEL: @and_bitcast_vec_to_int(
+; CHECK-NEXT:    [[T1:%.*]] = bitcast <2 x i32> [[A:%.*]] to i64
+; CHECK-NEXT:    [[T2:%.*]] = and i64 [[T1]], 3
+; CHECK-NEXT:    ret i64 [[T2]]
+;
+  %t1 = bitcast <2 x i32> %a to i64
+  %t2 = and i64 %t1, 3
+  ret i64 %t2
+}
+
+; No change. Bitcasts are canonicalized above bitwise logic.
+
+define <2 x i32> @or_bitcast_int_to_vec(i64 %a) {
+; CHECK-LABEL: @or_bitcast_int_to_vec(
+; CHECK-NEXT:    [[T1:%.*]] = bitcast i64 [[A:%.*]] to <2 x i32>
+; CHECK-NEXT:    [[T2:%.*]] = or <2 x i32> [[T1]], <i32 1, i32 2>
+; CHECK-NEXT:    ret <2 x i32> [[T2]]
+;
+  %t1 = bitcast i64 %a to <2 x i32>
+  %t2 = or <2 x i32> %t1, <i32 1, i32 2>
+  ret <2 x i32> %t2
+}
+

Added: llvm/trunk/test/Transforms/InstCombine/bitcast-bitcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/bitcast-bitcast.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/bitcast-bitcast.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/bitcast-bitcast.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,84 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Check all scalar / vector combinations for a pair of bitcasts.
+
+define ppc_fp128 @bitcast_bitcast_s_s_s(i128 %a) {
+  %bc1 = bitcast i128 %a to fp128
+  %bc2 = bitcast fp128 %bc1 to ppc_fp128
+  ret ppc_fp128 %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_s_s_s(
+; CHECK-NEXT:  %bc2 = bitcast i128 %a to ppc_fp128
+; CHECK-NEXT:  ret ppc_fp128 %bc2
+}
+
+define <2 x i32> @bitcast_bitcast_s_s_v(i64 %a) {
+  %bc1 = bitcast i64 %a to double
+  %bc2 = bitcast double %bc1 to <2 x i32>
+  ret <2 x i32> %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_s_s_v(
+; CHECK-NEXT:  %bc2 = bitcast i64 %a to <2 x i32>
+; CHECK-NEXT:  ret <2 x i32> %bc2
+}
+
+define double @bitcast_bitcast_s_v_s(i64 %a) {
+  %bc1 = bitcast i64 %a to <2 x i32>
+  %bc2 = bitcast <2 x i32> %bc1 to double
+  ret double %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_s_v_s(
+; CHECK-NEXT:  %bc2 = bitcast i64 %a to double
+; CHECK-NEXT:  ret double %bc2
+}
+
+define <2 x i32> @bitcast_bitcast_s_v_v(i64 %a) {
+  %bc1 = bitcast i64 %a to <4 x i16>
+  %bc2 = bitcast <4 x i16> %bc1 to <2 x i32>
+  ret <2 x i32> %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_s_v_v(
+; CHECK-NEXT:  %bc2 = bitcast i64 %a to <2 x i32>
+; CHECK-NEXT:  ret <2 x i32> %bc2
+}
+
+define i64 @bitcast_bitcast_v_s_s(<2 x i32> %a) {
+  %bc1 = bitcast <2 x i32> %a to double
+  %bc2 = bitcast double %bc1 to i64
+  ret i64 %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_v_s_s(
+; CHECK-NEXT:  %bc2 = bitcast <2 x i32> %a to i64
+; CHECK-NEXT:  ret i64 %bc2
+}
+
+define <4 x i16> @bitcast_bitcast_v_s_v(<2 x i32> %a) {
+  %bc1 = bitcast <2 x i32> %a to double
+  %bc2 = bitcast double %bc1 to <4 x i16>
+  ret <4 x i16> %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_v_s_v(
+; CHECK-NEXT:  %bc2 = bitcast <2 x i32> %a to <4 x i16>
+; CHECK-NEXT:  ret <4 x i16> %bc2
+}
+
+define double @bitcast_bitcast_v_v_s(<2 x float> %a) {
+  %bc1 = bitcast <2 x float> %a to <4 x i16>
+  %bc2 = bitcast <4 x i16> %bc1 to double
+  ret double %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_v_v_s(
+; CHECK-NEXT:  %bc2 = bitcast <2 x float> %a to double
+; CHECK-NEXT:  ret double %bc2
+}
+
+define <2 x i32> @bitcast_bitcast_v_v_v(<2 x float> %a) {
+  %bc1 = bitcast <2 x float> %a to <4 x i16>
+  %bc2 = bitcast <4 x i16> %bc1 to <2 x i32>
+  ret <2 x i32> %bc2
+
+; CHECK-LABEL: @bitcast_bitcast_v_v_v(
+; CHECK-NEXT:  %bc2 = bitcast <2 x float> %a to <2 x i32>
+; CHECK-NEXT:  ret <2 x i32> %bc2
+}
+

Added: llvm/trunk/test/Transforms/InstCombine/bitcast-sext-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/bitcast-sext-vector.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/bitcast-sext-vector.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/bitcast-sext-vector.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,11 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK: sext
+; Don't fold zero/sign extensions with a bitcast between a vector and scalar.
+
+define i32 @t(<4 x i8> %src1, <4 x i8> %src2) nounwind readonly {
+entry:
+	%cmp = icmp eq <4 x i8> %src1, %src2; <<4 x i1>> [#uses=1]
+	%sext = sext <4 x i1> %cmp to <4 x i8>
+	%val = bitcast <4 x i8> %sext to i32
+	ret i32 %val
+}

Added: llvm/trunk/test/Transforms/InstCombine/bitcast-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/bitcast-store.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/bitcast-store.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/bitcast-store.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,51 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; Instcombine should preserve metadata and alignment while
+; folding a bitcast into a store.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.A = type { i32 (...)** }
+
+@G = external constant [5 x i8*]
+
+; CHECK-LABEL: @foo
+; CHECK: store i32 %x, i32* %{{.*}}, align 16, !noalias !0, !llvm.access.group !1
+define void @foo(i32 %x, float* %p) nounwind {
+entry:
+  %x.cast = bitcast i32 %x to float
+  store float %x.cast, float* %p, align 16, !noalias !0, !llvm.access.group !1
+  ret void
+}
+
+; Check that instcombine doesn't try to fold the following bitcast into the store.
+; This transformation would not be safe since it would require an addrspacecast,
+; and addrspacecast is not guaranteed to be a no-op cast.
+
+; CHECK-LABEL: @bar
+; CHECK: %cast = bitcast i8** %b to i8 addrspace(1)**
+; CHECK: store i8 addrspace(1)* %a, i8 addrspace(1)** %cast
+define void @bar(i8 addrspace(1)* %a, i8** %b) nounwind {
+entry:
+  %cast = bitcast i8** %b to i8 addrspace(1)**
+  store i8 addrspace(1)* %a, i8 addrspace(1)** %cast
+  ret void
+}
+
+; Check that we don't combine the bitcast into the store. This would create a
+; bitcast of the swifterror which is invalid.
+
+; CHECK-LABEL: @swifterror_store
+; CHECK: bitcast i64
+; CHECK: store %swift.error
+
+%swift.error = type opaque
+define void @swifterror_store(i64* %x, %swift.error** swifterror %err) {
+entry:
+  %casted = bitcast i64* %x to %swift.error*
+  store %swift.error* %casted, %swift.error** %err
+  ret void
+}
+
+!0 = !{!0}
+!1 = !{}
\ No newline at end of file

Added: llvm/trunk/test/Transforms/InstCombine/bitcast-vec-canon.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/bitcast-vec-canon.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/bitcast-vec-canon.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/bitcast-vec-canon.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,40 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define double @a(<1 x i64> %y) {
+; CHECK-LABEL: @a(
+; CHECK-NEXT:    [[BC:%.*]] = bitcast <1 x i64> %y to <1 x double>
+; CHECK-NEXT:    [[C:%.*]] = extractelement <1 x double> [[BC]], i32 0
+; CHECK-NEXT:    ret double [[C]]
+;
+  %c = bitcast <1 x i64> %y to double
+  ret double %c
+}
+
+define i64 @b(<1 x i64> %y) {
+; CHECK-LABEL: @b(
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <1 x i64> %y, i32 0
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
+  %c = bitcast <1 x i64> %y to i64
+  ret i64 %c
+}
+
+define <1 x i64> @c(double %y) {
+; CHECK-LABEL: @c(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast double %y to i64
+; CHECK-NEXT:    [[C:%.*]] = insertelement <1 x i64> undef, i64 [[TMP1]], i32 0
+; CHECK-NEXT:    ret <1 x i64> [[C]]
+;
+  %c = bitcast double %y to <1 x i64>
+  ret <1 x i64> %c
+}
+
+define <1 x i64> @d(i64 %y) {
+; CHECK-LABEL: @d(
+; CHECK-NEXT:    [[C:%.*]] = insertelement <1 x i64> undef, i64 %y, i32 0
+; CHECK-NEXT:    ret <1 x i64> [[C]]
+;
+  %c = bitcast i64 %y to <1 x i64>
+  ret <1 x i64> %c
+}
+

Added: llvm/trunk/test/Transforms/InstCombine/bitcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/bitcast.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/bitcast.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/bitcast.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,563 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; Bitcasts between vectors and scalars are valid.
+; PR4487
+define i32 @test1(i64 %a) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    ret i32 0
+;
+  %t1 = bitcast i64 %a to <2 x i32>
+  %t2 = bitcast i64 %a to <2 x i32>
+  %t3 = xor <2 x i32> %t1, %t2
+  %t4 = extractelement <2 x i32> %t3, i32 0
+  ret i32 %t4
+}
+
+; Perform the bitwise logic in the source type of the operands to eliminate bitcasts.
+
+define <2 x i32> @xor_two_vector_bitcasts(<1 x i64> %a, <1 x i64> %b) {
+; CHECK-LABEL: @xor_two_vector_bitcasts(
+; CHECK-NEXT:    [[T31:%.*]] = xor <1 x i64> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[T3:%.*]] = bitcast <1 x i64> [[T31]] to <2 x i32>
+; CHECK-NEXT:    ret <2 x i32> [[T3]]
+;
+  %t1 = bitcast <1 x i64> %a to <2 x i32>
+  %t2 = bitcast <1 x i64> %b to <2 x i32>
+  %t3 = xor <2 x i32> %t1, %t2
+  ret <2 x i32> %t3
+}
+
+; No change. Bitcasts are canonicalized above bitwise logic.
+
+define <2 x i32> @xor_bitcast_vec_to_vec(<1 x i64> %a) {
+; CHECK-LABEL: @xor_bitcast_vec_to_vec(
+; CHECK-NEXT:    [[T1:%.*]] = bitcast <1 x i64> [[A:%.*]] to <2 x i32>
+; CHECK-NEXT:    [[T2:%.*]] = xor <2 x i32> [[T1]], <i32 1, i32 2>
+; CHECK-NEXT:    ret <2 x i32> [[T2]]
+;
+  %t1 = bitcast <1 x i64> %a to <2 x i32>
+  %t2 = xor <2 x i32> <i32 1, i32 2>, %t1
+  ret <2 x i32> %t2
+}
+
+; No change. Bitcasts are canonicalized above bitwise logic.
+
+define i64 @and_bitcast_vec_to_int(<2 x i32> %a) {
+; CHECK-LABEL: @and_bitcast_vec_to_int(
+; CHECK-NEXT:    [[T1:%.*]] = bitcast <2 x i32> [[A:%.*]] to i64
+; CHECK-NEXT:    [[T2:%.*]] = and i64 [[T1]], 3
+; CHECK-NEXT:    ret i64 [[T2]]
+;
+  %t1 = bitcast <2 x i32> %a to i64
+  %t2 = and i64 %t1, 3
+  ret i64 %t2
+}
+
+; No change. Bitcasts are canonicalized above bitwise logic.
+
+define <2 x i32> @or_bitcast_int_to_vec(i64 %a) {
+; CHECK-LABEL: @or_bitcast_int_to_vec(
+; CHECK-NEXT:    [[T1:%.*]] = bitcast i64 [[A:%.*]] to <2 x i32>
+; CHECK-NEXT:    [[T2:%.*]] = or <2 x i32> [[T1]], <i32 1, i32 2>
+; CHECK-NEXT:    ret <2 x i32> [[T2]]
+;
+  %t1 = bitcast i64 %a to <2 x i32>
+  %t2 = or <2 x i32> %t1, <i32 1, i32 2>
+  ret <2 x i32> %t2
+}
+
+; PR26702 - https://bugs.llvm.org//show_bug.cgi?id=26702
+; Bitcast is canonicalized above logic, so we can see the not-not pattern.
+
+define <2 x i64> @is_negative(<4 x i32> %x) {
+; CHECK-LABEL: @is_negative(
+; CHECK-NEXT:    [[LOBIT:%.*]] = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[NOTNOT:%.*]] = bitcast <4 x i32> [[LOBIT]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[NOTNOT]]
+;
+  %lobit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
+  %not = xor <4 x i32> %lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %bc = bitcast <4 x i32> %not to <2 x i64>
+  %notnot = xor <2 x i64> %bc, <i64 -1, i64 -1>
+  ret <2 x i64> %notnot
+}
+
+; This variation has an extra bitcast at the end. This means that the 2nd xor
+; can be done in <4 x i32> to eliminate a bitcast regardless of canonicalization.
+
+define <4 x i32> @is_negative_bonus_bitcast(<4 x i32> %x) {
+; CHECK-LABEL: @is_negative_bonus_bitcast(
+; CHECK-NEXT:    [[LOBIT:%.*]] = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    ret <4 x i32> [[LOBIT]]
+;
+  %lobit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
+  %not = xor <4 x i32> %lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %bc = bitcast <4 x i32> %not to <2 x i64>
+  %notnot = xor <2 x i64> %bc, <i64 -1, i64 -1>
+  %bc2 = bitcast <2 x i64> %notnot to <4 x i32>
+  ret <4 x i32> %bc2
+}
+
+; Bitcasts are canonicalized above bitwise logic.
+
+define <2 x i8> @canonicalize_bitcast_logic_with_constant(<4 x i4> %x) {
+; CHECK-LABEL: @canonicalize_bitcast_logic_with_constant(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i4> [[X:%.*]] to <2 x i8>
+; CHECK-NEXT:    [[B:%.*]] = and <2 x i8> [[TMP1]], <i8 -128, i8 -128>
+; CHECK-NEXT:    ret <2 x i8> [[B]]
+;
+  %a = and <4 x i4> %x, <i4 0, i4 8, i4 0, i4 8>
+  %b = bitcast <4 x i4> %a to <2 x i8>
+  ret <2 x i8> %b
+}
+
+; PR27925 - https://llvm.org/bugs/show_bug.cgi?id=27925
+
+define <4 x i32> @bitcasts_and_bitcast(<4 x i32> %a, <8 x i16> %b) {
+; CHECK-LABEL: @bitcasts_and_bitcast(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[B:%.*]] to <4 x i32>
+; CHECK-NEXT:    [[BC3:%.*]] = and <4 x i32> [[TMP1]], [[A:%.*]]
+; CHECK-NEXT:    ret <4 x i32> [[BC3]]
+;
+  %bc1 = bitcast <4 x i32> %a to <2 x i64>
+  %bc2 = bitcast <8 x i16> %b to <2 x i64>
+  %and = and <2 x i64> %bc2, %bc1
+  %bc3 = bitcast <2 x i64> %and to <4 x i32>
+  ret <4 x i32> %bc3
+}
+
+; The destination must have an integer element type.
+; FIXME: We can still eliminate one bitcast in this test by doing the logic op
+; in the type of the input that has an integer element type.
+
+define <4 x float> @bitcasts_and_bitcast_to_fp(<4 x float> %a, <8 x i16> %b) {
+; CHECK-LABEL: @bitcasts_and_bitcast_to_fp(
+; CHECK-NEXT:    [[BC1:%.*]] = bitcast <4 x float> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT:    [[BC2:%.*]] = bitcast <8 x i16> [[B:%.*]] to <2 x i64>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i64> [[BC2]], [[BC1]]
+; CHECK-NEXT:    [[BC3:%.*]] = bitcast <2 x i64> [[AND]] to <4 x float>
+; CHECK-NEXT:    ret <4 x float> [[BC3]]
+;
+  %bc1 = bitcast <4 x float> %a to <2 x i64>
+  %bc2 = bitcast <8 x i16> %b to <2 x i64>
+  %and = and <2 x i64> %bc2, %bc1
+  %bc3 = bitcast <2 x i64> %and to <4 x float>
+  ret <4 x float> %bc3
+}
+
+; FIXME: The transform is limited so it does not change a vector op into an integer op, to avoid codegen problems.
+
+define i128 @bitcast_or_bitcast(i128 %a, <2 x i64> %b) {
+; CHECK-LABEL: @bitcast_or_bitcast(
+; CHECK-NEXT:    [[BC1:%.*]] = bitcast i128 [[A:%.*]] to <2 x i64>
+; CHECK-NEXT:    [[OR:%.*]] = or <2 x i64> [[BC1]], [[B:%.*]]
+; CHECK-NEXT:    [[BC2:%.*]] = bitcast <2 x i64> [[OR]] to i128
+; CHECK-NEXT:    ret i128 [[BC2]]
+;
+  %bc1 = bitcast i128 %a to <2 x i64>
+  %or = or <2 x i64> %b, %bc1
+  %bc2 = bitcast <2 x i64> %or to i128
+  ret i128 %bc2
+}
+
+; FIXME: The transform is limited so it does not change an integer op into a vector op, to avoid codegen problems.
+
+define <4 x i32> @bitcast_xor_bitcast(<4 x i32> %a, i128 %b) {
+; CHECK-LABEL: @bitcast_xor_bitcast(
+; CHECK-NEXT:    [[BC1:%.*]] = bitcast <4 x i32> [[A:%.*]] to i128
+; CHECK-NEXT:    [[XOR:%.*]] = xor i128 [[BC1]], [[B:%.*]]
+; CHECK-NEXT:    [[BC2:%.*]] = bitcast i128 [[XOR]] to <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[BC2]]
+;
+  %bc1 = bitcast <4 x i32> %a to i128
+  %xor = xor i128 %bc1, %b
+  %bc2 = bitcast i128 %xor to <4 x i32>
+  ret <4 x i32> %bc2
+}
+
+; https://llvm.org/bugs/show_bug.cgi?id=6137#c6
+
+define <4 x float> @bitcast_vector_select(<4 x float> %x, <2 x i64> %y, <4 x i1> %cmp) {
+; CHECK-LABEL: @bitcast_vector_select(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <4 x float>
+; CHECK-NEXT:    [[T7:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x float> [[X:%.*]], <4 x float> [[TMP1]]
+; CHECK-NEXT:    ret <4 x float> [[T7]]
+;
+  %t4 = bitcast <4 x float> %x to <4 x i32>
+  %t5 = bitcast <2 x i64> %y to <4 x i32>
+  %t6 = select <4 x i1> %cmp, <4 x i32> %t4, <4 x i32> %t5
+  %t7 = bitcast <4 x i32> %t6 to <4 x float>
+  ret <4 x float> %t7
+}
+
+define float @bitcast_scalar_select_of_scalars(float %x, i32 %y, i1 %cmp) {
+; CHECK-LABEL: @bitcast_scalar_select_of_scalars(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32 [[Y:%.*]] to float
+; CHECK-NEXT:    [[T7:%.*]] = select i1 [[CMP:%.*]], float [[X:%.*]], float [[TMP1]]
+; CHECK-NEXT:    ret float [[T7]]
+;
+  %t4 = bitcast float %x to i32
+  %t6 = select i1 %cmp, i32 %t4, i32 %y
+  %t7 = bitcast i32 %t6 to float
+  ret float %t7
+}
+
+; FIXME: We should change the select operand types to scalars, but we need to make
+; sure the backend can reverse that transform if needed.
+
+define float @bitcast_scalar_select_type_mismatch1(float %x, <4 x i8> %y, i1 %cmp) {
+; CHECK-LABEL: @bitcast_scalar_select_type_mismatch1(
+; CHECK-NEXT:    [[T4:%.*]] = bitcast float [[X:%.*]] to <4 x i8>
+; CHECK-NEXT:    [[T6:%.*]] = select i1 [[CMP:%.*]], <4 x i8> [[T4]], <4 x i8> [[Y:%.*]]
+; CHECK-NEXT:    [[T7:%.*]] = bitcast <4 x i8> [[T6]] to float
+; CHECK-NEXT:    ret float [[T7]]
+;
+  %t4 = bitcast float %x to <4 x i8>
+  %t6 = select i1 %cmp, <4 x i8> %t4, <4 x i8> %y
+  %t7 = bitcast <4 x i8> %t6 to float
+  ret float %t7
+}
+
+; FIXME: We should change the select operand types to vectors, but we need to make
+; sure the backend can reverse that transform if needed.
+
+define <4 x i8> @bitcast_scalar_select_type_mismatch2(<4 x i8> %x, float %y, i1 %cmp) {
+; CHECK-LABEL: @bitcast_scalar_select_type_mismatch2(
+; CHECK-NEXT:    [[T4:%.*]] = bitcast <4 x i8> [[X:%.*]] to float
+; CHECK-NEXT:    [[T6:%.*]] = select i1 [[CMP:%.*]], float [[T4]], float [[Y:%.*]]
+; CHECK-NEXT:    [[T7:%.*]] = bitcast float [[T6]] to <4 x i8>
+; CHECK-NEXT:    ret <4 x i8> [[T7]]
+;
+  %t4 = bitcast <4 x i8> %x to float
+  %t6 = select i1 %cmp, float %t4, float %y
+  %t7 = bitcast float %t6 to <4 x i8>
+  ret <4 x i8> %t7
+}
+
+define <4 x float> @bitcast_scalar_select_of_vectors(<4 x float> %x, <2 x i64> %y, i1 %cmp) {
+; CHECK-LABEL: @bitcast_scalar_select_of_vectors(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <4 x float>
+; CHECK-NEXT:    [[T7:%.*]] = select i1 [[CMP:%.*]], <4 x float> [[X:%.*]], <4 x float> [[TMP1]]
+; CHECK-NEXT:    ret <4 x float> [[T7]]
+;
+  %t4 = bitcast <4 x float> %x to <4 x i32>
+  %t5 = bitcast <2 x i64> %y to <4 x i32>
+  %t6 = select i1 %cmp, <4 x i32> %t4, <4 x i32> %t5
+  %t7 = bitcast <4 x i32> %t6 to <4 x float>
+  ret <4 x float> %t7
+}
+
+; Can't change the type of the vector select if the dest type is scalar.
+
+define float @bitcast_vector_select_no_fold1(float %x, <2 x i16> %y, <4 x i1> %cmp) {
+; CHECK-LABEL: @bitcast_vector_select_no_fold1(
+; CHECK-NEXT:    [[T4:%.*]] = bitcast float [[X:%.*]] to <4 x i8>
+; CHECK-NEXT:    [[T5:%.*]] = bitcast <2 x i16> [[Y:%.*]] to <4 x i8>
+; CHECK-NEXT:    [[T6:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i8> [[T4]], <4 x i8> [[T5]]
+; CHECK-NEXT:    [[T7:%.*]] = bitcast <4 x i8> [[T6]] to float
+; CHECK-NEXT:    ret float [[T7]]
+;
+  %t4 = bitcast float %x to <4 x i8>
+  %t5 = bitcast <2 x i16> %y to <4 x i8>
+  %t6 = select <4 x i1> %cmp, <4 x i8> %t4, <4 x i8> %t5
+  %t7 = bitcast <4 x i8> %t6 to float
+  ret float %t7
+}
+
+; Can't change the type of the vector select if the number of elements in the dest type is not the same.
+
+define <2 x float> @bitcast_vector_select_no_fold2(<2 x float> %x, <4 x i16> %y, <8 x i1> %cmp) {
+; CHECK-LABEL: @bitcast_vector_select_no_fold2(
+; CHECK-NEXT:    [[T4:%.*]] = bitcast <2 x float> [[X:%.*]] to <8 x i8>
+; CHECK-NEXT:    [[T5:%.*]] = bitcast <4 x i16> [[Y:%.*]] to <8 x i8>
+; CHECK-NEXT:    [[T6:%.*]] = select <8 x i1> [[CMP:%.*]], <8 x i8> [[T4]], <8 x i8> [[T5]]
+; CHECK-NEXT:    [[T7:%.*]] = bitcast <8 x i8> [[T6]] to <2 x float>
+; CHECK-NEXT:    ret <2 x float> [[T7]]
+;
+  %t4 = bitcast <2 x float> %x to <8 x i8>
+  %t5 = bitcast <4 x i16> %y to <8 x i8>
+  %t6 = select <8 x i1> %cmp, <8 x i8> %t4, <8 x i8> %t5
+  %t7 = bitcast <8 x i8> %t6 to <2 x float>
+  ret <2 x float> %t7
+}
+
+; Optimize bitcasts that are extracting the low element of a vector.  This happens because of SRoA.
+; rdar://7892780
+define float @test2(<2 x float> %A, <2 x i32> %B) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <2 x float> [[A:%.*]], i32 0
+; CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i32> [[B:%.*]] to <2 x float>
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[BC]], i32 0
+; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP24]], [[TMP4]]
+; CHECK-NEXT:    ret float [[ADD]]
+;
+  %tmp28 = bitcast <2 x float> %A to i64  ; <i64> [#uses=2]
+  %tmp23 = trunc i64 %tmp28 to i32                ; <i32> [#uses=1]
+  %tmp24 = bitcast i32 %tmp23 to float            ; <float> [#uses=1]
+
+  %tmp = bitcast <2 x i32> %B to i64
+  %tmp2 = trunc i64 %tmp to i32                ; <i32> [#uses=1]
+  %tmp4 = bitcast i32 %tmp2 to float            ; <float> [#uses=1]
+
+  %add = fadd float %tmp24, %tmp4
+  ret float %add
+}
+
+; Optimize bitcasts that are extracting other elements of a vector.  This happens because of SRoA.
+; rdar://7892780
+define float @test3(<2 x float> %A, <2 x i64> %B) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <2 x float> [[A:%.*]], i32 1
+; CHECK-NEXT:    [[BC2:%.*]] = bitcast <2 x i64> [[B:%.*]] to <4 x float>
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[BC2]], i32 2
+; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP24]], [[TMP4]]
+; CHECK-NEXT:    ret float [[ADD]]
+;
+  %tmp28 = bitcast <2 x float> %A to i64
+  %tmp29 = lshr i64 %tmp28, 32
+  %tmp23 = trunc i64 %tmp29 to i32
+  %tmp24 = bitcast i32 %tmp23 to float
+
+  %tmp = bitcast <2 x i64> %B to i128
+  %tmp1 = lshr i128 %tmp, 64
+  %tmp2 = trunc i128 %tmp1 to i32
+  %tmp4 = bitcast i32 %tmp2 to float
+
+  %add = fadd float %tmp24, %tmp4
+  ret float %add
+}
+
+; Both bitcasts are unnecessary; change the extractelement.
+
+define float @bitcast_extelt1(<2 x float> %A) {
+; CHECK-LABEL: @bitcast_extelt1(
+; CHECK-NEXT:    [[BC2:%.*]] = extractelement <2 x float> [[A:%.*]], i32 0
+; CHECK-NEXT:    ret float [[BC2]]
+;
+  %bc1 = bitcast <2 x float> %A to <2 x i32>
+  %ext = extractelement <2 x i32> %bc1, i32 0
+  %bc2 = bitcast i32 %ext to float
+  ret float %bc2
+}
+
+; Second bitcast can be folded into the first.
+
+define i64 @bitcast_extelt2(<4 x float> %A) {
+; CHECK-LABEL: @bitcast_extelt2(
+; CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x float> [[A:%.*]] to <2 x i64>
+; CHECK-NEXT:    [[BC2:%.*]] = extractelement <2 x i64> [[BC]], i32 1
+; CHECK-NEXT:    ret i64 [[BC2]]
+;
+  %bc1 = bitcast <4 x float> %A to <2 x double>
+  %ext = extractelement <2 x double> %bc1, i32 1
+  %bc2 = bitcast double %ext to i64
+  ret i64 %bc2
+}
+
+; TODO: This should return %A.
+
+define <2 x i32> @bitcast_extelt3(<2 x i32> %A) {
+; CHECK-LABEL: @bitcast_extelt3(
+; CHECK-NEXT:    [[BC1:%.*]] = bitcast <2 x i32> [[A:%.*]] to <1 x i64>
+; CHECK-NEXT:    [[EXT:%.*]] = extractelement <1 x i64> [[BC1]], i32 0
+; CHECK-NEXT:    [[BC2:%.*]] = bitcast i64 [[EXT]] to <2 x i32>
+; CHECK-NEXT:    ret <2 x i32> [[BC2]]
+;
+  %bc1 = bitcast <2 x i32> %A to <1 x i64>
+  %ext = extractelement <1 x i64> %bc1, i32 0
+  %bc2 = bitcast i64 %ext to <2 x i32>
+  ret <2 x i32> %bc2
+}
+
+; Handle the case where the input is not a vector.
+
+define double @bitcast_extelt4(i128 %A) {
+; CHECK-LABEL: @bitcast_extelt4(
+; CHECK-NEXT:    [[BC:%.*]] = bitcast i128 [[A:%.*]] to <2 x double>
+; CHECK-NEXT:    [[BC2:%.*]] = extractelement <2 x double> [[BC]], i32 0
+; CHECK-NEXT:    ret double [[BC2]]
+;
+  %bc1 = bitcast i128 %A to <2 x i64>
+  %ext = extractelement <2 x i64> %bc1, i32 0
+  %bc2 = bitcast i64 %ext to double
+  ret double %bc2
+}
+
+define <2 x i32> @test4(i32 %A, i32 %B){
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[A:%.*]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[B:%.*]], i32 1
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+;
+  %tmp38 = zext i32 %A to i64
+  %tmp32 = zext i32 %B to i64
+  %tmp33 = shl i64 %tmp32, 32
+  %ins35 = or i64 %tmp33, %tmp38
+  %tmp43 = bitcast i64 %ins35 to <2 x i32>
+  ret <2 x i32> %tmp43
+}
+
+; rdar://8360454
+define <2 x float> @test5(float %A, float %B) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> undef, float [[A:%.*]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[B:%.*]], i32 1
+; CHECK-NEXT:    ret <2 x float> [[TMP2]]
+;
+  %tmp37 = bitcast float %A to i32
+  %tmp38 = zext i32 %tmp37 to i64
+  %tmp31 = bitcast float %B to i32
+  %tmp32 = zext i32 %tmp31 to i64
+  %tmp33 = shl i64 %tmp32, 32
+  %ins35 = or i64 %tmp33, %tmp38
+  %tmp43 = bitcast i64 %ins35 to <2 x float>
+  ret <2 x float> %tmp43
+}
+
+define <2 x float> @test6(float %A){
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> <float 4.200000e+01, float undef>, float [[A:%.*]], i32 1
+; CHECK-NEXT:    ret <2 x float> [[TMP1]]
+;
+  %tmp23 = bitcast float %A to i32
+  %tmp24 = zext i32 %tmp23 to i64
+  %tmp25 = shl i64 %tmp24, 32
+  %mask20 = or i64 %tmp25, 1109917696
+  %tmp35 = bitcast i64 %mask20 to <2 x float>
+  ret <2 x float> %tmp35
+}
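+
+; (Note: in @test6 above, the constant 1109917696 is 0x42280000, the bit
+; pattern of the float 42.0 that appears as element 0 of the expected result.)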
+
+define i64 @ISPC0(i64 %in) {
+; CHECK-LABEL: @ISPC0(
+; CHECK-NEXT:    ret i64 0
+;
+  %out = and i64 %in, xor (i64 bitcast (<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1> to i64), i64 -1)
+  ret i64 %out
+}
+
+
+define i64 @Vec2(i64 %in) {
+; CHECK-LABEL: @Vec2(
+; CHECK-NEXT:    ret i64 0
+;
+  %out = and i64 %in, xor (i64 bitcast (<4 x i16> <i16 0, i16 0, i16 0, i16 0> to i64), i64 0)
+  ret i64 %out
+}
+
+define i64 @All11(i64 %in) {
+; CHECK-LABEL: @All11(
+; CHECK-NEXT:    ret i64 0
+;
+  %out = and i64 %in, xor (i64 bitcast (<2 x float> bitcast (i64 -1 to <2 x float>) to i64), i64 -1)
+  ret i64 %out
+}
+
+
+define i32 @All111(i32 %in) {
+; CHECK-LABEL: @All111(
+; CHECK-NEXT:    ret i32 0
+;
+  %out = and i32 %in, xor (i32 bitcast (<1 x float> bitcast (i32 -1 to <1 x float>) to i32), i32 -1)
+  ret i32 %out
+}
+
+define <2 x i16> @BitcastInsert(i32 %a) {
+; CHECK-LABEL: @BitcastInsert(
+; CHECK-NEXT:    [[R:%.*]] = bitcast i32 [[A:%.*]] to <2 x i16>
+; CHECK-NEXT:    ret <2 x i16> [[R]]
+;
+  %v = insertelement <1 x i32> undef, i32 %a, i32 0
+  %r = bitcast <1 x i32> %v to <2 x i16>
+  ret <2 x i16> %r
+}
+
+; PR17293
+define <2 x i64> @test7(<2 x i8*>* %arg) nounwind {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    [[CAST:%.*]] = bitcast <2 x i8*>* [[ARG:%.*]] to <2 x i64>*
+; CHECK-NEXT:    [[LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[CAST]], align 16
+; CHECK-NEXT:    ret <2 x i64> [[LOAD]]
+;
+  %cast = bitcast <2 x i8*>* %arg to <2 x i64>*
+  %load = load <2 x i64>, <2 x i64>* %cast, align 16
+  ret <2 x i64> %load
+}
+
+define i8 @test8() {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:    ret i8 -85
+;
+  %res = bitcast <8 x i1> <i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true> to i8
+  ret i8 %res
+}
+
+@g = internal unnamed_addr global i32 undef
+
+define void @constant_fold_vector_to_double() {
+; CHECK-LABEL: @constant_fold_vector_to_double(
+; CHECK-NEXT:    store volatile double 1.000000e+00, double* undef, align 8
+; CHECK-NEXT:    store volatile double 1.000000e+00, double* undef, align 8
+; CHECK-NEXT:    store volatile double 1.000000e+00, double* undef, align 8
+; CHECK-NEXT:    store volatile double 1.000000e+00, double* undef, align 8
+; CHECK-NEXT:    store volatile double 0xFFFFFFFFFFFFFFFF, double* undef, align 8
+; CHECK-NEXT:    store volatile double 0x162E000004D2, double* undef, align 8
+; CHECK-NEXT:    store volatile double bitcast (<2 x i32> <i32 1234, i32 ptrtoint (i32* @g to i32)> to double), double* undef, align 8
+; CHECK-NEXT:    store volatile double 0x400000003F800000, double* undef, align 8
+; CHECK-NEXT:    store volatile double 0.000000e+00, double* undef, align 8
+; CHECK-NEXT:    store volatile double 0.000000e+00, double* undef, align 8
+; CHECK-NEXT:    store volatile double 0.000000e+00, double* undef, align 8
+; CHECK-NEXT:    store volatile double 0.000000e+00, double* undef, align 8
+; CHECK-NEXT:    store volatile double 0.000000e+00, double* undef, align 8
+; CHECK-NEXT:    store volatile double 0.000000e+00, double* undef, align 8
+; CHECK-NEXT:    ret void
+;
+  store volatile double bitcast (<1 x i64> <i64 4607182418800017408> to double), double* undef
+  store volatile double bitcast (<2 x i32> <i32 0, i32 1072693248> to double), double* undef
+  store volatile double bitcast (<4 x i16> <i16 0, i16 0, i16 0, i16 16368> to double), double* undef
+  store volatile double bitcast (<8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 240, i8 63> to double), double* undef
+
+  store volatile double bitcast (<2 x i32> <i32 -1, i32 -1> to double), double* undef
+  store volatile double bitcast (<2 x i32> <i32 1234, i32 5678> to double), double* undef
+
+  store volatile double bitcast (<2 x i32> <i32 1234, i32 ptrtoint (i32* @g to i32)> to double), double* undef
+  store volatile double bitcast (<2 x float> <float 1.0, float 2.0> to double), double* undef
+
+  store volatile double bitcast (<2 x i32> zeroinitializer to double), double* undef
+  store volatile double bitcast (<4 x i16> zeroinitializer to double), double* undef
+  store volatile double bitcast (<8 x i8> zeroinitializer to double), double* undef
+  store volatile double bitcast (<16 x i4> zeroinitializer to double), double* undef
+  store volatile double bitcast (<32 x i2> zeroinitializer to double), double* undef
+  store volatile double bitcast (<64 x i1> zeroinitializer to double), double* undef
+  ret void
+}
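+
+; (Note on the checks above: with the little-endian datalayout, <i32 1234,
+; i32 5678> packs into the i64 0x0000162E000004D2 and <float 1.0, float 2.0>
+; into 0x400000003F800000, which is how those folded doubles are printed.)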
+
+define void @constant_fold_vector_to_float() {
+; CHECK-LABEL: @constant_fold_vector_to_float(
+; CHECK-NEXT:    store volatile float 1.000000e+00, float* undef, align 4
+; CHECK-NEXT:    store volatile float 1.000000e+00, float* undef, align 4
+; CHECK-NEXT:    store volatile float 1.000000e+00, float* undef, align 4
+; CHECK-NEXT:    store volatile float 1.000000e+00, float* undef, align 4
+; CHECK-NEXT:    ret void
+;
+  store volatile float bitcast (<1 x i32> <i32 1065353216> to float), float* undef
+  store volatile float bitcast (<2 x i16> <i16 0, i16 16256> to float), float* undef
+  store volatile float bitcast (<4 x i8> <i8 0, i8 0, i8 128, i8 63> to float), float* undef
+  store volatile float bitcast (<32 x i1> <i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0> to float), float* undef
+
+  ret void
+}
+
+define void @constant_fold_vector_to_half() {
+; CHECK-LABEL: @constant_fold_vector_to_half(
+; CHECK-NEXT:    store volatile half 0xH4000, half* undef, align 2
+; CHECK-NEXT:    store volatile half 0xH4000, half* undef, align 2
+; CHECK-NEXT:    ret void
+;
+  store volatile half bitcast (<2 x i8> <i8 0, i8 64> to half), half* undef
+  store volatile half bitcast (<4 x i4> <i4 0, i4 0, i4 0, i4 4> to half), half* undef
+  ret void
+}

Added: llvm/trunk/test/Transforms/InstCombine/bitreverse-hang.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/bitreverse-hang.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/bitreverse-hang.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/bitreverse-hang.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,53 @@
+; RUN: opt < %s -loop-unroll -instcombine -S | FileCheck %s
+
+; This test is a worst-case scenario for bitreversal/byteswap detection.
+; After loop unrolling (the unrolled loop is unreadably large so it has been kept
+; rolled here), we have a binary tree of OR operands (as bitreversal detection
+; looks straight through shifts):
+;
+;  OR
+;  | \
+;  |  LSHR
+;  | /
+;  OR
+;  | \
+;  |  LSHR
+;  | /
+;  OR
+;
+; This results in exponential runtime. The loop here runs for 32 iterations, which
+; will effectively hang the compiler if we don't handle this case cleverly.
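+;
+; For reference, a rough C sketch of the kind of source that produces this
+; pattern (hypothetical; the original reproducer is not part of this test):
+;
+;   unsigned b;
+;   int fn1(void) {
+;     for (int i = 0; i < 32; i++)
+;       b |= b >> 1;            /* each iteration ORs in a shifted copy */
+;     return 0;                 /* the return value is irrelevant here  */
+;   }
+;
+; After full unrolling, each OR's operands are themselves ORs of shifted
+; values, so a matcher that recurses into both operands of every OR (looking
+; through the shifts) visits an exponential number of nodes.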
+
+@b = common global i32 0, align 4
+
+; CHECK: define i32 @fn1
+define i32 @fn1() #0 {
+entry:
+  %b.promoted = load i32, i32* @b, align 4, !tbaa !2
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %or4 = phi i32 [ %b.promoted, %entry ], [ %or, %for.body ]
+  %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %shr = lshr i32 %or4, 1
+  %or = or i32 %shr, %or4
+  %inc = add nuw nsw i32 %i.03, 1
+  %exitcond = icmp eq i32 %inc, 32
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  store i32 %or, i32* @b, align 4, !tbaa !2
+  ret i32 undef
+}
+
+attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"PIC Level", i32 2}
+!1 = !{!"clang version 3.8.0"}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}

Added: llvm/trunk/test/Transforms/InstCombine/bitreverse-known-bits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/bitreverse-known-bits.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/bitreverse-known-bits.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/bitreverse-known-bits.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,51 @@
+; RUN: opt < %s -S -instcombine | FileCheck %s
+
+declare i8 @llvm.bitreverse.i8(i8)
+declare i32 @llvm.bitreverse.i32(i32)
+
+; CHECK-LABEL: @test1
+; CHECK: ret i1 true
+define i1 @test1(i32 %arg) {
+  %a = or i32 %arg, 4294901760
+  %b = call i32 @llvm.bitreverse.i32(i32 %a)
+  %and = and i32 %b, 65535
+  %res = icmp eq i32 %and, 65535
+  ret i1 %res
+}
+
+; CHECK-LABEL: @test2
+; CHECK: ret i1 true
+define i1 @test2(i32 %arg) {
+  %a = or i32 %arg, 1
+  %b = call i32 @llvm.bitreverse.i32(i32 %a)
+  %c = and i32 %b, 2147483648
+  %d = call i32 @llvm.bitreverse.i32(i32 %c)
+  %res = icmp eq i32 %d, 1
+  ret i1 %res
+}
+
+; CHECK-LABEL: @test3
+; CHECK: ret i1 false
+define i1 @test3(i32 %arg) {
+  %a = or i32 %arg, 65536
+  %b = call i32 @llvm.bitreverse.i32(i32 %a)
+  %and = and i32 %b, 32768
+  %res = icmp eq i32 %and, 0
+  ret i1 %res
+}
+
+; CHECK-LABEL: @add_bitreverse
+; Make sure we process range metadata on bitreverse
+define i8 @add_bitreverse(i8 %a) {
+  %b = and i8 %a, 252
+  ; known bits for the bitreverse will say the result is in the range [0, 64)
+  ; but the metadata says [0, 16). So make sure the range metadata wins.
+  ;    add %reverse, 1111 0000
+  ; should become
+  ;    or  %reverse, 1111 0000
+  %reverse = call i8 @llvm.bitreverse.i8(i8 %b), !range !1
+  %c = add i8 %reverse, -16
+; CHECK: or i8 %reverse, -16
+  ret i8 %c
+}
+!1 = !{i8 0, i8 16}
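+
+; A minimal sketch of why the add in @add_bitreverse can become an or
+; (assuming the !range metadata really does bound %reverse to [0, 16)): when
+; two values have no set bits in common, adding them produces no carries, so
+; the add is the same as an or. In C terms:
+;
+;   unsigned char r = reverse & 0x0F;                /* in [0, 16), per !range */
+;   assert((unsigned char)(r + 0xF0) == (r | 0xF0)); /* no carries: + equals | */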

Added: llvm/trunk/test/Transforms/InstCombine/bittest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/bittest.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/bittest.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/bittest.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,30 @@
+; RUN: opt < %s -instcombine -simplifycfg -S |\
+; RUN:    not grep "call void @abort"
+
+@b_rec.0 = external global i32          ; <i32*> [#uses=2]
+
+define void @_Z12h000007_testv(i32* %P) {
+entry:
+        %tmp.2 = load i32, i32* @b_rec.0             ; <i32> [#uses=1]
+        %tmp.9 = or i32 %tmp.2, -989855744              ; <i32> [#uses=2]
+        %tmp.16 = and i32 %tmp.9, -805306369            ; <i32> [#uses=2]
+        %tmp.17 = and i32 %tmp.9, -973078529            ; <i32> [#uses=1]
+        store i32 %tmp.17, i32* @b_rec.0
+        %tmp.17.shrunk = bitcast i32 %tmp.16 to i32             ; <i32> [#uses=1]
+        %tmp.22 = and i32 %tmp.17.shrunk, -1073741824           ; <i32> [#uses=1]
+        %tmp.23 = icmp eq i32 %tmp.22, -1073741824              ; <i1> [#uses=1]
+        br i1 %tmp.23, label %endif.0, label %then.0
+
+then.0:         ; preds = %entry
+        tail call void @abort( )
+        unreachable
+
+endif.0:                ; preds = %entry
+        %tmp.17.shrunk2 = bitcast i32 %tmp.16 to i32            ; <i32> [#uses=1]
+        %tmp.27.mask = and i32 %tmp.17.shrunk2, 100663295               ; <i32> [#uses=1]
+        store i32 %tmp.27.mask, i32* %P
+        ret void
+}
+
+declare void @abort()
+

Added: llvm/trunk/test/Transforms/InstCombine/branch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/branch.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/branch.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/branch.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,27 @@
+; Check that we fold the condition of branches of the
+; form: br <condition> dest1, dest2, where dest1 == dest2.
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+define i32 @test(i32 %x) {
+; CHECK-LABEL: @test
+entry:
+; CHECK-NOT: icmp
+; CHECK: br i1 false
+  %cmp = icmp ult i32 %x, 7
+  br i1 %cmp, label %merge, label %merge
+merge:
+; CHECK-LABEL: merge:
+; CHECK: ret i32 %x
+  ret i32 %x
+}
+
+@global = global i8 0
+
+define i32 @pat(i32 %x) {
+; CHECK-NOT: icmp false
+; CHECK: br i1 false
+  %y = icmp eq i32 27, ptrtoint(i8* @global to i32)
+  br i1 %y, label %patatino, label %patatino
+patatino:
+  ret i32 %x
+}

Added: llvm/trunk/test/Transforms/InstCombine/broadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/broadcast.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/broadcast.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/broadcast.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,137 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; CHECK-LABEL: good1
+; CHECK: %[[INS:.*]] = insertelement <4 x float> undef, float %arg, i32 0
+; CHECK-NEXT: %[[BCAST:.*]] = shufflevector <4 x float> %[[INS]], <4 x float> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x float> %[[BCAST]]
+define <4 x float> @good1(float %arg) {
+  %tmp = insertelement <4 x float> undef, float %arg, i32 0
+  %tmp4 = insertelement <4 x float> %tmp, float %arg, i32 1
+  %tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 2
+  %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+  ret <4 x float> %tmp6
+}
+
+; CHECK-LABEL: good2
+; CHECK: %[[INS:.*]] = insertelement <4 x float> undef, float %arg, i32 0
+; CHECK-NEXT: %[[BCAST:.*]] = shufflevector <4 x float> %[[INS]], <4 x float> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x float> %[[BCAST]]
+define <4 x float> @good2(float %arg) {
+  %tmp = insertelement <4 x float> undef, float %arg, i32 1
+  %tmp4 = insertelement <4 x float> %tmp, float %arg, i32 2
+  %tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 0
+  %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+  ret <4 x float> %tmp6
+}
+
+; CHECK-LABEL: good3
+; CHECK: %[[INS:.*]] = insertelement <4 x float> undef, float %arg, i32 0
+; CHECK-NEXT: %[[BCAST:.*]] = shufflevector <4 x float> %[[INS]], <4 x float> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x float> %[[BCAST]]
+define <4 x float> @good3(float %arg) {
+  %tmp = insertelement <4 x float> zeroinitializer, float %arg, i32 0
+  %tmp4 = insertelement <4 x float> %tmp, float %arg, i32 1
+  %tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 2
+  %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+  ret <4 x float> %tmp6
+}
+
+; CHECK-LABEL: good4
+; CHECK: %[[INS:.*]] = insertelement <4 x float> undef, float %arg, i32 0
+; CHECK-NEXT: %[[ADD:.*]] = fadd <4 x float> %[[INS]], %[[INS]]
+; CHECK-NEXT: %[[BCAST:.*]] = shufflevector <4 x float> %[[ADD]], <4 x float> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x float> %[[BCAST]]
+define <4 x float> @good4(float %arg) {
+  %tmp = insertelement <4 x float> zeroinitializer, float %arg, i32 0
+  %tmp4 = insertelement <4 x float> %tmp, float %arg, i32 1
+  %tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 2
+  %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+  %tmp7 = fadd <4 x float> %tmp6, %tmp6
+  ret <4 x float> %tmp7
+}
+
+; CHECK-LABEL: @good5(
+; CHECK-NEXT:    %ins1 = insertelement <4 x float> undef, float %v, i32 0
+; CHECK-NEXT:    %a1 = fadd <4 x float> %ins1, %ins1
+; CHECK-NEXT:    %ins4 = shufflevector <4 x float> %ins1, <4 x float> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    %res = fadd <4 x float> %a1, %ins4
+; CHECK-NEXT: ret <4 x float> %res
+define <4 x float> @good5(float %v) {
+  %ins1 = insertelement <4 x float> undef, float %v, i32 0
+  %a1 = fadd <4 x float> %ins1, %ins1
+  %ins2 = insertelement<4 x float> %ins1, float %v, i32 1
+  %ins3 = insertelement<4 x float> %ins2, float %v, i32 2
+  %ins4 = insertelement<4 x float> %ins3, float %v, i32 3
+  %res = fadd <4 x float> %a1, %ins4
+  ret <4 x float> %res
+}
+
+; CHECK-LABEL: bad1
+; CHECK-NOT: shufflevector
+define <4 x float> @bad1(float %arg) {
+  %tmp = insertelement <4 x float> undef, float %arg, i32 1
+  %tmp4 = insertelement <4 x float> %tmp, float %arg, i32 1
+  %tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 2
+  %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+  ret <4 x float> %tmp6
+}
+
+; CHECK-LABEL: bad2
+; CHECK-NOT: shufflevector
+define <4 x float> @bad2(float %arg) {
+  %tmp = insertelement <4 x float> undef, float %arg, i32 0
+  %tmp5 = insertelement <4 x float> %tmp, float %arg, i32 2
+  %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+  ret <4 x float> %tmp6
+}
+
+; CHECK-LABEL: bad3
+; CHECK-NOT: shufflevector
+define <4 x float> @bad3(float %arg, float %arg2) {
+  %tmp = insertelement <4 x float> undef, float %arg, i32 0
+  %tmp4 = insertelement <4 x float> %tmp, float %arg2, i32 1
+  %tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 2
+  %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+  ret <4 x float> %tmp6
+}
+
+; CHECK-LABEL: bad4
+; CHECK-NOT: shufflevector
+define <1 x float> @bad4(float %arg) {
+  %tmp = insertelement <1 x float> undef, float %arg, i32 0
+  ret <1 x float> %tmp
+}
+
+; CHECK-LABEL: bad5
+; CHECK-NOT: shufflevector
+define <4 x float> @bad5(float %arg) {
+  %tmp = insertelement <4 x float> undef, float %arg, i32 0
+  %tmp4 = insertelement <4 x float> %tmp, float %arg, i32 1
+  %tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 2
+  %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+  %tmp7 = fadd <4 x float> %tmp6, %tmp4
+  ret <4 x float> %tmp7
+}
+
+; CHECK-LABEL: bad6
+; CHECK-NOT: shufflevector
+define <4 x float> @bad6(float %arg, i32 %k) {
+  %tmp = insertelement <4 x float> undef, float %arg, i32 0
+  %tmp4 = insertelement <4 x float> %tmp, float %arg, i32 1
+  %tmp5 = insertelement <4 x float> %tmp4, float %arg, i32 %k
+  %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3
+  ret <4 x float> %tmp6
+}
+
+; CHECK-LABEL: @bad7(
+; CHECK-NOT: shufflevector
+define <4 x float> @bad7(float %v) {
+  %ins1 = insertelement <4 x float> undef, float %v, i32 1
+  %a1 = fadd <4 x float> %ins1, %ins1
+  %ins2 = insertelement<4 x float> %ins1, float %v, i32 2
+  %ins3 = insertelement<4 x float> %ins2, float %v, i32 3
+  %ins4 = insertelement<4 x float> %ins3, float %v, i32 0
+  %res = fadd <4 x float> %a1, %ins4
+  ret <4 x float> %res
+}

Added: llvm/trunk/test/Transforms/InstCombine/bswap-fold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/bswap-fold.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/bswap-fold.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/bswap-fold.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,337 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; rdar://5992453
+; A & 255
+define i32 @test4(i32 %a) nounwind  {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 %a, 255
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+  %tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )
+  %tmp4 = lshr i32 %tmp2, 24
+  ret i32 %tmp4
+}
+
+; a >> 24
+define i32 @test6(i32 %a) nounwind {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 %a, 24
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+  %tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )
+  %tmp4 = and i32 %tmp2, 255
+  ret i32 %tmp4
+}
+
+; PR5284
+define i16 @test7(i32 %A) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 %A, 16
+; CHECK-NEXT:    [[D:%.*]] = trunc i32 [[TMP1]] to i16
+; CHECK-NEXT:    ret i16 [[D]]
+;
+  %B = tail call i32 @llvm.bswap.i32(i32 %A) nounwind
+  %C = trunc i32 %B to i16
+  %D = tail call i16 @llvm.bswap.i16(i16 %C) nounwind
+  ret i16 %D
+}
+
+define i16 @test8(i64 %A) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 %A, 48
+; CHECK-NEXT:    [[D:%.*]] = trunc i64 [[TMP1]] to i16
+; CHECK-NEXT:    ret i16 [[D]]
+;
+  %B = tail call i64 @llvm.bswap.i64(i64 %A) nounwind
+  %C = trunc i64 %B to i16
+  %D = tail call i16 @llvm.bswap.i16(i16 %C) nounwind
+  ret i16 %D
+}
+
+; Misc: Fold bswap(undef) to undef.
+define i64 @foo() {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:    ret i64 undef
+;
+  %a = call i64 @llvm.bswap.i64(i64 undef)
+  ret i64 %a
+}
+
+; PR15782
+; Fold: OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) )
+; Fold: OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) )
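+;
+; Worked example for @bs_and16i below: the mask 10001 is 0x2711, and
+; bswap16(0x2711) == 0x1127 == 4391, so
+;   bswap16(a) & 0x2711  -->  bswap16(a & 0x1127)
+; which is where the 4391 in the CHECK lines comes from. A small C sanity
+; check (assuming a Clang/GCC-style __builtin_bswap16):
+;   assert((uint16_t)(__builtin_bswap16(a) & 0x2711) ==
+;          __builtin_bswap16((uint16_t)(a & 0x1127)));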
+define i16 @bs_and16i(i16 %a, i16 %b) #0 {
+; CHECK-LABEL: @bs_and16i(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i16 %a, 4391
+; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; CHECK-NEXT:    ret i16 [[TMP2]]
+;
+  %1 = tail call i16 @llvm.bswap.i16(i16 %a)
+  %2 = and i16 %1, 10001
+  ret i16 %2
+}
+
+define i16 @bs_and16(i16 %a, i16 %b) #0 {
+; CHECK-LABEL: @bs_and16(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i16 %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; CHECK-NEXT:    ret i16 [[TMP2]]
+;
+  %tmp1 = tail call i16 @llvm.bswap.i16(i16 %a)
+  %tmp2 = tail call i16 @llvm.bswap.i16(i16 %b)
+  %tmp3 = and i16 %tmp1, %tmp2
+  ret i16 %tmp3
+}
+
+define i16 @bs_or16(i16 %a, i16 %b) #0 {
+; CHECK-LABEL: @bs_or16(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i16 %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; CHECK-NEXT:    ret i16 [[TMP2]]
+;
+  %tmp1 = tail call i16 @llvm.bswap.i16(i16 %a)
+  %tmp2 = tail call i16 @llvm.bswap.i16(i16 %b)
+  %tmp3 = or i16 %tmp1, %tmp2
+  ret i16 %tmp3
+}
+
+define i16 @bs_xor16(i16 %a, i16 %b) #0 {
+; CHECK-LABEL: @bs_xor16(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i16 %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
+; CHECK-NEXT:    ret i16 [[TMP2]]
+;
+  %tmp1 = tail call i16 @llvm.bswap.i16(i16 %a)
+  %tmp2 = tail call i16 @llvm.bswap.i16(i16 %b)
+  %tmp3 = xor i16 %tmp1, %tmp2
+  ret i16 %tmp3
+}
+
+define i32 @bs_and32i(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: @bs_and32i(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 %a, -1585053440
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+  %tmp1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  %tmp2 = and i32 %tmp1, 100001
+  ret i32 %tmp2
+}
+
+define i32 @bs_and32(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: @bs_and32(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+  %tmp1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  %tmp2 = tail call i32 @llvm.bswap.i32(i32 %b)
+  %tmp3 = and i32 %tmp1, %tmp2
+  ret i32 %tmp3
+}
+
+define i32 @bs_or32(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: @bs_or32(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+  %tmp1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  %tmp2 = tail call i32 @llvm.bswap.i32(i32 %b)
+  %tmp3 = or i32 %tmp1, %tmp2
+  ret i32 %tmp3
+}
+
+define i32 @bs_xor32(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: @bs_xor32(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+  %tmp1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  %tmp2 = tail call i32 @llvm.bswap.i32(i32 %b)
+  %tmp3 = xor i32 %tmp1, %tmp2
+  ret i32 %tmp3
+}
+
+define i64 @bs_and64i(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_and64i(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 %a, 129085117527228416
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; CHECK-NEXT:    ret i64 [[TMP2]]
+;
+  %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %tmp2 = and i64 %tmp1, 1000000001
+  ret i64 %tmp2
+}
+
+define i64 @bs_and64(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_and64(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; CHECK-NEXT:    ret i64 [[TMP2]]
+;
+  %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %tmp3 = and i64 %tmp1, %tmp2
+  ret i64 %tmp3
+}
+
+define i64 @bs_or64(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_or64(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i64 %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; CHECK-NEXT:    ret i64 [[TMP2]]
+;
+  %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %tmp3 = or i64 %tmp1, %tmp2
+  ret i64 %tmp3
+}
+
+define i64 @bs_xor64(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_xor64(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i64 %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; CHECK-NEXT:    ret i64 [[TMP2]]
+;
+  %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %tmp3 = xor i64 %tmp1, %tmp2
+  ret i64 %tmp3
+}
+
+define <2 x i32> @bs_and32vec(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: @bs_and32vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+;
+  %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
+  %tmp2 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b)
+  %tmp3 = and <2 x i32> %tmp1, %tmp2
+  ret <2 x i32> %tmp3
+}
+
+define <2 x i32> @bs_or32vec(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: @bs_or32vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+;
+  %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
+  %tmp2 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b)
+  %tmp3 = or <2 x i32> %tmp1, %tmp2
+  ret <2 x i32> %tmp3
+}
+
+define <2 x i32> @bs_xor32vec(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: @bs_xor32vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+;
+  %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
+  %tmp2 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b)
+  %tmp3 = xor <2 x i32> %tmp1, %tmp2
+  ret <2 x i32> %tmp3
+}
+
+define <2 x i32> @bs_and32ivec(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: @bs_and32ivec(
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 -1585053440, i32 -1585053440>
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+;
+  %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
+  %tmp2 = and <2 x i32> %tmp1, <i32 100001, i32 100001>
+  ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @bs_or32ivec(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: @bs_or32ivec(
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[A:%.*]], <i32 -1585053440, i32 -1585053440>
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+;
+  %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
+  %tmp2 = or <2 x i32> %tmp1, <i32 100001, i32 100001>
+  ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @bs_xor32ivec(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: @bs_xor32ivec(
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[A:%.*]], <i32 -1585053440, i32 -1585053440>
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+;
+  %tmp1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
+  %tmp2 = xor <2 x i32> %tmp1, <i32 100001, i32 100001>
+  ret <2 x i32> %tmp2
+}
+
+define i64 @bs_and64_multiuse1(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_and64_multiuse1(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], [[TMP2]]
+; CHECK-NEXT:    ret i64 [[TMP5]]
+;
+  %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %tmp3 = and i64 %tmp1, %tmp2
+  %tmp4 = mul i64 %tmp3, %tmp1 ; to increase use count of the bswaps
+  %tmp5 = mul i64 %tmp4, %tmp2 ; to increase use count of the bswaps
+  ret i64 %tmp5
+}
+
+define i64 @bs_and64_multiuse2(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_and64_multiuse2(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[A]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], [[TMP1]]
+; CHECK-NEXT:    ret i64 [[TMP4]]
+;
+  %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %tmp3 = and i64 %tmp1, %tmp2
+  %tmp4 = mul i64 %tmp3, %tmp1 ; to increase use count of the bswaps
+  ret i64 %tmp4
+}
+
+define i64 @bs_and64_multiuse3(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_and64_multiuse3(
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[A:%.*]], [[B]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
+; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    ret i64 [[TMP4]]
+;
+  %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %tmp3 = and i64 %tmp1, %tmp2
+  %tmp4 = mul i64 %tmp3, %tmp2 ; to increase use count of the bswaps
+  ret i64 %tmp4
+}
+
+define i64 @bs_and64i_multiuse(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_and64i_multiuse(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], 1000000001
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    ret i64 [[TMP3]]
+;
+  %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %tmp2 = and i64 %tmp1, 1000000001
+  %tmp3 = mul i64 %tmp2, %tmp1 ; to increase use count of the bswap
+  ret i64 %tmp3
+}
+
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+declare i64 @llvm.bswap.i64(i64)
+declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>)

Added: llvm/trunk/test/Transforms/InstCombine/bswap-known-bits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/bswap-known-bits.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/bswap-known-bits.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/bswap-known-bits.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,47 @@
+; RUN: opt < %s -S -instcombine | FileCheck %s
+; Note: This is testing functionality in computeKnownBits. I'd rather have
+; used instsimplify, but the bit test folding is apparently only in instcombine.
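+;
+; For example, in @test1 below %a = %arg | 511 has its low nine bits known
+; set; the byte swap moves that all-ones low byte into the high byte of %b,
+; so bit 8 (the 256 bit) of %b is known set and the compare folds to true.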
+
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+
+define i1 @test1(i16 %arg) {
+; CHECK-LABEL: @test1
+; CHECK: ret i1 true
+  %a = or i16 %arg, 511
+  %b = call i16 @llvm.bswap.i16(i16 %a)
+  %and = and i16 %b, 256
+  %res = icmp eq i16 %and, 256
+  ret i1 %res
+}
+
+define i1 @test2(i16 %arg) {
+; CHECK-LABEL: @test2
+; CHECK: ret i1 true
+  %a = or i16 %arg, 1
+  %b = call i16 @llvm.bswap.i16(i16 %a)
+  %and = and i16 %b, 256
+  %res = icmp eq i16 %and, 256
+  ret i1 %res
+}
+
+
+define i1 @test3(i16 %arg) {
+; CHECK-LABEL: @test3
+; CHECK: ret i1 true
+  %a = or i16 %arg, 256
+  %b = call i16 @llvm.bswap.i16(i16 %a)
+  %and = and i16 %b, 1
+  %res = icmp eq i16 %and, 1
+  ret i1 %res
+}
+
+define i1 @test4(i32 %arg) {
+; CHECK-LABEL: @test4
+; CHECK: ret i1 true
+  %a = or i32 %arg, 2147483647  ; i32_MAX
+  %b = call i32 @llvm.bswap.i32(i32 %a)
+  %and = and i32 %b, 127
+  %res = icmp eq i32 %and, 127
+  ret i1 %res
+}

Added: llvm/trunk/test/Transforms/InstCombine/bswap.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/bswap.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/bswap.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/bswap.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,232 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+
+define i32 @test1(i32 %i) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[TMP12:%.*]] = call i32 @llvm.bswap.i32(i32 %i)
+; CHECK-NEXT:    ret i32 [[TMP12]]
+;
+  %tmp1 = lshr i32 %i, 24
+  %tmp3 = lshr i32 %i, 8
+  %tmp4 = and i32 %tmp3, 65280
+  %tmp5 = or i32 %tmp1, %tmp4
+  %tmp7 = shl i32 %i, 8
+  %tmp8 = and i32 %tmp7, 16711680
+  %tmp9 = or i32 %tmp5, %tmp8
+  %tmp11 = shl i32 %i, 24
+  %tmp12 = or i32 %tmp9, %tmp11
+  ret i32 %tmp12
+}
+
+define i32 @test2(i32 %arg) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[TMP14:%.*]] = call i32 @llvm.bswap.i32(i32 %arg)
+; CHECK-NEXT:    ret i32 [[TMP14]]
+;
+  %tmp2 = shl i32 %arg, 24
+  %tmp4 = shl i32 %arg, 8
+  %tmp5 = and i32 %tmp4, 16711680
+  %tmp6 = or i32 %tmp2, %tmp5
+  %tmp8 = lshr i32 %arg, 8
+  %tmp9 = and i32 %tmp8, 65280
+  %tmp10 = or i32 %tmp6, %tmp9
+  %tmp12 = lshr i32 %arg, 24
+  %tmp14 = or i32 %tmp10, %tmp12
+  ret i32 %tmp14
+}
+
+define i16 @test3(i16 %s) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 %s)
+; CHECK-NEXT:    ret i16 [[TMP5]]
+;
+  %tmp2 = lshr i16 %s, 8
+  %tmp4 = shl i16 %s, 8
+  %tmp5 = or i16 %tmp2, %tmp4
+  ret i16 %tmp5
+}
+
+define i16 @test4(i16 %s) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 %s)
+; CHECK-NEXT:    ret i16 [[TMP5]]
+;
+  %tmp2 = lshr i16 %s, 8
+  %tmp4 = shl i16 %s, 8
+  %tmp5 = or i16 %tmp4, %tmp2
+  ret i16 %tmp5
+}
+
+define i16 @test5(i16 %a) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    [[TMP_UPGRD_3:%.*]] = call i16 @llvm.bswap.i16(i16 %a)
+; CHECK-NEXT:    ret i16 [[TMP_UPGRD_3]]
+;
+  %tmp = zext i16 %a to i32
+  %tmp1 = and i32 %tmp, 65280
+  %tmp2 = ashr i32 %tmp1, 8
+  %tmp2.upgrd.1 = trunc i32 %tmp2 to i16
+  %tmp4 = and i32 %tmp, 255
+  %tmp5 = shl i32 %tmp4, 8
+  %tmp5.upgrd.2 = trunc i32 %tmp5 to i16
+  %tmp.upgrd.3 = or i16 %tmp2.upgrd.1, %tmp5.upgrd.2
+  %tmp6 = bitcast i16 %tmp.upgrd.3 to i16
+  %tmp6.upgrd.4 = zext i16 %tmp6 to i32
+  %retval = trunc i32 %tmp6.upgrd.4 to i16
+  ret i16 %retval
+}
+
+; PR2842
+define i32 @test6(i32 %x) nounwind readnone {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.bswap.i32(i32 %x)
+; CHECK-NEXT:    ret i32 [[TMP7]]
+;
+  %tmp = shl i32 %x, 16
+  %x.mask = and i32 %x, 65280
+  %tmp1 = lshr i32 %x, 16
+  %tmp2 = and i32 %tmp1, 255
+  %tmp3 = or i32 %x.mask, %tmp
+  %tmp4 = or i32 %tmp3, %tmp2
+  %tmp5 = shl i32 %tmp4, 8
+  %tmp6 = lshr i32 %x, 24
+  %tmp7 = or i32 %tmp5, %tmp6
+  ret i32 %tmp7
+}
+
+declare void @extra_use(i32)
+
+; swaphalf = (x << 16 | x >> 16)
+; ((swaphalf & 0x00ff00ff) << 8) | ((swaphalf >> 8) & 0x00ff00ff)
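+;
+; A rough C equivalent of the pattern being matched (for reference only; it
+; is not part of the test):
+;
+;   uint32_t swap32(uint32_t x) {
+;     uint32_t swaphalf = (x << 16) | (x >> 16);
+;     return ((swaphalf & 0x00ff00ffu) << 8) | ((swaphalf >> 8) & 0x00ff00ffu);
+;   }
+;
+; which computes the same value as __builtin_bswap32(x).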
+
+define i32 @bswap32_and_first(i32 %x) {
+; CHECK-LABEL: @bswap32_and_first(
+; CHECK-NEXT:    [[BSWAP:%.*]] = call i32 @llvm.bswap.i32(i32 %x)
+; CHECK-NEXT:    ret i32 [[BSWAP]]
+;
+  %shl = shl i32 %x, 16
+  %shr = lshr i32 %x, 16
+  %swaphalf = or i32 %shl, %shr
+  %t = and i32 %swaphalf, 16711935
+  %tshl = shl nuw i32 %t, 8
+  %b = lshr i32 %swaphalf, 8
+  %band = and i32 %b, 16711935
+  %bswap = or i32 %tshl, %band
+  ret i32 %bswap
+}
+
+; Extra use should not prevent matching to bswap.
+; swaphalf = (x << 16 | x >> 16)
+; ((swaphalf & 0x00ff00ff) << 8) | ((swaphalf >> 8) & 0x00ff00ff)
+
+define i32 @bswap32_and_first_extra_use(i32 %x) {
+; CHECK-LABEL: @bswap32_and_first_extra_use(
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 %x, 16
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 %x, 16
+; CHECK-NEXT:    [[SWAPHALF:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT:    [[T:%.*]] = and i32 [[SWAPHALF]], 16711935
+; CHECK-NEXT:    [[BSWAP:%.*]] = call i32 @llvm.bswap.i32(i32 %x)
+; CHECK-NEXT:    call void @extra_use(i32 [[T]])
+; CHECK-NEXT:    ret i32 [[BSWAP]]
+;
+  %shl = shl i32 %x, 16
+  %shr = lshr i32 %x, 16
+  %swaphalf = or i32 %shl, %shr
+  %t = and i32 %swaphalf, 16711935
+  %tshl = shl nuw i32 %t, 8
+  %b = lshr i32 %swaphalf, 8
+  %band = and i32 %b, 16711935
+  %bswap = or i32 %tshl, %band
+  call void @extra_use(i32 %t)
+  ret i32 %bswap
+}
+
+; swaphalf = (x << 16 | x >> 16)
+; ((swaphalf << 8) & 0xff00ff00) | ((swaphalf >> 8) & 0x00ff00ff)
+
+; PR23863
+define i32 @bswap32_shl_first(i32 %x) {
+; CHECK-LABEL: @bswap32_shl_first(
+; CHECK-NEXT:    [[BSWAP:%.*]] = call i32 @llvm.bswap.i32(i32 %x)
+; CHECK-NEXT:    ret i32 [[BSWAP]]
+;
+  %shl = shl i32 %x, 16
+  %shr = lshr i32 %x, 16
+  %swaphalf = or i32 %shl, %shr
+  %t = shl i32 %swaphalf, 8
+  %tand = and i32 %t, -16711936
+  %b = lshr i32 %swaphalf, 8
+  %band = and i32 %b, 16711935
+  %bswap = or i32 %tand, %band
+  ret i32 %bswap
+}
+
+; Extra use should not prevent matching to bswap.
+; swaphalf = (x << 16 | x >> 16)
+; ((swaphalf << 8) & 0xff00ff00) | ((swaphalf >> 8) & 0x00ff00ff)
+
+define i32 @bswap32_shl_first_extra_use(i32 %x) {
+; CHECK-LABEL: @bswap32_shl_first_extra_use(
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 %x, 16
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 %x, 16
+; CHECK-NEXT:    [[SWAPHALF:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT:    [[T:%.*]] = shl i32 [[SWAPHALF]], 8
+; CHECK-NEXT:    [[BSWAP:%.*]] = call i32 @llvm.bswap.i32(i32 %x)
+; CHECK-NEXT:    call void @extra_use(i32 [[T]])
+; CHECK-NEXT:    ret i32 [[BSWAP]]
+;
+  %shl = shl i32 %x, 16
+  %shr = lshr i32 %x, 16
+  %swaphalf = or i32 %shl, %shr
+  %t = shl i32 %swaphalf, 8
+  %tand = and i32 %t, -16711936
+  %b = lshr i32 %swaphalf, 8
+  %band = and i32 %b, 16711935
+  %bswap = or i32 %tand, %band
+  call void @extra_use(i32 %t)
+  ret i32 %bswap
+}
+
+define i16 @test8(i16 %a) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:    [[REV:%.*]] = call i16 @llvm.bswap.i16(i16 %a)
+; CHECK-NEXT:    ret i16 [[REV]]
+;
+  %conv = zext i16 %a to i32
+  %shr = lshr i16 %a, 8
+  %shl = shl i32 %conv, 8
+  %conv1 = zext i16 %shr to i32
+  %or = or i32 %conv1, %shl
+  %conv2 = trunc i32 %or to i16
+  ret i16 %conv2
+}
+
+define i16 @test9(i16 %a) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:    [[REV:%.*]] = call i16 @llvm.bswap.i16(i16 %a)
+; CHECK-NEXT:    ret i16 [[REV]]
+;
+  %conv = zext i16 %a to i32
+  %shr = lshr i32 %conv, 8
+  %shl = shl i32 %conv, 8
+  %or = or i32 %shr, %shl
+  %conv2 = trunc i32 %or to i16
+  ret i16 %conv2
+}
+
+define i16 @test10(i32 %a) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 %a to i16
+; CHECK-NEXT:    [[REV:%.*]] = call i16 @llvm.bswap.i16(i16 [[TRUNC]])
+; CHECK-NEXT:    ret i16 [[REV]]
+;
+  %shr1 = lshr i32 %a, 8
+  %and1 = and i32 %shr1, 255
+  %and2 = shl i32 %a, 8
+  %shl1 = and i32 %and2, 65280
+  %or = or i32 %and1, %shl1
+  %conv = trunc i32 %or to i16
+  ret i16 %conv
+}
+

Added: llvm/trunk/test/Transforms/InstCombine/builtin-dynamic-object-size.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/builtin-dynamic-object-size.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/builtin-dynamic-object-size.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/builtin-dynamic-object-size.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,117 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s --dump-input-on-failure
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+; Function Attrs: nounwind ssp uwtable
+define i64 @weird_identity_but_ok(i64 %sz) {
+entry:
+  %call = tail call i8* @malloc(i64 %sz)
+  %calc_size = tail call i64 @llvm.objectsize.i64.p0i8(i8* %call, i1 false, i1 true, i1 true)
+  tail call void @free(i8* %call)
+  ret i64 %calc_size
+}
+
+; CHECK:      define i64 @weird_identity_but_ok(i64 %sz)
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   ret i64 %sz
+; CHECK-NEXT: }
+
+define i64 @phis_are_neat(i1 %which) {
+entry:
+  br i1 %which, label %first_label, label %second_label
+
+first_label:
+  %first_call = call i8* @malloc(i64 10)
+  br label %join_label
+
+second_label:
+  %second_call = call i8* @malloc(i64 30)
+  br label %join_label
+
+join_label:
+  %joined = phi i8* [ %first_call, %first_label ], [ %second_call, %second_label ]
+  %calc_size = tail call i64 @llvm.objectsize.i64.p0i8(i8* %joined, i1 false, i1 true, i1 true)
+  ret i64 %calc_size
+}
+
+; CHECK:      %0 = phi i64 [ 10, %first_label ], [ 30, %second_label ]
+; CHECK-NEXT: ret i64 %0
+
+define i64 @internal_pointer(i64 %sz) {
+entry:
+  %ptr = call i8* @malloc(i64 %sz)
+  %ptr2 = getelementptr inbounds i8, i8* %ptr, i32 2
+  %calc_size = call i64 @llvm.objectsize.i64.p0i8(i8* %ptr2, i1 false, i1 true, i1 true)
+  ret i64 %calc_size
+}
+
+; CHECK:      define i64 @internal_pointer(i64 %sz)
+; CHECK-NEXT: entry:
+; CHECK-NEXT:   %0 = add i64 %sz, -2
+; CHECK-NEXT:   %1 = icmp ult i64 %sz, 2
+; CHECK-NEXT:   %2 = select i1 %1, i64 0, i64 %0
+; CHECK-NEXT:   ret i64 %2
+; CHECK-NEXT: }
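+
+; (The expected IR above computes max(%sz - 2, 0): the size remaining in a
+; %sz-byte allocation when viewed from 2 bytes in, clamped at zero.)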
+
+define i64 @uses_nullptr_no_fold() {
+entry:
+  %res = call i64 @llvm.objectsize.i64.p0i8(i8* null, i1 false, i1 true, i1 true)
+  ret i64 %res
+}
+
+; CHECK: %res = call i64 @llvm.objectsize.i64.p0i8(i8* null, i1 false, i1 true, i1 true)
+
+define i64 @uses_nullptr_fold() {
+entry:
+  ; NOTE: the third parameter to this call is false, unlike above.
+  %res = call i64 @llvm.objectsize.i64.p0i8(i8* null, i1 false, i1 false, i1 true)
+  ret i64 %res
+}
+
+; CHECK: ret i64 0
+
+@d = common global i8 0, align 1
+@c = common global i32 0, align 4
+
+; Function Attrs: nounwind
+define void @f() {
+entry:
+  %.pr = load i32, i32* @c, align 4
+  %tobool4 = icmp eq i32 %.pr, 0
+  br i1 %tobool4, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %dp.05 = phi i8* [ %add.ptr, %for.body ], [ @d, %entry ]
+  %0 = tail call i64 @llvm.objectsize.i64.p0i8(i8* %dp.05, i1 false, i1 true, i1 true)
+  %conv = trunc i64 %0 to i32
+  tail call void @bury(i32 %conv) #3
+  %1 = load i32, i32* @c, align 4
+  %idx.ext = sext i32 %1 to i64
+  %add.ptr.offs = add i64 %idx.ext, 0
+  %2 = add i64 undef, %add.ptr.offs
+  %add.ptr = getelementptr inbounds i8, i8* %dp.05, i64 %idx.ext
+  %add = shl nsw i32 %1, 1
+  store i32 %add, i32* @c, align 4
+  %tobool = icmp eq i32 %1, 0
+  br i1 %tobool, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+; CHECK:   define void @f()
+; CHECK:     call i64 @llvm.objectsize.i64.p0i8(
+
+declare void @bury(i32) local_unnamed_addr #2
+
+; Function Attrs: nounwind allocsize(0)
+declare i8* @malloc(i64)
+
+declare i8* @get_unknown_buffer()
+
+; Function Attrs: nounwind
+declare void @free(i8* nocapture)
+
+; Function Attrs: nounwind readnone speculatable
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1)

Added: llvm/trunk/test/Transforms/InstCombine/builtin-object-size-offset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/builtin-object-size-offset.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/builtin-object-size-offset.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/builtin-object-size-offset.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,58 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; #include <stdlib.h>
+; #include <stdio.h>
+;
+; int foo1(int N) {
+;   char Big[20];
+;   char Small[10];
+;   char *Ptr = N ? Big + 10 : Small;
+;   return __builtin_object_size(Ptr, 0);
+; }
+;
+; void foo() {
+;   size_t ret;
+;   ret = foo1(0);
+;   printf("\n %d", ret);
+; }
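+;
+; __builtin_object_size(Ptr, 0) asks for the maximum remaining size: Big + 10
+; leaves 20 - 10 = 10 bytes and Small is 10 bytes, so either way the result
+; is 10, matching the "ret i32 10" CHECK below.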
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@.str = private unnamed_addr constant [5 x i8] c"\0A %d\00", align 1
+
+define i32 @foo1(i32 %N) {
+entry:
+  %Big = alloca [20 x i8], align 16
+  %Small = alloca [10 x i8], align 1
+  %0 = getelementptr inbounds [20 x i8], [20 x i8]* %Big, i64 0, i64 0
+  call void @llvm.lifetime.start.p0i8(i64 20, i8* %0)
+  %1 = getelementptr inbounds [10 x i8], [10 x i8]* %Small, i64 0, i64 0
+  call void @llvm.lifetime.start.p0i8(i64 10, i8* %1)
+  %tobool = icmp ne i32 %N, 0
+  %add.ptr = getelementptr inbounds [20 x i8], [20 x i8]* %Big, i64 0, i64 10
+  %cond = select i1 %tobool, i8* %add.ptr, i8* %1
+  %2 = call i64 @llvm.objectsize.i64.p0i8(i8* %cond, i1 false)
+  %conv = trunc i64 %2 to i32
+  call void @llvm.lifetime.end.p0i8(i64 10, i8* %1)
+  call void @llvm.lifetime.end.p0i8(i64 20, i8* %0)
+  ret i32 %conv
+; CHECK: ret i32 10 
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1)
+
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+
+define void @foo() {
+entry:
+  %call = tail call i32 @foo1(i32 0)
+  %conv = sext i32 %call to i64
+  %call1 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i64 %conv)
+  ret void
+}
+
+declare i32 @printf(i8* nocapture readonly, ...)
+

Added: llvm/trunk/test/Transforms/InstCombine/builtin-object-size-ptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/builtin-object-size-ptr.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/builtin-object-size-ptr.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/builtin-object-size-ptr.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,34 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; int foo() {
+; struct V { char buf1[10];
+;            int b;
+;            char buf2[10];
+;           } var;
+;
+;           char *p = &var.buf1[1];
+;           return __builtin_object_size (p, 0);
+; }
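+;
+; With 4-byte alignment the struct occupies 28 bytes (10 + 2 bytes of padding
+; + 4 + 10 = 26, rounded up to 28), so a pointer one byte into buf1 has 27
+; bytes left, which is the value the CHECK below expects.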
+
+%struct.V = type { [10 x i8], i32, [10 x i8] }
+
+define i32 @foo() #0 {
+entry:
+  %var = alloca %struct.V, align 4
+  %0 = bitcast %struct.V* %var to i8*
+  call void @llvm.lifetime.start.p0i8(i64 28, i8* %0) #3
+  %buf1 = getelementptr inbounds %struct.V, %struct.V* %var, i32 0, i32 0
+  %arrayidx = getelementptr inbounds [10 x i8], [10 x i8]* %buf1, i64 0, i64 1
+  %1 = call i64 @llvm.objectsize.i64.p0i8(i8* %arrayidx, i1 false)
+  %conv = trunc i64 %1 to i32
+  call void @llvm.lifetime.end.p0i8(i64 28, i8* %0) #3
+  ret i32 %conv
+; CHECK: ret i32 27
+; CHECK-NOT: ret i32 -1
+}
+
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
+
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) #2
+
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1

Added: llvm/trunk/test/Transforms/InstCombine/cabs-array.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/cabs-array.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/cabs-array.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/cabs-array.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,65 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define double @std_cabs([2 x double] %z) {
+; CHECK-LABEL: define double @std_cabs(
+; CHECK: tail call double @cabs(
+  %call = tail call double @cabs([2 x double] %z)
+  ret double %call
+}
+
+define float @std_cabsf([2 x float] %z) {
+; CHECK-LABEL: define float @std_cabsf(
+; CHECK: tail call float @cabsf(
+  %call = tail call float @cabsf([2 x float] %z)
+  ret float %call
+}
+
+define fp128 @std_cabsl([2 x fp128] %z) {
+; CHECK-LABEL: define fp128 @std_cabsl(
+; CHECK: tail call fp128 @cabsl(
+  %call = tail call fp128 @cabsl([2 x fp128] %z)
+  ret fp128 %call
+}
+
+define double @fast_cabs([2 x double] %z) {
+; CHECK-LABEL: define double @fast_cabs(
+; CHECK: %real = extractvalue [2 x double] %z, 0
+; CHECK: %imag = extractvalue [2 x double] %z, 1
+; CHECK: %1 = fmul fast double %real, %real
+; CHECK: %2 = fmul fast double %imag, %imag
+; CHECK: %3 = fadd fast double %1, %2
+; CHECK: %cabs = call fast double @llvm.sqrt.f64(double %3)
+; CHECK: ret double %cabs
+  %call = tail call fast double @cabs([2 x double] %z)
+  ret double %call
+}
+
+define float @fast_cabsf([2 x float] %z) {
+; CHECK-LABEL: define float @fast_cabsf(
+; CHECK: %real = extractvalue [2 x float] %z, 0
+; CHECK: %imag = extractvalue [2 x float] %z, 1
+; CHECK: %1 = fmul fast float %real, %real
+; CHECK: %2 = fmul fast float %imag, %imag
+; CHECK: %3 = fadd fast float %1, %2
+; CHECK: %cabs = call fast float @llvm.sqrt.f32(float %3)
+; CHECK: ret float %cabs
+  %call = tail call fast float @cabsf([2 x float] %z)
+  ret float %call
+}
+
+define fp128 @fast_cabsl([2 x fp128] %z) {
+; CHECK-LABEL: define fp128 @fast_cabsl(
+; CHECK: %real = extractvalue [2 x fp128] %z, 0
+; CHECK: %imag = extractvalue [2 x fp128] %z, 1
+; CHECK: %1 = fmul fast fp128 %real, %real
+; CHECK: %2 = fmul fast fp128 %imag, %imag
+; CHECK: %3 = fadd fast fp128 %1, %2
+; CHECK: %cabs = call fast fp128 @llvm.sqrt.f128(fp128 %3)
+; CHECK: ret fp128 %cabs
+  %call = tail call fast fp128 @cabsl([2 x fp128] %z)
+  ret fp128 %call
+}
+
+declare double @cabs([2 x double])
+declare float @cabsf([2 x float])
+declare fp128 @cabsl([2 x fp128])

Added: llvm/trunk/test/Transforms/InstCombine/cabs-discrete.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/cabs-discrete.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/cabs-discrete.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/cabs-discrete.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,59 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define double @std_cabs(double %real, double %imag) {
+; CHECK-LABEL: define double @std_cabs(
+; CHECK: tail call double @cabs(
+  %call = tail call double @cabs(double %real, double %imag)
+  ret double %call
+}
+
+define float @std_cabsf(float %real, float %imag) {
+; CHECK-LABEL: define float @std_cabsf(
+; CHECK: tail call float @cabsf(
+  %call = tail call float @cabsf(float %real, float %imag)
+  ret float %call
+}
+
+define fp128 @std_cabsl(fp128 %real, fp128 %imag) {
+; CHECK-LABEL: define fp128 @std_cabsl(
+; CHECK: tail call fp128 @cabsl(
+  %call = tail call fp128 @cabsl(fp128 %real, fp128 %imag)
+  ret fp128 %call
+}
+
+define double @fast_cabs(double %real, double %imag) {
+; CHECK-LABEL: define double @fast_cabs(
+; CHECK: %1 = fmul fast double %real, %real
+; CHECK: %2 = fmul fast double %imag, %imag
+; CHECK: %3 = fadd fast double %1, %2
+; CHECK: %cabs = call fast double @llvm.sqrt.f64(double %3)
+; CHECK: ret double %cabs
+  %call = tail call fast double @cabs(double %real, double %imag)
+  ret double %call
+}
+
+define float @fast_cabsf(float %real, float %imag) {
+; CHECK-LABEL: define float @fast_cabsf(
+; CHECK: %1 = fmul fast float %real, %real
+; CHECK: %2 = fmul fast float %imag, %imag
+; CHECK: %3 = fadd fast float %1, %2
+; CHECK: %cabs = call fast float @llvm.sqrt.f32(float %3)
+; CHECK: ret float %cabs
+  %call = tail call fast float @cabsf(float %real, float %imag)
+  ret float %call
+}
+
+define fp128 @fast_cabsl(fp128 %real, fp128 %imag) {
+; CHECK-LABEL: define fp128 @fast_cabsl(
+; CHECK: %1 = fmul fast fp128 %real, %real
+; CHECK: %2 = fmul fast fp128 %imag, %imag
+; CHECK: %3 = fadd fast fp128 %1, %2
+; CHECK: %cabs = call fast fp128 @llvm.sqrt.f128(fp128 %3)
+; CHECK: ret fp128 %cabs
+  %call = tail call fast fp128 @cabsl(fp128 %real, fp128 %imag)
+  ret fp128 %call
+}
+
+declare double @cabs(double %real, double %imag)
+declare float @cabsf(float %real, float %imag)
+declare fp128 @cabsl(fp128 %real, fp128 %imag)

Added: llvm/trunk/test/Transforms/InstCombine/call-callconv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/call-callconv.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/call-callconv.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/call-callconv.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; Verify that the non-default calling conv doesn't prevent the libcall simplification
+
+@.str = private unnamed_addr constant [4 x i8] c"abc\00", align 1
+
+define arm_aapcscc i32 @_abs(i32 %i) nounwind readnone {
+; CHECK-LABEL: @_abs(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[I:%.*]], 0
+; CHECK-NEXT:    [[NEG:%.*]] = sub nsw i32 0, [[I]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[NEG]], i32 [[I]]
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+  %call = tail call arm_aapcscc i32 @abs(i32 %i) nounwind readnone
+  ret i32 %call
+}
+
+declare arm_aapcscc i32 @abs(i32) nounwind readnone
+
+define arm_aapcscc i32 @_labs(i32 %i) nounwind readnone {
+; CHECK-LABEL: @_labs(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[I:%.*]], 0
+; CHECK-NEXT:    [[NEG:%.*]] = sub nsw i32 0, [[I]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[NEG]], i32 [[I]]
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+  %call = tail call arm_aapcscc i32 @labs(i32 %i) nounwind readnone
+  ret i32 %call
+}
+
+declare arm_aapcscc i32 @labs(i32) nounwind readnone
+
+define arm_aapcscc i32 @_strlen1() {
+; CHECK-LABEL: @_strlen1(
+; CHECK-NEXT:    ret i32 3
+;
+  %call = tail call arm_aapcscc i32 @strlen(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0))
+  ret i32 %call
+}
+
+declare arm_aapcscc i32 @strlen(i8*)
+
+define arm_aapcscc zeroext i1 @_strlen2(i8* %str) {
+; CHECK-LABEL: @_strlen2(
+; CHECK-NEXT:    [[STRLENFIRST:%.*]] = load i8, i8* [[STR:%.*]], align 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[STRLENFIRST]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %call = tail call arm_aapcscc i32 @strlen(i8* %str)
+  %cmp = icmp ne i32 %call, 0
+  ret i1 %cmp
+}
+

Added: llvm/trunk/test/Transforms/InstCombine/call-cast-attrs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/call-cast-attrs.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/call-cast-attrs.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/call-cast-attrs.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,29 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define signext i32 @b(i32* inreg %x)   {
+  ret i32 0
+}
+
+define void @c(...) {
+  ret void
+}
+
+declare void @useit(i32)
+
+define void @d(i32 %x, ...) {
+  call void @useit(i32 %x)
+  ret void
+}
+
+define void @g(i32* %y) {
+  call i32 bitcast (i32 (i32*)* @b to i32 (i32)*)(i32 zeroext 0)
+  call void bitcast (void (...)* @c to void (i32*)*)(i32* %y)
+  call void bitcast (void (...)* @c to void (i32*)*)(i32* sret %y)
+  call void bitcast (void (i32, ...)* @d to void (i32, i32*)*)(i32 0, i32* sret %y)
+  ret void
+}
+; CHECK-LABEL: define void @g(i32* %y)
+; CHECK: call i32 bitcast (i32 (i32*)* @b to i32 (i32)*)(i32 zeroext 0)
+; CHECK: call void (...) @c(i32* %y)
+; CHECK: call void bitcast (void (...)* @c to void (i32*)*)(i32* sret %y)
+; CHECK: call void bitcast (void (i32, ...)* @d to void (i32, i32*)*)(i32 0, i32* sret %y)

Added: llvm/trunk/test/Transforms/InstCombine/call-cast-target-inalloca.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/call-cast-target-inalloca.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/call-cast-target-inalloca.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/call-cast-target-inalloca.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,22 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32"
+target triple = "i686-pc-linux-gnu"
+
+declare void @takes_i32(i32)
+declare void @takes_i32_inalloca(i32* inalloca)
+
+define void @f() {
+; CHECK-LABEL: define void @f()
+  %args = alloca inalloca i32
+  call void bitcast (void (i32)* @takes_i32 to void (i32*)*)(i32* inalloca %args)
+; CHECK: call void bitcast
+  ret void
+}
+
+define void @g() {
+; CHECK-LABEL: define void @g()
+  call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
+; CHECK: call void bitcast
+  ret void
+}

Added: llvm/trunk/test/Transforms/InstCombine/call-cast-target.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/call-cast-target.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/call-cast-target.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/call-cast-target.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,89 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32"
+target triple = "i686-pc-linux-gnu"
+
+define i32 @main() {
+; CHECK-LABEL: @main
+; CHECK: %[[call:.*]] = call i8* @ctime(i32* null)
+; CHECK: %[[cast:.*]] = ptrtoint i8* %[[call]] to i32
+; CHECK: ret i32 %[[cast]]
+entry:
+  %tmp = call i32 bitcast (i8* (i32*)* @ctime to i32 (i32*)*)( i32* null )          ; <i32> [#uses=1]
+  ret i32 %tmp
+}
+
+declare i8* @ctime(i32*)
+
+define internal { i8 } @foo(i32*) {
+entry:
+  ret { i8 } { i8 0 }
+}
+
+define void @test_struct_ret() {
+; CHECK-LABEL: @test_struct_ret
+; CHECK-NOT: bitcast
+entry:
+  %0 = call { i8 } bitcast ({ i8 } (i32*)* @foo to { i8 } (i16*)*)(i16* null)
+  ret void
+}
+
+declare i32 @fn1(i32)
+
+define i32 @test1(i32* %a) {
+; CHECK-LABEL: @test1
+; CHECK:      %[[cast:.*]] = ptrtoint i32* %a to i32
+; CHECK-NEXT: %[[call:.*]] = tail call i32 @fn1(i32 %[[cast]])
+; CHECK-NEXT: ret i32 %[[call]]
+entry:
+  %call = tail call i32 bitcast (i32 (i32)* @fn1 to i32 (i32*)*)(i32* %a)
+  ret i32 %call
+}
+
+declare i32 @fn2(i16)
+
+define i32 @test2(i32* %a) {
+; CHECK-LABEL: @test2
+; CHECK:      %[[call:.*]] = tail call i32 bitcast (i32 (i16)* @fn2 to i32 (i32*)*)(i32* %a)
+; CHECK-NEXT: ret i32 %[[call]]
+entry:
+  %call = tail call i32 bitcast (i32 (i16)* @fn2 to i32 (i32*)*)(i32* %a)
+  ret i32 %call
+}
+
+declare i32 @fn3(i64)
+
+define i32 @test3(i32* %a) {
+; CHECK-LABEL: @test3
+; CHECK:      %[[call:.*]] = tail call i32 bitcast (i32 (i64)* @fn3 to i32 (i32*)*)(i32* %a)
+; CHECK-NEXT: ret i32 %[[call]]
+entry:
+  %call = tail call i32 bitcast (i32 (i64)* @fn3 to i32 (i32*)*)(i32* %a)
+  ret i32 %call
+}
+
+declare i32 @fn4(i32) "thunk"
+
+define i32 @test4(i32* %a) {
+; CHECK-LABEL: @test4
+; CHECK:      %[[call:.*]] = tail call i32 bitcast (i32 (i32)* @fn4 to i32 (i32*)*)(i32* %a)
+; CHECK-NEXT: ret i32 %[[call]]
+entry:
+  %call = tail call i32 bitcast (i32 (i32)* @fn4 to i32 (i32*)*)(i32* %a)
+  ret i32 %call
+}
+
+declare i1 @fn5({ i32, i32 }* byval align 4 %r)
+
+define i1 @test5() {
+; CHECK-LABEL: @test5
+; CHECK:      %[[call:.*]] = call i1 bitcast (i1 ({ i32, i32 }*)* @fn5 to i1 (i32, i32)*)(i32 {{.*}}, i32 {{.*}})
+; CHECK-NEXT: ret i1 %[[call]]
+  %1 = alloca { i32, i32 }, align 4
+  %2 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %1, i32 0, i32 0
+  %3 = load i32, i32* %2, align 4
+  %4 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %1, i32 0, i32 1
+  %5 = load i32, i32* %4, align 4
+  %6 = call i1 bitcast (i1 ({ i32, i32 }*)* @fn5 to i1 (i32, i32)*)(i32 %3, i32 %5)
+  ret i1 %6
+}

Added: llvm/trunk/test/Transforms/InstCombine/call-guard.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/call-guard.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/call-guard.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/call-guard.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,110 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare void @llvm.experimental.guard(i1, ...)
+
+define void @test_guard_adjacent_same_cond(i1 %A) {
+; CHECK-LABEL: @test_guard_adjacent_same_cond(
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 %A) [ "deopt"() ]
+; CHECK-NEXT:    ret void
+  call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+  call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+  call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+  call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+  call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+  call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+  call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+  call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+  call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+  call void(i1, ...) @llvm.experimental.guard( i1 %A )[ "deopt"() ]
+  ret void
+}
+
+define void @test_guard_adjacent_diff_cond(i1 %A, i1 %B, i1 %C) {
+; CHECK-LABEL: @test_guard_adjacent_diff_cond(
+; CHECK-NEXT:    %1 = and i1 %A, %B
+; CHECK-NEXT:    %2 = and i1 %1, %C
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 %2, i32 123) [ "deopt"() ]
+; CHECK-NEXT:    ret void
+  call void(i1, ...) @llvm.experimental.guard( i1 %A, i32 123 )[ "deopt"() ]
+  call void(i1, ...) @llvm.experimental.guard( i1 %B, i32 456 )[ "deopt"() ]
+  call void(i1, ...) @llvm.experimental.guard( i1 %C, i32 789 )[ "deopt"() ]
+  ret void
+}
+
+; This version tests for the common form where the conditions are
+; between the guards
+define void @test_guard_adjacent_diff_cond2(i32 %V1, i32 %V2) {
+; CHECK-LABEL: @test_guard_adjacent_diff_cond2(
+; CHECK-NEXT:    %1 = and i32 %V1, %V2
+; CHECK-NEXT:    %2 = icmp slt i32 %1, 0
+; CHECK-NEXT:    %and = and i32 %V1, 255
+; CHECK-NEXT:    %C = icmp ult i32 %and, 129
+; CHECK-NEXT:    %3 = and i1 %2, %C
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 %3, i32 123) [ "deopt"() ]
+; CHECK-NEXT:    ret void
+  %A = icmp slt i32 %V1, 0
+  call void(i1, ...) @llvm.experimental.guard( i1 %A, i32 123 )[ "deopt"() ]
+  %B = icmp slt i32 %V2, 0
+  call void(i1, ...) @llvm.experimental.guard( i1 %B, i32 456 )[ "deopt"() ]
+  %and = and i32 %V1, 255
+  %C = icmp sle i32 %and, 128
+  call void(i1, ...) @llvm.experimental.guard( i1 %C, i32 789 )[ "deopt"() ]
+  ret void
+}
+
+; Might not be legal to hoist the load above the first guard since the
+; guard might control dereferenceability
+define void @negative_load(i32 %V1, i32* %P) {
+; CHECK-LABEL: @negative_load
+; CHECK:    @llvm.experimental.guard
+; CHECK:    @llvm.experimental.guard
+  %A = icmp slt i32 %V1, 0
+  call void(i1, ...) @llvm.experimental.guard( i1 %A, i32 123 )[ "deopt"() ]
+  %V2 = load i32, i32* %P
+  %B = icmp slt i32 %V2, 0
+  call void(i1, ...) @llvm.experimental.guard( i1 %B, i32 456 )[ "deopt"() ]
+  ret void
+}
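
As a C-level analogue of why that hoist is unsafe (illustrative only, hypothetical names, not part of the commit): the first check may be the only thing proving the pointer is dereferenceable, so the load must stay below it. In the IR form the merged guard takes a single i1, so the load feeding the second condition would have to execute before the merged guard, i.e. before the check that guards its dereferenceability.

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for the guard's deoptimization path. */
static void deoptimize(void) { puts("deopt"); exit(0); }

/* Analogue of @negative_load: only the first check establishes that it is
 * safe to dereference p, so the load and the second check must not be
 * moved above it. */
static int check_then_load(int cond, const int *p) {
  if (cond < 0)
    deoptimize();
  int v = *p;
  if (v < 0)
    deoptimize();
  return v;
}

int main(void) {
  int x = 42;
  printf("%d\n", check_then_load(0, &x));
  return 0;
}
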
+
+define void @deref_load(i32 %V1, i32* dereferenceable(4) %P) {
+; CHECK-LABEL: @deref_load
+; CHECK-NEXT:  %V2 = load i32, i32* %P, align 4
+; CHECK-NEXT:  %1 = and i32 %V2, %V1
+; CHECK-NEXT:  %2 = icmp slt i32 %1, 0
+; CHECK-NEXT:  call void (i1, ...) @llvm.experimental.guard(i1 %2, i32 123) [ "deopt"() ]
+  %A = icmp slt i32 %V1, 0
+  call void(i1, ...) @llvm.experimental.guard( i1 %A, i32 123 )[ "deopt"() ]
+  %V2 = load i32, i32* %P
+  %B = icmp slt i32 %V2, 0
+  call void(i1, ...) @llvm.experimental.guard( i1 %B, i32 456 )[ "deopt"() ]
+  ret void
+}
+
+; The divide might fault above the guard
+define void @negative_div(i32 %V1, i32 %D) {
+; CHECK-LABEL: @negative_div
+; CHECK:    @llvm.experimental.guard
+; CHECK:    @llvm.experimental.guard
+  %A = icmp slt i32 %V1, 0
+  call void(i1, ...) @llvm.experimental.guard( i1 %A, i32 123 )[ "deopt"() ]
+  %V2 = udiv i32 %V1, %D 
+  %B = icmp slt i32 %V2, 0
+  call void(i1, ...) @llvm.experimental.guard( i1 %B, i32 456 )[ "deopt"() ]
+  ret void
+}
+
+; Highlight the limit of the window in a case which would otherwise be mergeable
+define void @negative_window(i32 %V1, i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: @negative_window
+; CHECK:    @llvm.experimental.guard
+; CHECK:    @llvm.experimental.guard
+  %A = icmp slt i32 %V1, 0
+  call void(i1, ...) @llvm.experimental.guard( i1 %A, i32 123 )[ "deopt"() ]
+  %V2 = add i32 %a, %b
+  %V3 = add i32 %V2, %c
+  %V4 = add i32 %V3, %d
+  %B = icmp slt i32 %V4, 0
+  call void(i1, ...) @llvm.experimental.guard( i1 %B, i32 456 )[ "deopt"() ]
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/InstCombine/call-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/call-intrinsics.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/call-intrinsics.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/call-intrinsics.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,19 @@
+; RUN: opt < %s -instcombine | llvm-dis
+
+ at X = global i8 0                ; <i8*> [#uses=3]
+ at Y = global i8 12               ; <i8*> [#uses=2]
+
+declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i1)
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
+
+declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1)
+
+define void @zero_byte_test() {
+        ; These process zero bytes, so they are a noop.
+        call void @llvm.memmove.p0i8.p0i8.i32(i8* align 128 @X, i8* align 128 @Y, i32 0, i1 false )
+        call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 128 @X, i8* align 128 @Y, i32 0, i1 false )
+        call void @llvm.memset.p0i8.i32(i8* align 128 @X, i8 123, i32 0, i1 false )
+        ret void
+}
+

Added: llvm/trunk/test/Transforms/InstCombine/call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/call.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/call.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/call.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,300 @@
+; Ignore stderr, we expect warnings there
+; RUN: opt < %s -instcombine 2> /dev/null -S | FileCheck %s
+
+target datalayout = "E-p:64:64:64-p1:16:16:16-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
+; Simple case, argument translatable without changing the value
+declare void @test1a(i8*)
+
+define void @test1(i32* %A) {
+; CHECK-LABEL: @test1(
+; CHECK: %1 = bitcast i32* %A to i8*
+; CHECK: call void @test1a(i8* %1)
+; CHECK: ret void
+  call void bitcast (void (i8*)* @test1a to void (i32*)*)( i32* %A )
+  ret void
+}
+
+
+; Should not be done because of the change in address space of the parameter
+define void @test1_as1_illegal(i32 addrspace(1)* %A) {
+; CHECK-LABEL: @test1_as1_illegal(
+; CHECK: call void bitcast
+  call void bitcast (void (i8*)* @test1a to void (i32 addrspace(1)*)*)(i32 addrspace(1)* %A)
+  ret void
+}
+
+; Test1, but the argument has a different sized address-space
+declare void @test1a_as1(i8 addrspace(1)*)
+
+; This one is OK to perform
+define void @test1_as1(i32 addrspace(1)* %A) {
+; CHECK-LABEL: @test1_as1(
+; CHECK: %1 = bitcast i32 addrspace(1)* %A to i8 addrspace(1)*
+; CHECK: call void @test1a_as1(i8 addrspace(1)* %1)
+; CHECK: ret void
+  call void bitcast (void (i8 addrspace(1)*)* @test1a_as1 to void (i32 addrspace(1)*)*)(i32 addrspace(1)* %A )
+  ret void
+}
+
+; More complex case, translate argument because of resolution.  This is safe
+; because we have the body of the function
+define void @test2a(i8 %A) {
+; CHECK-LABEL: @test2a(
+; CHECK: ret void
+  ret void
+}
+
+define i32 @test2(i32 %A) {
+; CHECK-LABEL: @test2(
+; CHECK: call void bitcast
+; CHECK: ret i32 %A
+  call void bitcast (void (i8)* @test2a to void (i32)*)( i32 %A )
+  ret i32 %A
+}
+
+
+; Resolving this should insert a cast from sbyte to int, following the C
+; promotion rules.
+define void @test3a(i8, ...) {unreachable }
+
+define void @test3(i8 %A, i8 %B) {
+; CHECK-LABEL: @test3(
+; CHECK: %1 = zext i8 %B to i32
+; CHECK: call void (i8, ...) @test3a(i8 %A, i32 %1)
+; CHECK: ret void
+  call void bitcast (void (i8, ...)* @test3a to void (i8, i8)*)( i8 %A, i8 %B)
+  ret void
+}
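
The "C promotion rules" referred to above are the default argument promotions applied to varargs arguments. A minimal, purely illustrative C sketch (hypothetical names, not part of the commit):

#include <stdarg.h>
#include <stdio.h>

/* Hypothetical varargs callee: an argument narrower than int must be read
 * back as int, because the caller widened it under the default promotions. */
static void takes_varargs(char tag, ...) {
  va_list ap;
  va_start(ap, tag);
  int widened = va_arg(ap, int); /* reading it as char here would be undefined */
  printf("%c %d\n", tag, widened);
  va_end(ap);
}

int main(void) {
  char b = 7;
  takes_varargs('A', b); /* b is promoted from char to int at the call site */
  return 0;
}
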
+
+; test conversion of return value...
+define i8 @test4a() {
+; CHECK-LABEL: @test4a(
+; CHECK: ret i8 0
+  ret i8 0
+}
+
+define i32 @test4() {
+; CHECK-LABEL: @test4(
+; CHECK: call i32 bitcast
+  %X = call i32 bitcast (i8 ()* @test4a to i32 ()*)( )            ; <i32> [#uses=1]
+  ret i32 %X
+}
+
+; test conversion of return value... no value conversion occurs so we can do
+; this with just a prototype...
+declare i32 @test5a()
+
+define i32 @test5() {
+; CHECK-LABEL: @test5(
+; CHECK: %X = call i32 @test5a()
+; CHECK: ret i32 %X
+  %X = call i32 @test5a( )                ; <i32> [#uses=1]
+  ret i32 %X
+}
+
+; test addition of new arguments...
+declare i32 @test6a(i32)
+
+define i32 @test6() {
+; CHECK-LABEL: @test6(
+; CHECK: %X = call i32 @test6a(i32 0)
+; CHECK: ret i32 %X
+  %X = call i32 bitcast (i32 (i32)* @test6a to i32 ()*)( )
+  ret i32 %X
+}
+
+; test removal of arguments, only can happen with a function body
+define void @test7a() {
+; CHECK-LABEL: @test7a(
+; CHECK: ret void
+  ret void
+}
+
+define void @test7() {
+; CHECK-LABEL: @test7(
+; CHECK: call void @test7a()
+; CHECK: ret void
+  call void bitcast (void ()* @test7a to void (i32)*)( i32 5 )
+  ret void
+}
+
+
+; rdar://7590304
+declare void @test8a()
+
+define i8* @test8() personality i32 (...)* @__gxx_personality_v0 {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: invoke void @test8a()
+; Don't turn this into "unreachable": the callee and caller don't agree in
+; calling conv, but the implementation of test8a may actually end up using the
+; right calling conv.
+  invoke void @test8a()
+          to label %invoke.cont unwind label %try.handler
+
+invoke.cont:                                      ; preds = %entry
+  unreachable
+
+try.handler:                                      ; preds = %entry
+  %exn = landingpad {i8*, i32}
+            cleanup
+  ret i8* null
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+
+; Don't turn this into a direct call, because test9x is just a prototype and
+; doing so will make it varargs.
+; rdar://9038601
+declare i8* @test9x(i8*, i8*, ...) noredzone
+define i8* @test9(i8* %arg, i8* %tmp3) nounwind ssp noredzone {
+; CHECK-LABEL: @test9
+entry:
+  %call = call i8* bitcast (i8* (i8*, i8*, ...)* @test9x to i8* (i8*, i8*)*)(i8* %arg, i8* %tmp3) noredzone
+  ret i8* %call
+; CHECK-LABEL: @test9(
+; CHECK: call i8* bitcast
+}
+
+
+; Parameter that's a vector of pointers
+declare void @test10a(<2 x i8*>)
+
+define void @test10(<2 x i32*> %A) {
+; CHECK-LABEL: @test10(
+; CHECK: %1 = bitcast <2 x i32*> %A to <2 x i8*>
+; CHECK: call void @test10a(<2 x i8*> %1)
+; CHECK: ret void
+  call void bitcast (void (<2 x i8*>)* @test10a to void (<2 x i32*>)*)(<2 x i32*> %A)
+  ret void
+}
+
+; Don't transform because different address spaces
+declare void @test10a_mixed_as(<2 x i8 addrspace(1)*>)
+
+define void @test10_mixed_as(<2 x i8*> %A) {
+; CHECK-LABEL: @test10_mixed_as(
+; CHECK: call void bitcast
+  call void bitcast (void (<2 x i8 addrspace(1)*>)* @test10a_mixed_as to void (<2 x i8*>)*)(<2 x i8*> %A)
+  ret void
+}
+
+; Return type that's a pointer
+define i8* @test11a() {
+  ret i8* zeroinitializer
+}
+
+define i32* @test11() {
+; CHECK-LABEL: @test11(
+; CHECK: %X = call i8* @test11a()
+; CHECK: %1 = bitcast i8* %X to i32*
+  %X = call i32* bitcast (i8* ()* @test11a to i32* ()*)()
+  ret i32* %X
+}
+
+; Return type that's a pointer with a different address space
+define i8 addrspace(1)* @test11a_mixed_as() {
+  ret i8 addrspace(1)* zeroinitializer
+}
+
+define i8* @test11_mixed_as() {
+; CHECK-LABEL: @test11_mixed_as(
+; CHECK: call i8* bitcast
+  %X = call i8* bitcast (i8 addrspace(1)* ()* @test11a_mixed_as to i8* ()*)()
+  ret i8* %X
+}
+
+; Return type that's a vector of pointers
+define <2 x i8*> @test12a() {
+  ret <2 x i8*> zeroinitializer
+}
+
+define <2 x i32*> @test12() {
+; CHECK-LABEL: @test12(
+; CHECK: %X = call <2 x i8*> @test12a()
+; CHECK: %1 = bitcast <2 x i8*> %X to <2 x i32*>
+  %X = call <2 x i32*> bitcast (<2 x i8*> ()* @test12a to <2 x i32*> ()*)()
+  ret <2 x i32*> %X
+}
+
+define <2 x i8 addrspace(1)*> @test12a_mixed_as() {
+  ret <2 x i8 addrspace(1)*> zeroinitializer
+}
+
+define <2 x i8*> @test12_mixed_as() {
+; CHECK-LABEL: @test12_mixed_as(
+; CHECK: call <2 x i8*> bitcast
+  %X = call <2 x i8*> bitcast (<2 x i8 addrspace(1)*> ()* @test12a_mixed_as to <2 x i8*> ()*)()
+  ret <2 x i8*> %X
+}
+
+
+; Mix parameter that's a vector of integers and pointers of the same size
+declare void @test13a(<2 x i64>)
+
+define void @test13(<2 x i32*> %A) {
+; CHECK-LABEL: @test13(
+; CHECK: call void bitcast
+  call void bitcast (void (<2 x i64>)* @test13a to void (<2 x i32*>)*)(<2 x i32*> %A)
+  ret void
+}
+
+; Mix parameter that's a vector of integers and pointers of the same
+; size, but the other way around
+declare void @test14a(<2 x i8*>)
+
+define void @test14(<2 x i64> %A) {
+; CHECK-LABEL: @test14(
+; CHECK: call void bitcast
+  call void bitcast (void (<2 x i8*>)* @test14a to void (<2 x i64>)*)(<2 x i64> %A)
+  ret void
+}
+
+
+; Return type that's a vector
+define <2 x i16> @test15a() {
+  ret <2 x i16> zeroinitializer
+}
+
+define i32 @test15() {
+; CHECK-LABEL: @test15(
+; CHECK: %X = call <2 x i16> @test15a()
+; CHECK: %1 = bitcast <2 x i16> %X to i32
+  %X = call i32 bitcast (<2 x i16> ()* @test15a to i32 ()*)( )
+  ret i32 %X
+}
+
+define i32 @test16a() {
+  ret i32 0
+}
+
+define <2 x i16> @test16() {
+; CHECK-LABEL: @test16(
+; CHECK: %X = call i32 @test16a()
+; CHECK: %1 = bitcast i32 %X to <2 x i16>
+  %X = call <2 x i16> bitcast (i32 ()* @test16a to <2 x i16> ()*)( )
+  ret <2 x i16> %X
+}
+
+declare i32 @pr28655(i32 returned %V)
+
+define i32 @test17() {
+entry:
+  %C = call i32 @pr28655(i32 0)
+  ret i32 %C
+}
+; CHECK-LABEL: @test17(
+; CHECK: call i32 @pr28655(i32 0)
+; CHECK: ret i32 0
+
+define void @non_vararg(i8*, i32) {
+  ret void
+}
+
+define void @test_cast_to_vararg(i8* %this) {
+; CHECK-LABEL: test_cast_to_vararg
+; CHECK:  call void @non_vararg(i8* %this, i32 42)
+  call void (i8*, ...) bitcast (void (i8*, i32)* @non_vararg to void (i8*, ...)*)(i8* %this, i32 42)
+  ret void
+}

Added: llvm/trunk/test/Transforms/InstCombine/call2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/call2.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/call2.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/call2.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,25 @@
+; RUN: opt < %s -instcombine | llvm-dis
+
+; This used to crash trying to do a double-to-pointer conversion
+define i32 @bar() {
+entry:
+	%retval = alloca i32, align 4		; <i32*> [#uses=1]
+	%tmp = call i32 (...) bitcast (i32 (i8*)* @f to i32 (...)*)( double 3.000000e+00 )		; <i32> [#uses=0]
+	br label %return
+
+return:		; preds = %entry
+	%retval1 = load i32, i32* %retval		; <i32> [#uses=1]
+	ret i32 %retval1
+}
+
+define i32 @f(i8* %p) {
+entry:
+	%p_addr = alloca i8*		; <i8**> [#uses=1]
+	%retval = alloca i32, align 4		; <i32*> [#uses=1]
+	store i8* %p, i8** %p_addr
+	br label %return
+
+return:		; preds = %entry
+	%retval1 = load i32, i32* %retval		; <i32> [#uses=1]
+	ret i32 %retval1
+}

Added: llvm/trunk/test/Transforms/InstCombine/call_nonnull_arg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/call_nonnull_arg.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/call_nonnull_arg.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/call_nonnull_arg.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; InstCombine should mark null-checked argument as nonnull at callsite
+declare void @dummy(i32*, i32)
+
+define void @test(i32* %a, i32 %b) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COND1:%.*]] = icmp eq i32* %a, null
+; CHECK-NEXT:    br i1 [[COND1]], label %dead, label %not_null
+; CHECK:       not_null:
+; CHECK-NEXT:    [[COND2:%.*]] = icmp eq i32 %b, 0
+; CHECK-NEXT:    br i1 [[COND2]], label %dead, label %not_zero
+; CHECK:       not_zero:
+; CHECK-NEXT:    call void @dummy(i32* nonnull %a, i32 %b)
+; CHECK-NEXT:    ret void
+; CHECK:       dead:
+; CHECK-NEXT:    unreachable
+;
+entry:
+  %cond1 = icmp eq i32* %a, null
+  br i1 %cond1, label %dead, label %not_null
+not_null:
+  %cond2 = icmp eq i32 %b, 0
+  br i1 %cond2, label %dead, label %not_zero
+not_zero:
+  call void @dummy(i32* %a, i32 %b)
+  ret void
+dead:
+  unreachable
+}
+
+; The nonnull attribute in the 'bar' declaration is 
+; propagated to the parameters of the 'baz' callsite. 
+
+declare void @bar(i8*, i8* nonnull)
+declare void @baz(i8*, i8*)
+
+define void @deduce_nonnull_from_another_call(i8* %a, i8* %b) {
+; CHECK-LABEL: @deduce_nonnull_from_another_call(
+; CHECK-NEXT:    call void @bar(i8* %a, i8* %b)
+; CHECK-NEXT:    call void @baz(i8* nonnull %b, i8* nonnull %b)
+; CHECK-NEXT:    ret void
+;
+  call void @bar(i8* %a, i8* %b)
+  call void @baz(i8* %b, i8* %b)
+  ret void
+}
+

Added: llvm/trunk/test/Transforms/InstCombine/callsite_nonnull_args_through_casts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/callsite_nonnull_args_through_casts.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/callsite_nonnull_args_through_casts.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/callsite_nonnull_args_through_casts.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,99 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+declare void @foo(i8*)
+declare void @bar(i8 addrspace(1)*)
+
+define void @nonnullAfterBitCast() {
+entry:
+  %i = alloca i32, align 4
+  %tmp1 = bitcast i32* %i to i8*
+; CHECK: call void @foo(i8* nonnull %tmp1)
+  call void @foo(i8* %tmp1)
+  ret void
+}
+
+define void @nonnullAfterSExt(i8 %a) {
+entry:
+  %b = zext i8 %a to i32              ; <- %b is >= 0
+  %c = add nsw nuw i32 %b, 2          ; <- %c is > 0
+  %sext = sext i32 %c to i64          ; <- %sext cannot be 0 because %c is not 0
+  %i2p = inttoptr i64 %sext to i8*    ; <- no-op int2ptr cast
+; CHECK: call void @foo(i8* nonnull %i2p)
+  call void @foo(i8* %i2p)
+  ret void
+}
+
+define void @nonnullAfterZExt(i8 %a) {
+entry:
+  %b = zext i8 %a to i32              ; <- %b is >= 0
+  %c = add nsw nuw i32 %b, 2          ; <- %c is > 0
+  %zext = zext i32 %c to i64          ; <- %zext cannot be 0 because %c is not 0
+  %i2p = inttoptr i64 %zext to i8*    ; <- no-op int2ptr cast
+; CHECK: call void @foo(i8* nonnull %i2p)
+  call void @foo(i8* %i2p)
+  ret void
+}
+
+declare void @llvm.assume(i1 %b)
+
+define void @nonnullAfterInt2Ptr(i32 %u, i64 %lu) {
+entry:
+  %nz = sdiv exact i32 100, %u         ; %nz cannot be null
+  %i2p = inttoptr i32 %nz to i8*       ; extending int2ptr as sizeof(i32) < sizeof(i8*)
+; CHECK:  call void @foo(i8* nonnull %i2p)
+  call void @foo(i8* %i2p)
+
+  %nz.2 = sdiv exact i64 100, %lu      ; %nz.2 cannot be null
+  %i2p.2 = inttoptr i64 %nz.2 to i8*   ; no-op int2ptr as sizeof(i64) == sizeof(i8*)
+; CHECK:  call void @foo(i8* nonnull %i2p.2)
+  call void @foo(i8* %i2p.2)
+  ret void
+}
+
+define void @nonnullAfterPtr2Int() {
+entry:
+  %a = alloca i32
+  %p2i = ptrtoint i32* %a to i64      ; no-op ptr2int as sizeof(i32*) == sizeof(i64)
+  %i2p = inttoptr i64 %p2i to i8*
+; CHECK:  call void @foo(i8* nonnull %i2p)
+  call void @foo(i8* %i2p)
+  ret void
+}
+
+define void @maybenullAfterInt2Ptr(i128 %llu) {
+entry:
+  %cmp = icmp ne i128 %llu, 0
+  call void @llvm.assume(i1 %cmp)          ; %llu != 0
+  %i2p = inttoptr i128 %llu to i8*    ; truncating int2ptr as sizeof(i128) > sizeof(i8*)
+; CHECK:  call void @foo(i8* %i2p)
+  call void @foo(i8* %i2p)
+  ret void
+}
+
+define void @maybenullAfterPtr2Int() {
+entry:
+  %a = alloca i32
+  %p2i = ptrtoint i32* %a to i32      ; truncating ptr2int as sizeof(i32*) > sizeof(i32)
+  %i2p = inttoptr i32 %p2i to i8*
+; CHECK:  call void @foo(i8* %i2p)
+  call void @foo(i8* %i2p)
+  ret void
+}
+
+define void @maybenullAfterAddrspacecast(i8* nonnull %p) {
+entry:
+  %addrspcast = addrspacecast i8* %p to i8 addrspace(1)*
+
+; An address space cast can be "a no-op cast or a complex value modification,
+; depending on the target and the address space pair". As a consequence, we
+; cannot simply assume non-nullness of %p is preserved by the cast.
+;
+; CHECK:  call void @bar(i8 addrspace(1)* %addrspcast)
+  call void @bar(i8 addrspace(1)* %addrspcast)
+
+; CHECK:  call void @foo(i8* nonnull %p)
+  call void @foo(i8* %p)
+  ret void
+}

Added: llvm/trunk/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,359 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=37603
+; https://reviews.llvm.org/D46760#1123713
+
+; Pattern:
+;   x >> y << y
+; Should be transformed into:
+;   x & (-1 << y)
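
A quick way to convince yourself of the identity behind these tests is the following standalone C check (illustrative only, not part of the commit). It exhaustively verifies the scalar form on 8-bit values, emulating the arithmetic shift with unsigned math so the check itself has no implementation-defined behaviour:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Arithmetic shift right on an 8-bit lane, emulated with unsigned math. */
static uint8_t ashr8(uint8_t x, unsigned y) {
  uint8_t sign = (uint8_t)-(x >> 7); /* 0x00 for non-negative x, 0xFF otherwise */
  return (uint8_t)((x >> y) | (uint8_t)(sign << (8 - y)));
}

int main(void) {
  for (unsigned xi = 0; xi < 256; ++xi) {
    for (unsigned y = 0; y < 8; ++y) {
      uint8_t x = (uint8_t)xi;
      uint8_t lhs = (uint8_t)(ashr8(x, y) << y);         /* (x ashr y) shl y */
      uint8_t rhs = (uint8_t)(x & (uint8_t)(0xFFu << y)); /* x & (-1 << y)    */
      assert(lhs == rhs);
    }
  }
  puts("(x ashr y) shl y == x & (-1 << y) for all 8-bit x and y in [0,7]");
  return 0;
}
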
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i32 @positive_samevar(i32 %x, i32 %y) {
+; CHECK-LABEL: @positive_samevar(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = and i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %tmp0 = ashr i32 %x, %y
+  %ret = shl i32 %tmp0, %y
+  ret i32 %ret
+}
+
+define i32 @positive_sameconst(i32 %x) {
+; CHECK-LABEL: @positive_sameconst(
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], -32
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+  %tmp0 = ashr i32 %x, 5
+  %ret = shl i32 %tmp0, 5
+  ret i32 %ret
+}
+
+define i32 @positive_biggerashr(i32 %x) {
+; CHECK-LABEL: @positive_biggerashr(
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr i32 [[X:%.*]], 10
+; CHECK-NEXT:    [[RET:%.*]] = shl nsw i32 [[TMP0]], 5
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %tmp0 = ashr i32 %x, 10
+  %ret = shl i32 %tmp0, 5
+  ret i32 %ret
+}
+
+define i32 @positive_biggershl(i32 %x) {
+; CHECK-LABEL: @positive_biggershl(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 5
+; CHECK-NEXT:    [[RET:%.*]] = shl i32 [[TMP1]], 10
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %tmp0 = ashr i32 %x, 5
+  %ret = shl i32 %tmp0, 10
+  ret i32 %ret
+}
+
+; ============================================================================ ;
+; EXACT on the first shift
+; ============================================================================ ;
+
+define i32 @positive_samevar_ashrexact(i32 %x, i32 %y) {
+; CHECK-LABEL: @positive_samevar_ashrexact(
+; CHECK-NEXT:    ret i32 [[X:%.*]]
+;
+  %tmp0 = ashr exact i32 %x, %y
+  %ret = shl i32 %tmp0, %y ; this one is obviously 'nuw'.
+  ret i32 %ret
+}
+
+define i32 @positive_sameconst_ashrexact(i32 %x) {
+; CHECK-LABEL: @positive_sameconst_ashrexact(
+; CHECK-NEXT:    ret i32 [[X:%.*]]
+;
+  %tmp0 = ashr exact i32 %x, 5
+  %ret = shl i32 %tmp0, 5 ; this one is obviously 'nuw'.
+  ret i32 %ret
+}
+
+define i32 @positive_biggerashr_ashrexact(i32 %x) {
+; CHECK-LABEL: @positive_biggerashr_ashrexact(
+; CHECK-NEXT:    [[RET:%.*]] = ashr exact i32 [[X:%.*]], 5
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %tmp0 = ashr exact i32 %x, 10
+  %ret = shl i32 %tmp0, 5 ; this one is obviously 'nuw'.
+  ret i32 %ret
+}
+
+define i32 @positive_biggershl_ashrexact(i32 %x) {
+; CHECK-LABEL: @positive_biggershl_ashrexact(
+; CHECK-NEXT:    [[RET:%.*]] = shl i32 [[X:%.*]], 5
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %tmp0 = ashr exact i32 %x, 5
+  %ret = shl i32 %tmp0, 10
+  ret i32 %ret
+}
+
+define i32 @positive_biggershl_ashrexact_shlnuw(i32 %x) {
+; CHECK-LABEL: @positive_biggershl_ashrexact_shlnuw(
+; CHECK-NEXT:    [[RET:%.*]] = shl nuw i32 [[X:%.*]], 5
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %tmp0 = ashr exact i32 %x, 5
+  %ret = shl nuw i32 %tmp0, 10
+  ret i32 %ret
+}
+
+; ============================================================================ ;
+; Vector
+; ============================================================================ ;
+
+define <2 x i32> @positive_samevar_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @positive_samevar_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> <i32 -1, i32 -1>, [[Y:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = and <2 x i32> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    ret <2 x i32> [[RET]]
+;
+  %tmp0 = ashr <2 x i32> %x, %y
+  %ret = shl <2 x i32> %tmp0, %y
+  ret <2 x i32> %ret
+}
+
+; ============================================================================ ;
+; Constant Vectors
+; ============================================================================ ;
+
+define <2 x i32> @positive_sameconst_vec(<2 x i32> %x) {
+; CHECK-LABEL: @positive_sameconst_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 -32, i32 -32>
+; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
+;
+  %tmp0 = ashr <2 x i32> %x, <i32 5, i32 5>
+  %ret = shl <2 x i32> %tmp0, <i32 5, i32 5>
+  ret <2 x i32> %ret
+}
+
+define <3 x i32> @positive_sameconst_vec_undef0(<3 x i32> %x) {
+; CHECK-LABEL: @positive_sameconst_vec_undef0(
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr <3 x i32> [[X:%.*]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT:    ret <3 x i32> [[RET]]
+;
+  %tmp0 = ashr <3 x i32> %x, <i32 5, i32 undef, i32 5>
+  %ret = shl <3 x i32> %tmp0, <i32 5, i32 5, i32 5>
+  ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_sameconst_vec_undef1(<3 x i32> %x) {
+; CHECK-LABEL: @positive_sameconst_vec_undef1(
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr <3 x i32> [[X:%.*]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT:    ret <3 x i32> [[RET]]
+;
+  %tmp0 = ashr <3 x i32> %x, <i32 5, i32 5, i32 5>
+  %ret = shl <3 x i32> %tmp0, <i32 5, i32 undef, i32 5>
+  ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_sameconst_vec_undef2(<3 x i32> %x) {
+; CHECK-LABEL: @positive_sameconst_vec_undef2(
+; CHECK-NEXT:    [[RET:%.*]] = and <3 x i32> [[X:%.*]], <i32 -32, i32 undef, i32 -32>
+; CHECK-NEXT:    ret <3 x i32> [[RET]]
+;
+  %tmp0 = ashr <3 x i32> %x, <i32 5, i32 undef, i32 5>
+  %ret = shl <3 x i32> %tmp0, <i32 5, i32 undef, i32 5>
+  ret <3 x i32> %ret
+}
+
+define <2 x i32> @positive_biggerashr_vec(<2 x i32> %x) {
+; CHECK-LABEL: @positive_biggerashr_vec(
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 10, i32 10>
+; CHECK-NEXT:    [[RET:%.*]] = shl nsw <2 x i32> [[TMP0]], <i32 5, i32 5>
+; CHECK-NEXT:    ret <2 x i32> [[RET]]
+;
+  %tmp0 = ashr <2 x i32> %x, <i32 10, i32 10>
+  %ret = shl <2 x i32> %tmp0, <i32 5, i32 5>
+  ret <2 x i32> %ret
+}
+
+define <3 x i32> @positive_biggerashr_vec_undef0(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggerashr_vec_undef0(
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr <3 x i32> [[X:%.*]], <i32 10, i32 undef, i32 10>
+; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT:    ret <3 x i32> [[RET]]
+;
+  %tmp0 = ashr <3 x i32> %x, <i32 10, i32 undef, i32 10>
+  %ret = shl <3 x i32> %tmp0, <i32 5, i32 5, i32 5>
+  ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_biggerashr_vec_undef1(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggerashr_vec_undef1(
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr <3 x i32> [[X:%.*]], <i32 10, i32 10, i32 10>
+; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT:    ret <3 x i32> [[RET]]
+;
+  %tmp0 = ashr <3 x i32> %x, <i32 10, i32 10, i32 10>
+  %ret = shl <3 x i32> %tmp0, <i32 5, i32 undef, i32 5>
+  ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_biggerashr_vec_undef2(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggerashr_vec_undef2(
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr <3 x i32> [[X:%.*]], <i32 10, i32 undef, i32 10>
+; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT:    ret <3 x i32> [[RET]]
+;
+  %tmp0 = ashr <3 x i32> %x, <i32 10, i32 undef, i32 10>
+  %ret = shl <3 x i32> %tmp0, <i32 5, i32 undef, i32 5>
+  ret <3 x i32> %ret
+}
+
+define <2 x i32> @positive_biggershl_vec(<2 x i32> %x) {
+; CHECK-LABEL: @positive_biggershl_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 5, i32 5>
+; CHECK-NEXT:    [[RET:%.*]] = shl <2 x i32> [[TMP1]], <i32 10, i32 10>
+; CHECK-NEXT:    ret <2 x i32> [[RET]]
+;
+  %tmp0 = ashr <2 x i32> %x, <i32 5, i32 5>
+  %ret = shl <2 x i32> %tmp0, <i32 10, i32 10>
+  ret <2 x i32> %ret
+}
+
+define <3 x i32> @positive_biggershl_vec_undef0(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggershl_vec_undef0(
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr <3 x i32> [[X:%.*]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 10, i32 10, i32 10>
+; CHECK-NEXT:    ret <3 x i32> [[RET]]
+;
+  %tmp0 = ashr <3 x i32> %x, <i32 5, i32 undef, i32 5>
+  %ret = shl <3 x i32> %tmp0, <i32 10, i32 10, i32 10>
+  ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_biggershl_vec_undef1(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggershl_vec_undef1(
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr <3 x i32> [[X:%.*]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 10, i32 undef, i32 10>
+; CHECK-NEXT:    ret <3 x i32> [[RET]]
+;
+  %tmp0 = ashr <3 x i32> %x, <i32 5, i32 5, i32 5>
+  %ret = shl <3 x i32> %tmp0, <i32 10, i32 undef, i32 10>
+  ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_biggershl_vec_undef2(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggershl_vec_undef2(
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr <3 x i32> [[X:%.*]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 10, i32 undef, i32 10>
+; CHECK-NEXT:    ret <3 x i32> [[RET]]
+;
+  %tmp0 = ashr <3 x i32> %x, <i32 5, i32 undef, i32 5>
+  %ret = shl <3 x i32> %tmp0, <i32 10, i32 undef, i32 10>
+  ret <3 x i32> %ret
+}
+
+; ============================================================================ ;
+; Positive multi-use tests with constant
+; ============================================================================ ;
+
+; FIXME: drop 'exact' once it is no longer needed.
+
+define i32 @positive_sameconst_multiuse(i32 %x) {
+; CHECK-LABEL: @positive_sameconst_multiuse(
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr exact i32 [[X:%.*]], 5
+; CHECK-NEXT:    call void @use32(i32 [[TMP0]])
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %tmp0 = ashr exact i32 %x, 5
+  call void @use32(i32 %tmp0)
+  %ret = shl i32 %tmp0, 5
+  ret i32 %ret
+}
+
+define i32 @positive_biggerashr_multiuse(i32 %x) {
+; CHECK-LABEL: @positive_biggerashr_multiuse(
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr exact i32 [[X:%.*]], 10
+; CHECK-NEXT:    call void @use32(i32 [[TMP0]])
+; CHECK-NEXT:    [[RET:%.*]] = ashr exact i32 [[X]], 5
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %tmp0 = ashr exact i32 %x, 10
+  call void @use32(i32 %tmp0)
+  %ret = shl i32 %tmp0, 5
+  ret i32 %ret
+}
+
+define i32 @positive_biggershl_multiuse(i32 %x) {
+; CHECK-LABEL: @positive_biggershl_multiuse(
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr exact i32 [[X:%.*]], 5
+; CHECK-NEXT:    call void @use32(i32 [[TMP0]])
+; CHECK-NEXT:    [[RET:%.*]] = shl i32 [[X]], 5
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %tmp0 = ashr exact i32 %x, 5
+  call void @use32(i32 %tmp0)
+  %ret = shl i32 %tmp0, 10
+  ret i32 %ret
+}
+
+; ============================================================================ ;
+; Constant Non-Splat Vectors
+; ============================================================================ ;
+
+define <2 x i32> @positive_biggerashr_vec_nonsplat(<2 x i32> %x) {
+; CHECK-LABEL: @positive_biggerashr_vec_nonsplat(
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 5, i32 5>
+; CHECK-NEXT:    [[RET:%.*]] = shl <2 x i32> [[TMP0]], <i32 5, i32 10>
+; CHECK-NEXT:    ret <2 x i32> [[RET]]
+;
+  %tmp0 = ashr <2 x i32> %x, <i32 5, i32 5>
+  %ret = shl <2 x i32> %tmp0, <i32 5, i32 10>
+  ret <2 x i32> %ret
+}
+
+define <2 x i32> @positive_biggerLashr_vec_nonsplat(<2 x i32> %x) {
+; CHECK-LABEL: @positive_biggerLashr_vec_nonsplat(
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 5, i32 10>
+; CHECK-NEXT:    [[RET:%.*]] = shl <2 x i32> [[TMP0]], <i32 5, i32 5>
+; CHECK-NEXT:    ret <2 x i32> [[RET]]
+;
+  %tmp0 = ashr <2 x i32> %x, <i32 5, i32 10>
+  %ret = shl <2 x i32> %tmp0, <i32 5, i32 5>
+  ret <2 x i32> %ret
+}
+
+; ============================================================================ ;
+; Negative tests. Should not be folded.
+; ============================================================================ ;
+
+define i32 @negative_twovars(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @negative_twovars(
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = shl i32 [[TMP0]], [[Z:%.*]]
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %tmp0 = ashr i32 %x, %y
+  %ret = shl i32 %tmp0, %z ; %z, not %y
+  ret i32 %ret
+}
+
+declare void @use32(i32)
+
+; The shift must have only one use for the fold; here it has an extra use.
+define i32 @negative_oneuse(i32 %x, i32 %y) {
+; CHECK-LABEL: @negative_oneuse(
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    call void @use32(i32 [[TMP0]])
+; CHECK-NEXT:    [[RET:%.*]] = shl i32 [[TMP0]], [[Y]]
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %tmp0 = ashr i32 %x, %y
+  call void @use32(i32 %tmp0)
+  %ret = shl i32 %tmp0, %y
+  ret i32 %ret
+}

Added: llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,190 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+;   x & C == x
+; Should be transformed into:
+;   x u<= C
+; Iff: isPowerOf2(C + 1)
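
Since this is the first of several related low-bit-mask patterns, here is a tiny standalone C check (illustrative only, not part of the commit) that exhaustively verifies the scalar identity for every mask of the required form:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  /* For every low-bit mask C = 2^k - 1, "(x & C) == x" and "x u<= C" agree. */
  for (unsigned k = 0; k <= 8; ++k) {
    uint8_t C = (uint8_t)((1u << k) - 1u);
    for (unsigned x = 0; x < 256; ++x)
      assert((((uint8_t)(x & C)) == (uint8_t)x) == ((uint8_t)x <= C));
  }
  puts("x & C == x  <=>  x u<= C   whenever C + 1 is a power of two");
  return 0;
}
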
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[X:%.*]], 4
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = and i8 %x, 3
+  %ret = icmp eq i8 %tmp0, %x
+  ret i1 %ret
+}
+
+define i1 @pv(i8 %x, i8 %y) {
+; CHECK-LABEL: @pv(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x
+  %ret = icmp eq i8 %tmp1, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
+; CHECK-LABEL: @p1_vec_splat(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 3>
+  %ret = icmp eq <2 x i8> %tmp0, %x
+  ret <2 x i1> %ret
+}
+
+define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {
+; CHECK-LABEL: @p2_vec_nonsplat(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i8> [[X:%.*]], <i8 4, i8 16>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 15> ; doesn't have to be splat.
+  %ret = icmp eq <2 x i8> %tmp0, %x
+  ret <2 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_splat_undef(<3 x i8> %x) {
+; CHECK-LABEL: @p3_vec_splat_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <3 x i8> [[X:%.*]], <i8 4, i8 undef, i8 4>
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3>
+  %ret = icmp eq <3 x i8> %tmp0, %x
+  ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0() {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[X]], 4
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = and i8 %x, 3
+  %ret = icmp eq i8 %x, %tmp0 ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests with variable
+; ============================================================================ ;
+
+define i1 @cv0(i8 %y) {
+; CHECK-LABEL: @cv0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %x, %tmp0 ; swapped order
+  %ret = icmp eq i8 %tmp1, %x
+  ret i1 %ret
+}
+
+define i1 @cv1(i8 %y) {
+; CHECK-LABEL: @cv1(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x
+  %ret = icmp eq i8 %x, %tmp1 ; swapped order
+  ret i1 %ret
+}
+
+define i1 @cv2(i8 %y) {
+; CHECK-LABEL: @cv2(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %x, %tmp0 ; swapped order
+  %ret = icmp eq i8 %x, %tmp1 ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0(i8 %x) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT:    call void @use8(i8 [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[X]], 4
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = and i8 %x, 3
+  call void @use8(i8 %tmp0)
+  %ret = icmp eq i8 %tmp0, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 4
+; CHECK-NEXT:    [[RET:%.*]] = icmp eq i8 [[TMP0]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %tmp0 = and i8 %x, 4 ; power-of-two, but invalid.
+  %ret = icmp eq i8 %tmp0, %x
+  ret i1 %ret
+}
+
+define i1 @n1(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT:    [[RET:%.*]] = icmp eq i8 [[TMP0]], [[NOTX:%.*]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %tmp0 = and i8 %x, 3
+  %ret = icmp eq i8 %tmp0, %notx ; not %x
+  ret i1 %ret
+}
+
+define <2 x i1> @n2(<2 x i8> %x) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i8> [[X:%.*]], <i8 3, i8 16>
+; CHECK-NEXT:    [[RET:%.*]] = icmp eq <2 x i8> [[TMP0]], [[X]]
+; CHECK-NEXT:    ret <2 x i1> [[RET]]
+;
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 16> ; only the first one is valid.
+  %ret = icmp eq <2 x i8> %tmp0, %x
+  ret <2 x i1> %ret
+}

Added: llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,190 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+;   x & C != x
+; Should be transformed into:
+;   x u> C
+; Iff: isPowerOf2(C + 1)
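
This file covers the negated form of the previous pattern; the same exhaustive check, with the predicates flipped, confirms it (standalone C, illustrative only, not part of the commit):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  /* For every low-bit mask C = 2^k - 1, "(x & C) != x" and "x u> C" agree. */
  for (unsigned k = 0; k <= 8; ++k) {
    uint8_t C = (uint8_t)((1u << k) - 1u);
    for (unsigned x = 0; x < 256; ++x)
      assert((((uint8_t)(x & C)) != (uint8_t)x) == ((uint8_t)x > C));
  }
  puts("x & C != x  <=>  x u> C   whenever C + 1 is a power of two");
  return 0;
}
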
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i8 [[X:%.*]], 3
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = and i8 %x, 3
+  %ret = icmp ne i8 %tmp0, %x
+  ret i1 %ret
+}
+
+define i1 @pv(i8 %x, i8 %y) {
+; CHECK-LABEL: @pv(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x
+  %ret = icmp ne i8 %tmp1, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
+; CHECK-LABEL: @p1_vec_splat(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <2 x i8> [[X:%.*]], <i8 3, i8 3>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 3>
+  %ret = icmp ne <2 x i8> %tmp0, %x
+  ret <2 x i1> %ret
+}
+
+define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {
+; CHECK-LABEL: @p2_vec_nonsplat(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <2 x i8> [[X:%.*]], <i8 3, i8 15>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 15> ; doesn't have to be splat.
+  %ret = icmp ne <2 x i8> %tmp0, %x
+  ret <2 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_splat_undef(<3 x i8> %x) {
+; CHECK-LABEL: @p3_vec_splat_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <3 x i8> [[X:%.*]], <i8 3, i8 undef, i8 3>
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3>
+  %ret = icmp ne <3 x i8> %tmp0, %x
+  ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0() {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i8 [[X]], 3
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = and i8 %x, 3
+  %ret = icmp ne i8 %x, %tmp0 ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests with variable
+; ============================================================================ ;
+
+define i1 @cv0(i8 %y) {
+; CHECK-LABEL: @cv0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %x, %tmp0 ; swapped order
+  %ret = icmp ne i8 %tmp1, %x
+  ret i1 %ret
+}
+
+define i1 @cv1(i8 %y) {
+; CHECK-LABEL: @cv1(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x
+  %ret = icmp ne i8 %x, %tmp1 ; swapped order
+  ret i1 %ret
+}
+
+define i1 @cv2(i8 %y) {
+; CHECK-LABEL: @cv2(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %x, %tmp0 ; swapped order
+  %ret = icmp ne i8 %x, %tmp1 ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0(i8 %x) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT:    call void @use8(i8 [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i8 [[X]], 3
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = and i8 %x, 3
+  call void @use8(i8 %tmp0)
+  %ret = icmp ne i8 %tmp0, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 4
+; CHECK-NEXT:    [[RET:%.*]] = icmp ne i8 [[TMP0]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %tmp0 = and i8 %x, 4 ; power-of-two, but invalid.
+  %ret = icmp ne i8 %tmp0, %x
+  ret i1 %ret
+}
+
+define i1 @n1(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT:    [[RET:%.*]] = icmp ne i8 [[TMP0]], [[NOTX:%.*]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %tmp0 = and i8 %x, 3
+  %ret = icmp ne i8 %tmp0, %notx ; not %x
+  ret i1 %ret
+}
+
+define <2 x i1> @n2(<2 x i8> %x) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i8> [[X:%.*]], <i8 3, i8 16>
+; CHECK-NEXT:    [[RET:%.*]] = icmp ne <2 x i8> [[TMP0]], [[X]]
+; CHECK-NEXT:    ret <2 x i1> [[RET]]
+;
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 16> ; only the first one is valid.
+  %ret = icmp ne <2 x i8> %tmp0, %x
+  ret <2 x i1> %ret
+}

Added: llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,226 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+;   x & C s>= x
+; Should be transformed into:
+;   x s<= C
+; Iff: isPowerOf2(C + 1)
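
For the signed form, a small standalone C check (illustrative only, not part of the commit) over all signed 8-bit values with the mask 3 used in these tests:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  /* With C = 3 (so C + 1 is a power of two), "(x & C) s>= x" and "x s<= C"
   * agree for every signed 8-bit x. */
  for (int xi = -128; xi < 128; ++xi) {
    int8_t x = (int8_t)xi;
    int8_t masked = (int8_t)(x & 3);
    assert((masked >= x) == (x <= 3));
  }
  puts("x & 3 s>= 3  <=>  x s<= 3 verified for all 8-bit x");
  return 0;
}
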
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i8 [[X:%.*]], 4
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = and i8 %x, 3
+  %ret = icmp sge i8 %tmp0, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
+; CHECK-LABEL: @p1_vec_splat(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 3>
+  %ret = icmp sge <2 x i8> %tmp0, %x
+  ret <2 x i1> %ret
+}
+
+define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {
+; CHECK-LABEL: @p2_vec_nonsplat(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <2 x i8> [[X:%.*]], <i8 4, i8 16>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 15> ; doesn't have to be splat.
+  %ret = icmp sge <2 x i8> %tmp0, %x
+  ret <2 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_splat_undef(<3 x i8> %x) {
+; CHECK-LABEL: @p3_vec_splat_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <3 x i8> [[X:%.*]], <i8 4, i8 undef, i8 4>
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3>
+  %ret = icmp sge <3 x i8> %tmp0, %x
+  ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0(i8 %x) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT:    call void @use8(i8 [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i8 [[X]], 4
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = and i8 %x, 3
+  call void @use8(i8 %tmp0)
+  %ret = icmp sge i8 %tmp0, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0() {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X]], 3
+; CHECK-NEXT:    [[RET:%.*]] = icmp sge i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = and i8 %x, 3
+  %ret = icmp sge i8 %x, %tmp0 ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests with variable
+; ============================================================================ ;
+
+; Ok, this one should fold. We are only testing the commutativity of 'and'.
+define i1 @cv0(i8 %y) {
+; CHECK-LABEL: @cv0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp sge i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %x, %tmp0 ; swapped order
+  %ret = icmp sge i8 %tmp1, %x
+  ret i1 %ret
+}
+
+define i1 @cv1(i8 %y) {
+; CHECK-LABEL: @cv1(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[TMP0]], [[X]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp sge i8 [[X]], [[TMP1]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x
+  %ret = icmp sge i8 %x, %tmp1 ; swapped order
+  ret i1 %ret
+}
+
+define i1 @cv2(i8 %y) {
+; CHECK-LABEL: @cv2(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp sge i8 [[X]], [[TMP1]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %x, %tmp0 ; swapped order
+  %ret = icmp sge i8 %x, %tmp1 ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Normal negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 4
+; CHECK-NEXT:    [[RET:%.*]] = icmp sge i8 [[TMP0]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %tmp0 = and i8 %x, 4 ; power-of-two, but invalid.
+  %ret = icmp sge i8 %tmp0, %x
+  ret i1 %ret
+}
+
+define i1 @n1(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT:    [[RET:%.*]] = icmp sge i8 [[TMP0]], [[NOTX:%.*]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %tmp0 = and i8 %x, 3
+  %ret = icmp sge i8 %tmp0, %notx ; not %x
+  ret i1 %ret
+}
+
+define <2 x i1> @n2(<2 x i8> %x) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i8> [[X:%.*]], <i8 3, i8 16>
+; CHECK-NEXT:    [[RET:%.*]] = icmp sge <2 x i8> [[TMP0]], [[X]]
+; CHECK-NEXT:    ret <2 x i1> [[RET]]
+;
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 16> ; only the first one is valid.
+  %ret = icmp sge <2 x i8> %tmp0, %x
+  ret <2 x i1> %ret
+}
+
+; ============================================================================ ;
+; Potential miscompiles.
+; ============================================================================ ;
+
+define i1 @nv(i8 %x, i8 %y) {
+; CHECK-LABEL: @nv(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp sge i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x
+  %ret = icmp sge i8 %tmp1, %x
+  ret i1 %ret
+}
+
+define <2 x i1> @n3_vec(<2 x i8> %x) {
+; CHECK-LABEL: @n3_vec(
+; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i8> [[X:%.*]], <i8 3, i8 -1>
+; CHECK-NEXT:    [[RET:%.*]] = icmp sge <2 x i8> [[TMP0]], [[X]]
+; CHECK-NEXT:    ret <2 x i1> [[RET]]
+;
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 -1>
+  %ret = icmp sge <2 x i8> %tmp0, %x
+  ret <2 x i1> %ret
+}
+
+define <3 x i1> @n4_vec(<3 x i8> %x) {
+; CHECK-LABEL: @n4_vec(
+; CHECK-NEXT:    [[TMP0:%.*]] = and <3 x i8> [[X:%.*]], <i8 3, i8 undef, i8 -1>
+; CHECK-NEXT:    [[RET:%.*]] = icmp sge <3 x i8> [[TMP0]], [[X]]
+; CHECK-NEXT:    ret <3 x i1> [[RET]]
+;
+  %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 -1>
+  %ret = icmp sge <3 x i8> %tmp0, %x
+  ret <3 x i1> %ret
+}

Added: llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,216 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+;   x s> x & C
+; Should be transformed into:
+;   x s> C
+; Iff: isPowerOf2(C + 1)
+
+; NOTE: this pattern is not commutative!
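+
+; For example, picking C = 3 (C+1 == 4 is a power of two):
+;   x = 5:  5 & 3 = 1,  so  5 s> 1  is true,   and  5 s> 3  is true
+;   x = 2:  2 & 3 = 2,  so  2 s> 2  is false,  and  2 s> 3  is false
+;   x = -2: -2 & 3 = 2, so -2 s> 2  is false,  and -2 s> 3  is false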
+
+declare i8 @gen8()
+declare <2 x i8> @gen2x8()
+declare <3 x i8> @gen3x8()
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0() {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i8 [[X]], 3
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = and i8 %x, 3
+  %ret = icmp sgt i8 %x, %tmp0
+  ret i1 %ret
+}
+
+define i1 @pv(i8 %y) {
+; CHECK-LABEL: @pv(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x
+  %ret = icmp sgt i8 %x, %tmp1
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec_splat() {
+; CHECK-LABEL: @p1_vec_splat(
+; CHECK-NEXT:    [[X:%.*]] = call <2 x i8> @gen2x8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <2 x i8> [[X]], <i8 3, i8 3>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %x = call <2 x i8> @gen2x8()
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 3>
+  %ret = icmp sgt <2 x i8> %x, %tmp0
+  ret <2 x i1> %ret
+}
+
+define <2 x i1> @p2_vec_nonsplat() {
+; CHECK-LABEL: @p2_vec_nonsplat(
+; CHECK-NEXT:    [[X:%.*]] = call <2 x i8> @gen2x8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <2 x i8> [[X]], <i8 3, i8 15>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %x = call <2 x i8> @gen2x8()
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 15> ; doesn't have to be splat.
+  %ret = icmp sgt <2 x i8> %x, %tmp0
+  ret <2 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_splat_undef() {
+; CHECK-LABEL: @p3_vec_splat_undef(
+; CHECK-NEXT:    [[X:%.*]] = call <3 x i8> @gen3x8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <3 x i8> [[X]], <i8 3, i8 undef, i8 3>
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %x = call <3 x i8> @gen3x8()
+  %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3>
+  %ret = icmp sgt <3 x i8> %x, %tmp0
+  ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0() {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X]], 3
+; CHECK-NEXT:    call void @use8(i8 [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i8 [[X]], 3
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = and i8 %x, 3
+  call void @use8(i8 %tmp0)
+  %ret = icmp sgt i8 %x, %tmp0
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+define i1 @c0(i8 %x) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT:    [[RET:%.*]] = icmp sgt i8 [[TMP0]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %tmp0 = and i8 %x, 3
+  %ret = icmp sgt i8 %tmp0, %x ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests with variable
+; ============================================================================ ;
+
+; Ok, this one should fold. We are only testing the commutativity of 'and'.
+define i1 @cv0_GOOD(i8 %y) {
+; CHECK-LABEL: @cv0_GOOD(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x ; swapped order
+  %ret = icmp sgt i8 %x, %tmp1
+  ret i1 %ret
+}
+
+define i1 @cv1(i8 %y) {
+; CHECK-LABEL: @cv1(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp sgt i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %x, %tmp0
+  %ret = icmp sgt i8 %tmp1, %x ; swapped order
+  ret i1 %ret
+}
+
+define i1 @cv2(i8 %x, i8 %y) {
+; CHECK-LABEL: @cv2(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp sgt i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x ; swapped order
+  %ret = icmp sgt i8 %tmp1, %x ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Normal negative tests
+; ============================================================================ ;
+
+define i1 @n0() {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X]], 4
+; CHECK-NEXT:    [[RET:%.*]] = icmp sgt i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = and i8 %x, 4 ; power-of-two, but invalid.
+  %ret = icmp sgt i8 %x, %tmp0
+  ret i1 %ret
+}
+
+define i1 @n1(i8 %y, i8 %notx) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X]], 3
+; CHECK-NEXT:    [[RET:%.*]] = icmp sgt i8 [[TMP0]], [[NOTX:%.*]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = and i8 %x, 3
+  %ret = icmp sgt i8 %tmp0, %notx ; not %x
+  ret i1 %ret
+}
+
+define <2 x i1> @n2() {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT:    [[X:%.*]] = call <2 x i8> @gen2x8()
+; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i8> [[X]], <i8 3, i8 16>
+; CHECK-NEXT:    [[RET:%.*]] = icmp sgt <2 x i8> [[X]], [[TMP0]]
+; CHECK-NEXT:    ret <2 x i1> [[RET]]
+;
+  %x = call <2 x i8> @gen2x8()
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 16> ; only the first one is valid.
+  %ret = icmp sgt <2 x i8> %x, %tmp0
+  ret <2 x i1> %ret
+}

Added: llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,216 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+;   x s<= x & C
+; Should be transformed into:
+;   x s<= C
+; Iff: isPowerOf2(C + 1)
+
+; NOTE: this pattern is not commutative!
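+
+; For example, picking C = 3 (C+1 == 4 is a power of two):
+;   x = 2:  2 & 3 = 2,  so  2 s<= 2  is true,   and  2 s<= 3  is true
+;   x = 5:  5 & 3 = 1,  so  5 s<= 1  is false,  and  5 s<= 3  is false
+;   x = -2: -2 & 3 = 2, so -2 s<= 2  is true,   and -2 s<= 3  is true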
+
+declare i8 @gen8()
+declare <2 x i8> @gen2x8()
+declare <3 x i8> @gen3x8()
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0() {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i8 [[X]], 4
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = and i8 %x, 3
+  %ret = icmp sle i8 %x, %tmp0
+  ret i1 %ret
+}
+
+define i1 @pv(i8 %y) {
+; CHECK-LABEL: @pv(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sle i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x
+  %ret = icmp sle i8 %x, %tmp1
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec_splat() {
+; CHECK-LABEL: @p1_vec_splat(
+; CHECK-NEXT:    [[X:%.*]] = call <2 x i8> @gen2x8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <2 x i8> [[X]], <i8 4, i8 4>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %x = call <2 x i8> @gen2x8()
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 3>
+  %ret = icmp sle <2 x i8> %x, %tmp0
+  ret <2 x i1> %ret
+}
+
+define <2 x i1> @p2_vec_nonsplat() {
+; CHECK-LABEL: @p2_vec_nonsplat(
+; CHECK-NEXT:    [[X:%.*]] = call <2 x i8> @gen2x8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <2 x i8> [[X]], <i8 4, i8 16>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %x = call <2 x i8> @gen2x8()
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 15> ; doesn't have to be splat.
+  %ret = icmp sle <2 x i8> %x, %tmp0
+  ret <2 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_splat_undef() {
+; CHECK-LABEL: @p3_vec_splat_undef(
+; CHECK-NEXT:    [[X:%.*]] = call <3 x i8> @gen3x8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <3 x i8> [[X]], <i8 4, i8 undef, i8 4>
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %x = call <3 x i8> @gen3x8()
+  %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3>
+  %ret = icmp sle <3 x i8> %x, %tmp0
+  ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0() {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X]], 3
+; CHECK-NEXT:    call void @use8(i8 [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i8 [[X]], 4
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = and i8 %x, 3
+  call void @use8(i8 %tmp0)
+  %ret = icmp sle i8 %x, %tmp0
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+define i1 @c0(i8 %x) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT:    [[RET:%.*]] = icmp sle i8 [[TMP0]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %tmp0 = and i8 %x, 3
+  %ret = icmp sle i8 %tmp0, %x ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests with variable
+; ============================================================================ ;
+
+; Ok, this one should fold. We are only testing the commutativity of 'and'.
+define i1 @cv0_GOOD(i8 %y) {
+; CHECK-LABEL: @cv0_GOOD(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sle i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x ; swapped order
+  %ret = icmp sle i8 %x, %tmp1
+  ret i1 %ret
+}
+
+define i1 @cv1(i8 %y) {
+; CHECK-LABEL: @cv1(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp sle i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %x, %tmp0
+  %ret = icmp sle i8 %tmp1, %x ; swapped order
+  ret i1 %ret
+}
+
+define i1 @cv2(i8 %x, i8 %y) {
+; CHECK-LABEL: @cv2(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp sle i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x ; swapped order
+  %ret = icmp sle i8 %tmp1, %x ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Normal negative tests
+; ============================================================================ ;
+
+define i1 @n0() {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X]], 4
+; CHECK-NEXT:    [[RET:%.*]] = icmp sle i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = and i8 %x, 4 ; power-of-two, but invalid.
+  %ret = icmp sle i8 %x, %tmp0
+  ret i1 %ret
+}
+
+define i1 @n1(i8 %y, i8 %notx) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X]], 3
+; CHECK-NEXT:    [[RET:%.*]] = icmp sle i8 [[TMP0]], [[NOTX:%.*]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = and i8 %x, 3
+  %ret = icmp sle i8 %tmp0, %notx ; not %x
+  ret i1 %ret
+}
+
+define <2 x i1> @n2() {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT:    [[X:%.*]] = call <2 x i8> @gen2x8()
+; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i8> [[X]], <i8 3, i8 16>
+; CHECK-NEXT:    [[RET:%.*]] = icmp sle <2 x i8> [[X]], [[TMP0]]
+; CHECK-NEXT:    ret <2 x i1> [[RET]]
+;
+  %x = call <2 x i8> @gen2x8()
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 16> ; only the first one is valid.
+  %ret = icmp sle <2 x i8> %x, %tmp0
+  ret <2 x i1> %ret
+}

Added: llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,226 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+;   x & C s< x
+; Should be transformed into:
+;   x s> C
+; Iff: isPowerOf2(C + 1)
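+
+; For example, picking C = 3 (C+1 == 4 is a power of two):
+;   x = 5:  5 & 3 = 1,  so  1 s< 5   is true,   and  5 s> 3  is true
+;   x = 2:  2 & 3 = 2,  so  2 s< 2   is false,  and  2 s> 3  is false
+;   x = -2: -2 & 3 = 2, so  2 s< -2  is false,  and -2 s> 3  is false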
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i8 [[X:%.*]], 3
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = and i8 %x, 3
+  %ret = icmp slt i8 %tmp0, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
+; CHECK-LABEL: @p1_vec_splat(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 3, i8 3>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 3>
+  %ret = icmp slt <2 x i8> %tmp0, %x
+  ret <2 x i1> %ret
+}
+
+define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {
+; CHECK-LABEL: @p2_vec_nonsplat(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 3, i8 15>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 15> ; doesn't have to be splat.
+  %ret = icmp slt <2 x i8> %tmp0, %x
+  ret <2 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_splat_undef(<3 x i8> %x) {
+; CHECK-LABEL: @p3_vec_splat_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <3 x i8> [[X:%.*]], <i8 3, i8 undef, i8 3>
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3>
+  %ret = icmp slt <3 x i8> %tmp0, %x
+  ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0(i8 %x) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT:    call void @use8(i8 [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i8 [[X]], 3
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = and i8 %x, 3
+  call void @use8(i8 %tmp0)
+  %ret = icmp slt i8 %tmp0, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0() {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X]], 3
+; CHECK-NEXT:    [[RET:%.*]] = icmp slt i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = and i8 %x, 3
+  %ret = icmp slt i8 %x, %tmp0 ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests with variable
+; ============================================================================ ;
+
+; Ok, this one should fold. We are only testing the commutativity of 'and'.
+define i1 @cv0(i8 %y) {
+; CHECK-LABEL: @cv0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp slt i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %x, %tmp0 ; swapped order
+  %ret = icmp slt i8 %tmp1, %x
+  ret i1 %ret
+}
+
+define i1 @cv1(i8 %y) {
+; CHECK-LABEL: @cv1(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[TMP0]], [[X]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp slt i8 [[X]], [[TMP1]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x
+  %ret = icmp slt i8 %x, %tmp1 ; swapped order
+  ret i1 %ret
+}
+
+define i1 @cv2(i8 %y) {
+; CHECK-LABEL: @cv2(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp slt i8 [[X]], [[TMP1]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %x, %tmp0 ; swapped order
+  %ret = icmp slt i8 %x, %tmp1 ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Normal negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 4
+; CHECK-NEXT:    [[RET:%.*]] = icmp slt i8 [[TMP0]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %tmp0 = and i8 %x, 4 ; power-of-two, but invalid.
+  %ret = icmp slt i8 %tmp0, %x
+  ret i1 %ret
+}
+
+define i1 @n1(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT:    [[RET:%.*]] = icmp slt i8 [[TMP0]], [[NOTX:%.*]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %tmp0 = and i8 %x, 3
+  %ret = icmp slt i8 %tmp0, %notx ; not %x
+  ret i1 %ret
+}
+
+define <2 x i1> @n2(<2 x i8> %x) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i8> [[X:%.*]], <i8 3, i8 16>
+; CHECK-NEXT:    [[RET:%.*]] = icmp slt <2 x i8> [[TMP0]], [[X]]
+; CHECK-NEXT:    ret <2 x i1> [[RET]]
+;
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 16> ; only the first one is valid.
+  %ret = icmp slt <2 x i8> %tmp0, %x
+  ret <2 x i1> %ret
+}
+
+; ============================================================================ ;
+; Potential miscompiles.
+; ============================================================================ ;
+
+define i1 @nv(i8 %x, i8 %y) {
+; CHECK-LABEL: @nv(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp slt i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x
+  %ret = icmp slt i8 %tmp1, %x
+  ret i1 %ret
+}
+
+define <2 x i1> @n3(<2 x i8> %x) {
+; CHECK-LABEL: @n3(
+; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i8> [[X:%.*]], <i8 3, i8 -1>
+; CHECK-NEXT:    [[RET:%.*]] = icmp slt <2 x i8> [[TMP0]], [[X]]
+; CHECK-NEXT:    ret <2 x i1> [[RET]]
+;
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 -1>
+  %ret = icmp slt <2 x i8> %tmp0, %x
+  ret <2 x i1> %ret
+}
+
+define <3 x i1> @n4(<3 x i8> %x) {
+; CHECK-LABEL: @n4(
+; CHECK-NEXT:    [[TMP0:%.*]] = and <3 x i8> [[X:%.*]], <i8 3, i8 undef, i8 -1>
+; CHECK-NEXT:    [[RET:%.*]] = icmp slt <3 x i8> [[TMP0]], [[X]]
+; CHECK-NEXT:    ret <3 x i1> [[RET]]
+;
+  %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 -1>
+  %ret = icmp slt <3 x i8> %tmp0, %x
+  ret <3 x i1> %ret
+}

Added: llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,186 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+;   x & C u>= x
+; Should be transformed into:
+;   x u<= C
+; Iff: isPowerOf2(C + 1)
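+
+; For example, picking C = 3 (C+1 == 4 is a power of two):
+;   x = 2:   2 & 3 = 2,   so  2 u>= 2    is true,   and  2 u<= 3    is true
+;   x = 5:   5 & 3 = 1,   so  1 u>= 5    is false,  and  5 u<= 3    is false
+;   x = 250: 250 & 3 = 2, so  2 u>= 250  is false,  and  250 u<= 3  is false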
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[X:%.*]], 4
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = and i8 %x, 3
+  %ret = icmp uge i8 %tmp0, %x
+  ret i1 %ret
+}
+
+define i1 @pv(i8 %x, i8 %y) {
+; CHECK-LABEL: @pv(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x
+  %ret = icmp uge i8 %tmp1, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
+; CHECK-LABEL: @p1_vec_splat(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 3>
+  %ret = icmp uge <2 x i8> %tmp0, %x
+  ret <2 x i1> %ret
+}
+
+define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {
+; CHECK-LABEL: @p2_vec_nonsplat(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i8> [[X:%.*]], <i8 4, i8 16>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 15> ; doesn't have to be splat.
+  %ret = icmp uge <2 x i8> %tmp0, %x
+  ret <2 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_splat_undef(<3 x i8> %x) {
+; CHECK-LABEL: @p3_vec_splat_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <3 x i8> [[X:%.*]], <i8 4, i8 undef, i8 4>
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3>
+  %ret = icmp uge <3 x i8> %tmp0, %x
+  ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+; The pattern is not commutative. instsimplify will already take care of it.
+define i1 @c0() {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    ret i1 true
+;
+  %x = call i8 @gen8()
+  %tmp0 = and i8 %x, 3
+  %ret = icmp uge i8 %x, %tmp0 ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests with variable
+; ============================================================================ ;
+
+define i1 @cv0(i8 %y) {
+; CHECK-LABEL: @cv0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %x, %tmp0 ; swapped order
+  %ret = icmp uge i8 %tmp1, %x
+  ret i1 %ret
+}
+
+define i1 @cv1(i8 %y) {
+; CHECK-LABEL: @cv1(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    ret i1 true
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x
+  %ret = icmp uge i8 %x, %tmp1 ; swapped order
+  ret i1 %ret
+}
+
+define i1 @cv2(i8 %y) {
+; CHECK-LABEL: @cv2(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    ret i1 true
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %x, %tmp0 ; swapped order
+  %ret = icmp uge i8 %x, %tmp1 ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0(i8 %x) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT:    call void @use8(i8 [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[X]], 4
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = and i8 %x, 3
+  call void @use8(i8 %tmp0)
+  %ret = icmp uge i8 %tmp0, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 4
+; CHECK-NEXT:    [[RET:%.*]] = icmp uge i8 [[TMP0]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %tmp0 = and i8 %x, 4 ; power-of-two, but invalid.
+  %ret = icmp uge i8 %tmp0, %x
+  ret i1 %ret
+}
+
+define i1 @n1(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT:    [[RET:%.*]] = icmp uge i8 [[TMP0]], [[NOTX:%.*]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %tmp0 = and i8 %x, 3
+  %ret = icmp uge i8 %tmp0, %notx ; not %x
+  ret i1 %ret
+}
+
+define <2 x i1> @n2(<2 x i8> %x) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i8> [[X:%.*]], <i8 3, i8 16>
+; CHECK-NEXT:    [[RET:%.*]] = icmp uge <2 x i8> [[TMP0]], [[X]]
+; CHECK-NEXT:    ret <2 x i1> [[RET]]
+;
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 16> ; only the first one is valid.
+  %ret = icmp uge <2 x i8> %tmp0, %x
+  ret <2 x i1> %ret
+}

Added: llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,201 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+;   x u> x & C
+; Should be transformed into:
+;   x u> C
+; Iff: isPowerOf2(C + 1)
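+
+; For example, picking C = 3 (C+1 == 4 is a power of two):
+;   x = 5: 5 & 3 = 1, so  5 u> 1  is true,   and  5 u> 3  is true
+;   x = 2: 2 & 3 = 2, so  2 u> 2  is false,  and  2 u> 3  is false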
+
+declare i8 @gen8()
+declare <2 x i8> @gen2x8()
+declare <3 x i8> @gen3x8()
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0() {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i8 [[X]], 3
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = and i8 %x, 3
+  %ret = icmp ugt i8 %x, %tmp0
+  ret i1 %ret
+}
+
+define i1 @pv(i8 %y) {
+; CHECK-LABEL: @pv(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x
+  %ret = icmp ugt i8 %x, %tmp1
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec_splat() {
+; CHECK-LABEL: @p1_vec_splat(
+; CHECK-NEXT:    [[X:%.*]] = call <2 x i8> @gen2x8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <2 x i8> [[X]], <i8 3, i8 3>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %x = call <2 x i8> @gen2x8()
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 3>
+  %ret = icmp ugt <2 x i8> %x, %tmp0
+  ret <2 x i1> %ret
+}
+
+define <2 x i1> @p2_vec_nonsplat() {
+; CHECK-LABEL: @p2_vec_nonsplat(
+; CHECK-NEXT:    [[X:%.*]] = call <2 x i8> @gen2x8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <2 x i8> [[X]], <i8 3, i8 15>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %x = call <2 x i8> @gen2x8()
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 15> ; doesn't have to be splat.
+  %ret = icmp ugt <2 x i8> %x, %tmp0
+  ret <2 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_splat_undef() {
+; CHECK-LABEL: @p3_vec_splat_undef(
+; CHECK-NEXT:    [[X:%.*]] = call <3 x i8> @gen3x8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <3 x i8> [[X]], <i8 3, i8 undef, i8 3>
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %x = call <3 x i8> @gen3x8()
+  %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3>
+  %ret = icmp ugt <3 x i8> %x, %tmp0
+  ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+define i1 @c0(i8 %x) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT:    ret i1 false
+;
+  %tmp0 = and i8 %x, 3
+  %ret = icmp ugt i8 %tmp0, %x ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests with variable
+; ============================================================================ ;
+
+define i1 @cv0(i8 %y) {
+; CHECK-LABEL: @cv0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x ; swapped order
+  %ret = icmp ugt i8 %x, %tmp1
+  ret i1 %ret
+}
+
+define i1 @cv1(i8 %y) {
+; CHECK-LABEL: @cv1(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    ret i1 false
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %x, %tmp0
+  %ret = icmp ugt i8 %tmp1, %x ; swapped order
+  ret i1 %ret
+}
+
+define i1 @cv2(i8 %x, i8 %y) {
+; CHECK-LABEL: @cv2(
+; CHECK-NEXT:    ret i1 false
+;
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x ; swapped order
+  %ret = icmp ugt i8 %tmp1, %x ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0() {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X]], 3
+; CHECK-NEXT:    call void @use8(i8 [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i8 [[X]], 3
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = and i8 %x, 3
+  call void @use8(i8 %tmp0)
+  %ret = icmp ugt i8 %x, %tmp0
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0() {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X]], 4
+; CHECK-NEXT:    [[RET:%.*]] = icmp ugt i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = and i8 %x, 4 ; power-of-two, but invalid.
+  %ret = icmp ugt i8 %x, %tmp0
+  ret i1 %ret
+}
+
+define i1 @n1(i8 %y, i8 %notx) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X]], 3
+; CHECK-NEXT:    [[RET:%.*]] = icmp ugt i8 [[TMP0]], [[NOTX:%.*]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = and i8 %x, 3
+  %ret = icmp ugt i8 %tmp0, %notx ; not %x
+  ret i1 %ret
+}
+
+define <2 x i1> @n2() {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT:    [[X:%.*]] = call <2 x i8> @gen2x8()
+; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i8> [[X]], <i8 3, i8 16>
+; CHECK-NEXT:    [[RET:%.*]] = icmp ugt <2 x i8> [[X]], [[TMP0]]
+; CHECK-NEXT:    ret <2 x i1> [[RET]]
+;
+  %x = call <2 x i8> @gen2x8()
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 16> ; only the first one is valid.
+  %ret = icmp ugt <2 x i8> %x, %tmp0
+  ret <2 x i1> %ret
+}

Added: llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,201 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+;   x u<= x & C
+; Should be transformed into:
+;   x u<= C
+; Iff: isPowerOf2(C + 1)
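+
+; For example, picking C = 3 (C+1 == 4 is a power of two):
+;   x = 2: 2 & 3 = 2, so  2 u<= 2  is true,   and  2 u<= 3  is true
+;   x = 5: 5 & 3 = 1, so  5 u<= 1  is false,  and  5 u<= 3  is false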
+
+declare i8 @gen8()
+declare <2 x i8> @gen2x8()
+declare <3 x i8> @gen3x8()
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0() {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[X]], 4
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = and i8 %x, 3
+  %ret = icmp ule i8 %x, %tmp0
+  ret i1 %ret
+}
+
+define i1 @pv(i8 %y) {
+; CHECK-LABEL: @pv(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x
+  %ret = icmp ule i8 %x, %tmp1
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec_splat() {
+; CHECK-LABEL: @p1_vec_splat(
+; CHECK-NEXT:    [[X:%.*]] = call <2 x i8> @gen2x8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i8> [[X]], <i8 4, i8 4>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %x = call <2 x i8> @gen2x8()
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 3>
+  %ret = icmp ule <2 x i8> %x, %tmp0
+  ret <2 x i1> %ret
+}
+
+define <2 x i1> @p2_vec_nonsplat() {
+; CHECK-LABEL: @p2_vec_nonsplat(
+; CHECK-NEXT:    [[X:%.*]] = call <2 x i8> @gen2x8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i8> [[X]], <i8 4, i8 16>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %x = call <2 x i8> @gen2x8()
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 15> ; doesn't have to be splat.
+  %ret = icmp ule <2 x i8> %x, %tmp0
+  ret <2 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_splat_undef() {
+; CHECK-LABEL: @p3_vec_splat_undef(
+; CHECK-NEXT:    [[X:%.*]] = call <3 x i8> @gen3x8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <3 x i8> [[X]], <i8 4, i8 undef, i8 4>
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %x = call <3 x i8> @gen3x8()
+  %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3>
+  %ret = icmp ule <3 x i8> %x, %tmp0
+  ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+define i1 @c0(i8 %x) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT:    ret i1 true
+;
+  %tmp0 = and i8 %x, 3
+  %ret = icmp ule i8 %tmp0, %x ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests with variable
+; ============================================================================ ;
+
+define i1 @cv0(i8 %y) {
+; CHECK-LABEL: @cv0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x ; swapped order
+  %ret = icmp ule i8 %x, %tmp1
+  ret i1 %ret
+}
+
+define i1 @cv1(i8 %y) {
+; CHECK-LABEL: @cv1(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    ret i1 true
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %x, %tmp0
+  %ret = icmp ule i8 %tmp1, %x ; swapped order
+  ret i1 %ret
+}
+
+define i1 @cv2(i8 %x, i8 %y) {
+; CHECK-LABEL: @cv2(
+; CHECK-NEXT:    ret i1 true
+;
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x ; swapped order
+  %ret = icmp ule i8 %tmp1, %x ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0() {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X]], 3
+; CHECK-NEXT:    call void @use8(i8 [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[X]], 4
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = and i8 %x, 3
+  call void @use8(i8 %tmp0)
+  %ret = icmp ule i8 %x, %tmp0
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0() {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X]], 4
+; CHECK-NEXT:    [[RET:%.*]] = icmp ule i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = and i8 %x, 4 ; power-of-two, but invalid.
+  %ret = icmp ule i8 %x, %tmp0
+  ret i1 %ret
+}
+
+define i1 @n1(i8 %y, i8 %notx) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X]], 3
+; CHECK-NEXT:    [[RET:%.*]] = icmp ule i8 [[TMP0]], [[NOTX:%.*]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = and i8 %x, 3
+  %ret = icmp ule i8 %tmp0, %notx ; not %x
+  ret i1 %ret
+}
+
+define <2 x i1> @n2() {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT:    [[X:%.*]] = call <2 x i8> @gen2x8()
+; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i8> [[X]], <i8 3, i8 16>
+; CHECK-NEXT:    [[RET:%.*]] = icmp ule <2 x i8> [[X]], [[TMP0]]
+; CHECK-NEXT:    ret <2 x i1> [[RET]]
+;
+  %x = call <2 x i8> @gen2x8()
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 16> ; only the first one is valid.
+  %ret = icmp ule <2 x i8> %x, %tmp0
+  ret <2 x i1> %ret
+}

Added: llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,186 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+;   x & C u< x
+; Should be transformed into:
+;   x u> C
+; Iff: isPowerOf2(C + 1)
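+
+; For example, picking C = 3 (C+1 == 4 is a power of two):
+;   x = 5: 5 & 3 = 1, so  1 u< 5  is true,   and  5 u> 3  is true
+;   x = 2: 2 & 3 = 2, so  2 u< 2  is false,  and  2 u> 3  is false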
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i8 [[X:%.*]], 3
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = and i8 %x, 3
+  %ret = icmp ult i8 %tmp0, %x
+  ret i1 %ret
+}
+
+define i1 @pv(i8 %x, i8 %y) {
+; CHECK-LABEL: @pv(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x
+  %ret = icmp ult i8 %tmp1, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
+; CHECK-LABEL: @p1_vec_splat(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <2 x i8> [[X:%.*]], <i8 3, i8 3>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 3>
+  %ret = icmp ult <2 x i8> %tmp0, %x
+  ret <2 x i1> %ret
+}
+
+define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {
+; CHECK-LABEL: @p2_vec_nonsplat(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <2 x i8> [[X:%.*]], <i8 3, i8 15>
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 15> ; doesn't have to be splat.
+  %ret = icmp ult <2 x i8> %tmp0, %x
+  ret <2 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_splat_undef(<3 x i8> %x) {
+; CHECK-LABEL: @p3_vec_splat_undef(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <3 x i8> [[X:%.*]], <i8 3, i8 undef, i8 3>
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3>
+  %ret = icmp ult <3 x i8> %tmp0, %x
+  ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+; The pattern is not commutative. instsimplify will already take care of it.
+define i1 @c0() {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    ret i1 false
+;
+  %x = call i8 @gen8()
+  %tmp0 = and i8 %x, 3
+  %ret = icmp ult i8 %x, %tmp0 ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests with variable
+; ============================================================================ ;
+
+define i1 @cv0(i8 %y) {
+; CHECK-LABEL: @cv0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %x, %tmp0 ; swapped order
+  %ret = icmp ult i8 %tmp1, %x
+  ret i1 %ret
+}
+
+define i1 @cv1(i8 %y) {
+; CHECK-LABEL: @cv1(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    ret i1 false
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x
+  %ret = icmp ult i8 %x, %tmp1 ; swapped order
+  ret i1 %ret
+}
+
+define i1 @cv2(i8 %y) {
+; CHECK-LABEL: @cv2(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    ret i1 false
+;
+  %x = call i8 @gen8()
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %x, %tmp0 ; swapped order
+  %ret = icmp ult i8 %x, %tmp1 ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0(i8 %x) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT:    call void @use8(i8 [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i8 [[X]], 3
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = and i8 %x, 3
+  call void @use8(i8 %tmp0)
+  %ret = icmp ult i8 %tmp0, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 4
+; CHECK-NEXT:    [[RET:%.*]] = icmp ult i8 [[TMP0]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %tmp0 = and i8 %x, 4 ; power-of-two, but invalid.
+  %ret = icmp ult i8 %tmp0, %x
+  ret i1 %ret
+}
+
+define i1 @n1(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 3
+; CHECK-NEXT:    [[RET:%.*]] = icmp ult i8 [[TMP0]], [[NOTX:%.*]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %tmp0 = and i8 %x, 3
+  %ret = icmp ult i8 %tmp0, %notx ; not %x
+  ret i1 %ret
+}
+
+define <2 x i1> @n2(<2 x i8> %x) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i8> [[X:%.*]], <i8 3, i8 16>
+; CHECK-NEXT:    [[RET:%.*]] = icmp ult <2 x i8> [[TMP0]], [[X]]
+; CHECK-NEXT:    ret <2 x i1> [[RET]]
+;
+  %tmp0 = and <2 x i8> %x, <i8 3, i8 16> ; only the first one is valid.
+  %ret = icmp ult <2 x i8> %tmp0, %x
+  ret <2 x i1> %ret
+}

Added: llvm/trunk/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,234 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38149
+
+; Pattern:
+;   ((%x << MaskedBits) a>> MaskedBits) == %x
+; Should be transformed into:
+;   (add %x, (1 << (KeptBits-1))) u< (1 << KeptBits)
+; Where  KeptBits = bitwidth(%x) - MaskedBits
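+
+; For example, for i8 with MaskedBits = 5 (KeptBits = 3, so the fold is add 4, ult 8):
+;   x = 3:  (3 << 5) a>> 5 = 3,    3 == 3   is true,   and  3 + 4 = 7  u< 8  is true
+;   x = 4:  (4 << 5) a>> 5 = -4,  -4 == 4   is false,  and  4 + 4 = 8  u< 8  is false
+;   x = -4: (-4 << 5) a>> 5 = -4, -4 == -4  is true,   and -4 + 4 = 0  u< 8  is true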
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 8
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %tmp0 = shl i8 %x, 5
+  %tmp1 = ashr exact i8 %tmp0, 5
+  %tmp2 = icmp eq i8 %tmp1, %x
+  ret i1 %tmp2
+}
+
+; Big unusual bit width, https://bugs.llvm.org/show_bug.cgi?id=38204
+define i1 @pb(i65 %x) {
+; CHECK-LABEL: @pb(
+; CHECK-NEXT:    [[TMP1:%.*]] = add i65 [[X:%.*]], 9223372036854775808
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i65 [[TMP1]], -1
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %tmp0 = shl i65 %x, 1
+  %tmp1 = ashr exact i65 %tmp0, 1
+  %tmp2 = icmp eq i65 %x, %tmp1
+  ret i1 %tmp2
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
+; CHECK-LABEL: @p1_vec_splat(
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <2 x i8> [[TMP1]], <i8 8, i8 8>
+; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
+;
+  %tmp0 = shl <2 x i8> %x, <i8 5, i8 5>
+  %tmp1 = ashr exact <2 x i8> %tmp0, <i8 5, i8 5>
+  %tmp2 = icmp eq <2 x i8> %tmp1, %x
+  ret <2 x i1> %tmp2
+}
+
+define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {
+; CHECK-LABEL: @p2_vec_nonsplat(
+; CHECK-NEXT:    [[TMP0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 6>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact <2 x i8> [[TMP0]], <i8 5, i8 6>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
+;
+  %tmp0 = shl <2 x i8> %x, <i8 5, i8 6>
+  %tmp1 = ashr exact <2 x i8> %tmp0, <i8 5, i8 6>
+  %tmp2 = icmp eq <2 x i8> %tmp1, %x
+  ret <2 x i1> %tmp2
+}
+
+define <3 x i1> @p3_vec_undef0(<3 x i8> %x) {
+; CHECK-LABEL: @p3_vec_undef0(
+; CHECK-NEXT:    [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], <i8 5, i8 undef, i8 5>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], <i8 5, i8 5, i8 5>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <3 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT:    ret <3 x i1> [[TMP2]]
+;
+  %tmp0 = shl <3 x i8> %x, <i8 5, i8 undef, i8 5>
+  %tmp1 = ashr exact <3 x i8> %tmp0, <i8 5, i8 5, i8 5>
+  %tmp2 = icmp eq <3 x i8> %tmp1, %x
+  ret <3 x i1> %tmp2
+}
+
+define <3 x i1> @p4_vec_undef1(<3 x i8> %x) {
+; CHECK-LABEL: @p4_vec_undef1(
+; CHECK-NEXT:    [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], <i8 5, i8 5, i8 5>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], <i8 5, i8 undef, i8 5>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <3 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT:    ret <3 x i1> [[TMP2]]
+;
+  %tmp0 = shl <3 x i8> %x, <i8 5, i8 5, i8 5>
+  %tmp1 = ashr exact <3 x i8> %tmp0, <i8 5, i8 undef, i8 5>
+  %tmp2 = icmp eq <3 x i8> %tmp1, %x
+  ret <3 x i1> %tmp2
+}
+
+define <3 x i1> @p5_vec_undef2(<3 x i8> %x) {
+; CHECK-LABEL: @p5_vec_undef2(
+; CHECK-NEXT:    [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], <i8 5, i8 undef, i8 5>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], <i8 5, i8 undef, i8 5>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <3 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT:    ret <3 x i1> [[TMP2]]
+;
+  %tmp0 = shl <3 x i8> %x, <i8 5, i8 undef, i8 5>
+  %tmp1 = ashr exact <3 x i8> %tmp0, <i8 5, i8 undef, i8 5>
+  %tmp2 = icmp eq <3 x i8> %tmp1, %x
+  ret <3 x i1> %tmp2
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0() {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 8
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %x = call i8 @gen8()
+  %tmp0 = shl i8 %x, 5
+  %tmp1 = ashr exact i8 %tmp0, 5
+  %tmp2 = icmp eq i8 %x, %tmp1 ; swapped order
+  ret i1 %tmp2
+}
+
+; ============================================================================ ;
+; One-use tests.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @n_oneuse0(i8 %x) {
+; CHECK-LABEL: @n_oneuse0(
+; CHECK-NEXT:    [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
+; CHECK-NEXT:    call void @use8(i8 [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 8
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %tmp0 = shl i8 %x, 5
+  call void @use8(i8 %tmp0)
+  %tmp1 = ashr exact i8 %tmp0, 5
+  %tmp2 = icmp eq i8 %tmp1, %x
+  ret i1 %tmp2
+}
+
+define i1 @n_oneuse1(i8 %x) {
+; CHECK-LABEL: @n_oneuse1(
+; CHECK-NEXT:    [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
+; CHECK-NEXT:    call void @use8(i8 [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %tmp0 = shl i8 %x, 5
+  %tmp1 = ashr exact i8 %tmp0, 5
+  call void @use8(i8 %tmp1)
+  %tmp2 = icmp eq i8 %tmp1, %x
+  ret i1 %tmp2
+}
+
+define i1 @n_oneuse2(i8 %x) {
+; CHECK-LABEL: @n_oneuse2(
+; CHECK-NEXT:    [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
+; CHECK-NEXT:    call void @use8(i8 [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
+; CHECK-NEXT:    call void @use8(i8 [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %tmp0 = shl i8 %x, 5
+  call void @use8(i8 %tmp0)
+  %tmp1 = ashr exact i8 %tmp0, 5
+  call void @use8(i8 %tmp1)
+  %tmp2 = icmp eq i8 %tmp1, %x
+  ret i1 %tmp2
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT:    [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], [[X]]
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %tmp0 = shl i8 %x, 5
+  %tmp1 = ashr exact i8 %tmp0, 3 ; not 5
+  %tmp2 = icmp eq i8 %tmp1, %x
+  ret i1 %tmp2
+}
+
+define i1 @n1(i8 %x) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[X:%.*]], 8
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = shl i8 %x, 5
+  %tmp1 = lshr exact i8 %tmp0, 5 ; not ashr
+  %tmp2 = icmp eq i8 %tmp1, %x
+  ret i1 %tmp2
+}
+
+define i1 @n2(i8 %x, i8 %y) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT:    [[TMP0:%.*]] = shl i8 [[X:%.*]], 5
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact i8 [[TMP0]], 5
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i8 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP2]]
+;
+  %tmp0 = shl i8 %x, 5
+  %tmp1 = ashr exact i8 %tmp0, 5
+  %tmp2 = icmp eq i8 %tmp1, %y ; not %x
+  ret i1 %tmp2
+}
+
+define <2 x i1> @n3_vec_nonsplat(<2 x i8> %x) {
+; CHECK-LABEL: @n3_vec_nonsplat(
+; CHECK-NEXT:    [[TMP0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 5>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact <2 x i8> [[TMP0]], <i8 5, i8 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i8> [[TMP1]], [[X]]
+; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
+;
+  %tmp0 = shl <2 x i8> %x, <i8 5, i8 5>
+  %tmp1 = ashr exact <2 x i8> %tmp0, <i8 5, i8 3> ; 3 instead of 5
+  %tmp2 = icmp eq <2 x i8> %tmp1, %x
+  ret <2 x i1> %tmp2
+}
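
The test file above exercises the `(x << 5) ashr exact 5 == x` pattern on i8: the CHECK lines for @c0 and @n_oneuse0 show InstCombine rewriting it to `add i8 %x, 4` followed by `icmp ult i8 ..., 8`, i.e. a range check that %x sign-extends from 3 bits. As a sanity check of that equivalence (a throwaway C harness, not part of the commit; all names here are made up), an exhaustive loop over every i8 bit pattern agrees:

#include <assert.h>
#include <stdio.h>

int main(void) {
  for (unsigned x = 0; x < 256; ++x) {          /* every i8 value, as raw bits */
    unsigned shl = (x << 5) & 0xFF;             /* shl i8 %x, 5 */
    /* ashr exact i8 %shl, 5: bits 0..2 come from bits 5..7, the rest is the
       sign bit; exactness holds by construction since %shl has 5 low zero bits. */
    unsigned ashr = (shl >> 5) | ((shl & 0x80) ? 0xF8 : 0x00);
    int original  = (ashr == x);                /* icmp eq i8 %ashr, %x    */
    int canonical = (((x + 4) & 0xFF) < 8);     /* icmp ult i8 (%x + 4), 8 */
    assert(original == canonical);
  }
  puts("shl/ashr-exact sign-fit check == (x + 4) u< 8 for all i8 values");
  return 0;
}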

Added: llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-eq-to-icmp-ule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-eq-to-icmp-ule.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-eq-to-icmp-ule.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-eq-to-icmp-ule.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,170 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+;   x & (-1 >> y) == x
+; Should be transformed into:
+;   x u<= (-1 >> y)
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x, i8 %y) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x
+  %ret = icmp eq i8 %tmp1, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @p1_vec(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr <2 x i8> <i8 -1, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge <2 x i8> [[TMP0]], [[X:%.*]]
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %tmp0 = lshr <2 x i8> <i8 -1, i8 -1>, %y
+  %tmp1 = and <2 x i8> %tmp0, %x
+  %ret = icmp eq <2 x i8> %tmp1, %x
+  ret <2 x i1> %ret
+}
+
+define <3 x i1> @p2_vec_undef(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p2_vec_undef(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr <3 x i8> <i8 -1, i8 undef, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge <3 x i8> [[TMP0]], [[X:%.*]]
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %tmp0 = lshr <3 x i8> <i8 -1, i8 undef, i8 -1>, %y
+  %tmp1 = and <3 x i8> %tmp0, %x
+  %ret = icmp eq <3 x i8> %tmp1, %x
+  ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0(i8 %y) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = lshr i8 -1, %y
+  %x = call i8 @gen8()
+  %tmp1 = and i8 %x, %tmp0 ; swapped order
+  %ret = icmp eq i8 %tmp1, %x
+  ret i1 %ret
+}
+
+define i1 @c1(i8 %y) {
+; CHECK-LABEL: @c1(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = lshr i8 -1, %y
+  %x = call i8 @gen8()
+  %tmp1 = and i8 %tmp0, %x
+  %ret = icmp eq i8 %x, %tmp1 ; swapped order
+  ret i1 %ret
+}
+
+define i1 @c2(i8 %y) {
+; CHECK-LABEL: @c2(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = lshr i8 -1, %y
+  %x = call i8 @gen8()
+  %tmp1 = and i8 %x, %tmp0 ; swapped order
+  %ret = icmp eq i8 %x, %tmp1 ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = lshr i8 -1, %y
+  call void @use8(i8 %tmp0)
+  %tmp1 = and i8 %tmp0, %x
+  %ret = icmp eq i8 %tmp1, %x
+  ret i1 %ret
+}
+
+define i1 @oneuse1(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse1(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[TMP1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge i8 [[TMP0]], [[X]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x
+  call void @use8(i8 %tmp1)
+  %ret = icmp eq i8 %tmp1, %x
+  ret i1 %ret
+}
+
+define i1 @oneuse2(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse2(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[TMP1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge i8 [[TMP0]], [[X]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = lshr i8 -1, %y
+  call void @use8(i8 %tmp0)
+  %tmp1 = and i8 %tmp0, %x
+  call void @use8(i8 %tmp1)
+  %ret = icmp eq i8 %tmp1, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp eq i8 [[TMP1]], [[NOTX:%.*]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x
+  %ret = icmp eq i8 %tmp1, %notx ; not %x
+  ret i1 %ret
+}
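
This file encodes the `x & (-1 >> y) == x  ->  x u<= (-1 >> y)` canonicalization from PR38123. The identity holds because `-1 >> y` is a mask of low bits, so masking changes nothing exactly when x has no bits above the mask. A standalone C sketch (not from the LLVM tree; names are mine) brute-forces the i8 case; shift amounts of 8 or more are excluded since an i8 lshr by the bit width is poison in IR:

#include <assert.h>
#include <stdio.h>

int main(void) {
  for (unsigned x = 0; x < 256; ++x) {
    for (unsigned y = 0; y < 8; ++y) {      /* lshr i8 by >= 8 would be poison */
      unsigned mask = 0xFFu >> y;           /* lshr i8 -1, %y */
      int original  = ((x & mask) == x);    /* x & (-1 >> y) == x */
      int canonical = (x <= mask);          /* x u<= (-1 >> y)    */
      assert(original == canonical);
    }
  }
  puts("x & (-1 >> y) == x  <=>  x u<= (-1 >> y) for all i8 x and y in [0,7]");
  return 0;
}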

Added: llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,170 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+;   x & (-1 >> y) != x
+; Should be transformed into:
+;   x u> (-1 >> y)
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x, i8 %y) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x
+  %ret = icmp ne i8 %tmp1, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @p1_vec(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr <2 x i8> <i8 -1, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i8> [[TMP0]], [[X:%.*]]
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %tmp0 = lshr <2 x i8> <i8 -1, i8 -1>, %y
+  %tmp1 = and <2 x i8> %tmp0, %x
+  %ret = icmp ne <2 x i8> %tmp1, %x
+  ret <2 x i1> %ret
+}
+
+define <3 x i1> @p2_vec_undef(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p2_vec_undef(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr <3 x i8> <i8 -1, i8 undef, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <3 x i8> [[TMP0]], [[X:%.*]]
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %tmp0 = lshr <3 x i8> <i8 -1, i8 undef, i8 -1>, %y
+  %tmp1 = and <3 x i8> %tmp0, %x
+  %ret = icmp ne <3 x i8> %tmp1, %x
+  ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0(i8 %y) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = lshr i8 -1, %y
+  %x = call i8 @gen8()
+  %tmp1 = and i8 %x, %tmp0 ; swapped order
+  %ret = icmp ne i8 %tmp1, %x
+  ret i1 %ret
+}
+
+define i1 @c1(i8 %y) {
+; CHECK-LABEL: @c1(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = lshr i8 -1, %y
+  %x = call i8 @gen8()
+  %tmp1 = and i8 %tmp0, %x
+  %ret = icmp ne i8 %x, %tmp1 ; swapped order
+  ret i1 %ret
+}
+
+define i1 @c2(i8 %y) {
+; CHECK-LABEL: @c2(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i8 [[X]], [[TMP0]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = lshr i8 -1, %y
+  %x = call i8 @gen8()
+  %tmp1 = and i8 %x, %tmp0 ; swapped order
+  %ret = icmp ne i8 %x, %tmp1 ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = lshr i8 -1, %y
+  call void @use8(i8 %tmp0)
+  %tmp1 = and i8 %tmp0, %x
+  %ret = icmp ne i8 %tmp1, %x
+  ret i1 %ret
+}
+
+define i1 @oneuse1(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse1(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[TMP1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[TMP0]], [[X]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x
+  call void @use8(i8 %tmp1)
+  %ret = icmp ne i8 %tmp1, %x
+  ret i1 %ret
+}
+
+define i1 @oneuse2(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse2(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[TMP1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[TMP0]], [[X]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %tmp0 = lshr i8 -1, %y
+  call void @use8(i8 %tmp0)
+  %tmp1 = and i8 %tmp0, %x
+  call void @use8(i8 %tmp1)
+  %ret = icmp ne i8 %tmp1, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[TMP0]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp ne i8 [[TMP1]], [[NOTX:%.*]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %tmp0 = lshr i8 -1, %y
+  %tmp1 = and i8 %tmp0, %x
+  %ret = icmp ne i8 %tmp1, %notx ; not %x
+  ret i1 %ret
+}
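
The ne variant is the logical complement of the eq file above, so the canonical form flips from u<= to u>: `x & (-1 >> y) != x  ->  x u> (-1 >> y)`, which is what the `icmp ult i8 [[TMP0]], [[X]]` and commuted `icmp ugt` CHECK lines express. The same kind of exhaustive i8 loop (again a hypothetical harness, not part of the commit) confirms it:

#include <assert.h>
#include <stdio.h>

int main(void) {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 8; ++y) {
      unsigned mask = 0xFFu >> y;                    /* lshr i8 -1, %y */
      assert(((x & mask) != x) == (x > mask));       /* ne form <=> x u> mask */
    }
  puts("x & (-1 >> y) != x  <=>  x u> (-1 >> y) for all i8 x and y in [0,7]");
  return 0;
}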

Added: llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-eq-to-icmp-ule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-eq-to-icmp-ule.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-eq-to-icmp-ule.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-eq-to-icmp-ule.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,297 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+;   x & ~(-1 << y) == x
+; Should be transformed into:
+;   x u<= ~(-1 << y)
+; That is then later transformed into:
+;   (x >> y) == 0
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x, i8 %y) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  %t1 = xor i8 %t0, -1
+  %t2 = and i8 %t1, %x
+  %ret = icmp eq i8 %t2, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @p1_vec(
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr <2 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %t0 = shl <2 x i8> <i8 -1, i8 -1>, %y
+  %t1 = xor <2 x i8> %t0, <i8 -1, i8 -1>
+  %t2 = and <2 x i8> %t1, %x
+  %ret = icmp eq <2 x i8> %t2, %x
+  ret <2 x i1> %ret
+}
+
+define <3 x i1> @p2_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p2_vec_undef0(
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <3 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %t0 = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, %y
+  %t1 = xor <3 x i8> %t0, <i8 -1, i8 -1, i8 -1>
+  %t2 = and <3 x i8> %t1, %x
+  %ret = icmp eq <3 x i8> %t2, %x
+  ret <3 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p3_vec_undef0(
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <3 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %t0 = shl <3 x i8> <i8 -1, i8 -1, i8 -1>, %y
+  %t1 = xor <3 x i8> %t0, <i8 -1, i8 undef, i8 -1>
+  %t2 = and <3 x i8> %t1, %x
+  %ret = icmp eq <3 x i8> %t2, %x
+  ret <3 x i1> %ret
+}
+
+define <3 x i1> @p4_vec_undef2(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p4_vec_undef2(
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <3 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %t0 = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, %y
+  %t1 = xor <3 x i8> %t0, <i8 -1, i8 undef, i8 -1>
+  %t2 = and <3 x i8> %t1, %x
+  %ret = icmp eq <3 x i8> %t2, %x
+  ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0(i8 %y) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  %t1 = xor i8 %t0, -1
+  %x = call i8 @gen8()
+  %t2 = and i8 %x, %t1 ; swapped order
+  %ret = icmp eq i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @c1(i8 %y) {
+; CHECK-LABEL: @c1(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  %t1 = xor i8 %t0, -1
+  %x = call i8 @gen8()
+  %t2 = and i8 %t1, %x
+  %ret = icmp eq i8 %x, %t2 ; swapped order
+  ret i1 %ret
+}
+
+define i1 @c2(i8 %y) {
+; CHECK-LABEL: @c2(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  %t1 = xor i8 %t0, -1
+  %x = call i8 @gen8()
+  %t2 = and i8 %x, %t1 ; swapped order
+  %ret = icmp eq i8 %x, %t2 ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr i8 [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  call void @use8(i8 %t0)
+  %t1 = xor i8 %t0, -1
+  %t2 = and i8 %t1, %x
+  %ret = icmp eq i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @oneuse1(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse1(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT:    call void @use8(i8 [[T1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  %t1 = xor i8 %t0, -1
+  call void @use8(i8 %t1)
+  %t2 = and i8 %t1, %x
+  %ret = icmp eq i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @oneuse2(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse2(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T2]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge i8 [[T1]], [[X]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  %t1 = xor i8 %t0, -1
+  %t2 = and i8 %t1, %x
+  call void @use8(i8 %t2)
+  %ret = icmp eq i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @oneuse3(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse3(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT:    call void @use8(i8 [[T1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  call void @use8(i8 %t0)
+  %t1 = xor i8 %t0, -1
+  call void @use8(i8 %t1)
+  %t2 = and i8 %t1, %x
+  %ret = icmp eq i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @oneuse4(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse4(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T2]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge i8 [[T1]], [[X]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  call void @use8(i8 %t0)
+  %t1 = xor i8 %t0, -1
+  %t2 = and i8 %t1, %x
+  call void @use8(i8 %t2)
+  %ret = icmp eq i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @oneuse5(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse5(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT:    call void @use8(i8 [[T1]])
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T2]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge i8 [[T1]], [[X]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  call void @use8(i8 %t0)
+  %t1 = xor i8 %t0, -1
+  call void @use8(i8 %t1)
+  %t2 = and i8 %t1, %x
+  call void @use8(i8 %t2)
+  %ret = icmp eq i8 %t2, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp eq i8 [[T2]], [[NOTX:%.*]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %t0 = shl i8 -1, %y
+  %t1 = xor i8 %t0, -1
+  %t2 = and i8 %t1, %x
+  %ret = icmp eq i8 %t2, %notx ; not %x
+  ret i1 %ret
+}
+
+define i1 @n1(i8 %x, i8 %y) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp eq i8 [[T2]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %t0 = shl i8 1, %y ; not -1
+  %t1 = xor i8 %t0, -1
+  %t2 = and i8 %t1, %x
+  %ret = icmp eq i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @n2(i8 %x, i8 %y) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = xor i8 [[T0]], 1
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp eq i8 [[T2]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %t0 = shl i8 -1, %y
+  %t1 = xor i8 %t0, 1 ; not -1
+  %t2 = and i8 %t1, %x
+  %ret = icmp eq i8 %t2, %x
+  ret i1 %ret
+}
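
Here the low-bit mask is spelled `~(-1 << y)` instead of `-1 >> y`; the comment block documents both the `x u<= ~(-1 << y)` canonicalization and the later fold to `(x >> y) == 0`, which is the `lshr`/`icmp eq ..., 0` shape the CHECK lines look for. A quick hypothetical C check (file and names are mine, not part of the commit) over all i8 values ties the three forms together:

#include <assert.h>
#include <stdio.h>

int main(void) {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 8; ++y) {              /* shl i8 by >= 8 is poison */
      unsigned mask = ~(0xFFu << y) & 0xFFu;        /* xor (shl i8 -1, %y), -1 */
      int pattern  = ((x & mask) == x);             /* x & ~(-1 << y) == x */
      int relaxed  = (x <= mask);                   /* x u<= ~(-1 << y)    */
      int final    = ((x >> y) == 0);               /* (x >> y) == 0       */
      assert(pattern == relaxed && relaxed == final);
    }
  puts("mask form, u<= form and (x >> y) == 0 agree for all i8 x and y in [0,7]");
  return 0;
}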

Added: llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-ne-to-icmp-ugt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-ne-to-icmp-ugt.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-ne-to-icmp-ugt.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-ne-to-icmp-ugt.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,297 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+;   x & ~(-1 << y) != x
+; Should be transformed into:
+;   x u> ~(-1 << y)
+; That is then later transformed into:
+;   (x >> y) != 0
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x, i8 %y) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr i8 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  %t1 = xor i8 %t0, -1
+  %t2 = and i8 %t1, %x
+  %ret = icmp ne i8 %t2, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @p1_vec(
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr <2 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <2 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %t0 = shl <2 x i8> <i8 -1, i8 -1>, %y
+  %t1 = xor <2 x i8> %t0, <i8 -1, i8 -1>
+  %t2 = and <2 x i8> %t1, %x
+  %ret = icmp ne <2 x i8> %t2, %x
+  ret <2 x i1> %ret
+}
+
+define <3 x i1> @p2_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p2_vec_undef0(
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <3 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %t0 = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, %y
+  %t1 = xor <3 x i8> %t0, <i8 -1, i8 -1, i8 -1>
+  %t2 = and <3 x i8> %t1, %x
+  %ret = icmp ne <3 x i8> %t2, %x
+  ret <3 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p3_vec_undef0(
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <3 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %t0 = shl <3 x i8> <i8 -1, i8 -1, i8 -1>, %y
+  %t1 = xor <3 x i8> %t0, <i8 -1, i8 undef, i8 -1>
+  %t2 = and <3 x i8> %t1, %x
+  %ret = icmp ne <3 x i8> %t2, %x
+  ret <3 x i1> %ret
+}
+
+define <3 x i1> @p4_vec_undef2(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p4_vec_undef2(
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <3 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %t0 = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, %y
+  %t1 = xor <3 x i8> %t0, <i8 -1, i8 undef, i8 -1>
+  %t2 = and <3 x i8> %t1, %x
+  %ret = icmp ne <3 x i8> %t2, %x
+  ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0(i8 %y) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  %t1 = xor i8 %t0, -1
+  %x = call i8 @gen8()
+  %t2 = and i8 %x, %t1 ; swapped order
+  %ret = icmp ne i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @c1(i8 %y) {
+; CHECK-LABEL: @c1(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  %t1 = xor i8 %t0, -1
+  %x = call i8 @gen8()
+  %t2 = and i8 %t1, %x
+  %ret = icmp ne i8 %x, %t2 ; swapped order
+  ret i1 %ret
+}
+
+define i1 @c2(i8 %y) {
+; CHECK-LABEL: @c2(
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  %t1 = xor i8 %t0, -1
+  %x = call i8 @gen8()
+  %t2 = and i8 %x, %t1 ; swapped order
+  %ret = icmp ne i8 %x, %t2 ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+declare void @use8(i8)
+
+define i1 @oneuse0(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr i8 [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  call void @use8(i8 %t0)
+  %t1 = xor i8 %t0, -1
+  %t2 = and i8 %t1, %x
+  %ret = icmp ne i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @oneuse1(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse1(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT:    call void @use8(i8 [[T1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  %t1 = xor i8 %t0, -1
+  call void @use8(i8 %t1)
+  %t2 = and i8 %t1, %x
+  %ret = icmp ne i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @oneuse2(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse2(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T2]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[T1]], [[X]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  %t1 = xor i8 %t0, -1
+  %t2 = and i8 %t1, %x
+  call void @use8(i8 %t2)
+  %ret = icmp ne i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @oneuse3(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse3(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT:    call void @use8(i8 [[T1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  call void @use8(i8 %t0)
+  %t1 = xor i8 %t0, -1
+  call void @use8(i8 %t1)
+  %t2 = and i8 %t1, %x
+  %ret = icmp ne i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @oneuse4(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse4(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T2]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[T1]], [[X]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  call void @use8(i8 %t0)
+  %t1 = xor i8 %t0, -1
+  %t2 = and i8 %t1, %x
+  call void @use8(i8 %t2)
+  %ret = icmp ne i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @oneuse5(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse5(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT:    call void @use8(i8 [[T1]])
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T2]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[T1]], [[X]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  call void @use8(i8 %t0)
+  %t1 = xor i8 %t0, -1
+  call void @use8(i8 %t1)
+  %t2 = and i8 %t1, %x
+  call void @use8(i8 %t2)
+  %ret = icmp ne i8 %t2, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp ne i8 [[T2]], [[NOTX:%.*]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %t0 = shl i8 -1, %y
+  %t1 = xor i8 %t0, -1
+  %t2 = and i8 %t1, %x
+  %ret = icmp ne i8 %t2, %notx ; not %x
+  ret i1 %ret
+}
+
+define i1 @n1(i8 %x, i8 %y) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = xor i8 [[T0]], -1
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp ne i8 [[T2]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %t0 = shl i8 1, %y ; not -1
+  %t1 = xor i8 %t0, -1
+  %t2 = and i8 %t1, %x
+  %ret = icmp ne i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @n2(i8 %x, i8 %y) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = xor i8 [[T0]], 1
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp ne i8 [[T2]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %t0 = shl i8 -1, %y
+  %t1 = xor i8 %t0, 1 ; not -1
+  %t2 = and i8 %t1, %x
+  %ret = icmp ne i8 %t2, %x
+  ret i1 %ret
+}
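
As with the earlier pair of files, the ne flavour is simply the negation: `x & ~(-1 << y) != x` becomes `x u> ~(-1 << y)` and ultimately `(x >> y) != 0`, matching the `icmp ne ..., 0` CHECK lines. A matching made-up i8 brute-force:

#include <assert.h>
#include <stdio.h>

int main(void) {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 8; ++y) {
      unsigned mask = ~(0xFFu << y) & 0xFFu;        /* ~(-1 << y) as an i8 value */
      assert(((x & mask) != x) == ((x >> y) != 0));
    }
  puts("x & ~(-1 << y) != x  <=>  (x >> y) != 0 for all i8 x and y in [0,7]");
  return 0;
}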

Added: llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,282 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+;   x & ((1 << y) + (-1)) == x
+; Should be transformed into:
+;   x u<= ((1 << y) + (-1))
+; That is then later transformed into:
+;   (x >> y) == 0
+
+; This pattern is non-canonical, but we cannot canonicalize it due to extra uses.
+
+declare void @use8(i8)
+declare void @use2i8(<2 x i8>)
+declare void @use3i8(<3 x i8>)
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x, i8 %y) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr i8 [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 1, %y
+  call void @use8(i8 %t0)
+  %t1 = add i8 %t0, -1
+  %t2 = and i8 %t1, %x
+  %ret = icmp eq i8 %t2, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @p1_vec(
+; CHECK-NEXT:    [[T0:%.*]] = shl <2 x i8> <i8 1, i8 1>, [[Y:%.*]]
+; CHECK-NEXT:    call void @use2i8(<2 x i8> [[T0]])
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr <2 x i8> [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %t0 = shl <2 x i8> <i8 1, i8 1>, %y
+  call void @use2i8(<2 x i8> %t0)
+  %t1 = add <2 x i8> %t0, <i8 -1, i8 -1>
+  %t2 = and <2 x i8> %t1, %x
+  %ret = icmp eq <2 x i8> %t2, %x
+  ret <2 x i1> %ret
+}
+
+define <3 x i1> @p2_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p2_vec_undef0(
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i8> <i8 1, i8 undef, i8 1>, [[Y:%.*]]
+; CHECK-NEXT:    call void @use3i8(<3 x i8> [[T0]])
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <3 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %t0 = shl <3 x i8> <i8 1, i8 undef, i8 1>, %y
+  call void @use3i8(<3 x i8> %t0)
+  %t1 = add <3 x i8> %t0, <i8 -1, i8 -1, i8 -1>
+  %t2 = and <3 x i8> %t1, %x
+  %ret = icmp eq <3 x i8> %t2, %x
+  ret <3 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p3_vec_undef0(
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i8> <i8 1, i8 1, i8 1>, [[Y:%.*]]
+; CHECK-NEXT:    call void @use3i8(<3 x i8> [[T0]])
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <3 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %t0 = shl <3 x i8> <i8 1, i8 1, i8 1>, %y
+  call void @use3i8(<3 x i8> %t0)
+  %t1 = add <3 x i8> %t0, <i8 -1, i8 undef, i8 -1>
+  %t2 = and <3 x i8> %t1, %x
+  %ret = icmp eq <3 x i8> %t2, %x
+  ret <3 x i1> %ret
+}
+
+define <3 x i1> @p4_vec_undef2(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p4_vec_undef2(
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i8> <i8 1, i8 undef, i8 1>, [[Y:%.*]]
+; CHECK-NEXT:    call void @use3i8(<3 x i8> [[T0]])
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <3 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %t0 = shl <3 x i8> <i8 1, i8 undef, i8 1>, %y
+  call void @use3i8(<3 x i8> %t0)
+  %t1 = add <3 x i8> %t0, <i8 -1, i8 undef, i8 -1>
+  %t2 = and <3 x i8> %t1, %x
+  %ret = icmp eq <3 x i8> %t2, %x
+  ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0(i8 %y) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 1, %y
+  call void @use8(i8 %t0)
+  %t1 = add i8 %t0, -1
+  %x = call i8 @gen8()
+  %t2 = and i8 %x, %t1 ; swapped order
+  %ret = icmp eq i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @c1(i8 %y) {
+; CHECK-LABEL: @c1(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 1, %y
+  call void @use8(i8 %t0)
+  %t1 = add i8 %t0, -1
+  %x = call i8 @gen8()
+  %t2 = and i8 %t1, %x
+  %ret = icmp eq i8 %x, %t2 ; swapped order
+  ret i1 %ret
+}
+
+define i1 @c2(i8 %y) {
+; CHECK-LABEL: @c2(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 1, %y
+  call void @use8(i8 %t0)
+  %t1 = add i8 %t0, -1
+  %x = call i8 @gen8()
+  %t2 = and i8 %x, %t1 ; swapped order
+  %ret = icmp eq i8 %x, %t2 ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+define i1 @oneuse0(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT:    call void @use8(i8 [[T1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 1, %y
+  call void @use8(i8 %t0) ; needed anyway
+  %t1 = add i8 %t0, -1
+  call void @use8(i8 %t1)
+  %t2 = and i8 %t1, %x
+  %ret = icmp eq i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @oneuse1(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse1(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T2]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge i8 [[T1]], [[X]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 1, %y
+  call void @use8(i8 %t0) ; needed anyway
+  %t1 = add i8 %t0, -1
+  %t2 = and i8 %t1, %x
+  call void @use8(i8 %t2)
+  %ret = icmp eq i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @oneuse2(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse2(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT:    call void @use8(i8 [[T1]])
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T2]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge i8 [[T1]], [[X]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 1, %y
+  call void @use8(i8 %t0)
+  %t1 = add i8 %t0, -1
+  call void @use8(i8 %t1)
+  %t2 = and i8 %t1, %x
+  call void @use8(i8 %t2)
+  %ret = icmp eq i8 %t2, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp eq i8 [[T2]], [[NOTX:%.*]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %t0 = shl i8 1, %y
+  call void @use8(i8 %t0)
+  %t1 = add i8 %t0, -1
+  %t2 = and i8 %t1, %x
+  %ret = icmp eq i8 %t2, %notx ; not %x
+  ret i1 %ret
+}
+
+define i1 @n1(i8 %x, i8 %y) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp eq i8 [[T2]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %t0 = shl i8 -1, %y ; not 1
+  call void @use8(i8 %t0)
+  %t1 = add i8 %t0, -1
+  %t2 = and i8 %t1, %x
+  %ret = icmp eq i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @n2(i8 %x, i8 %y) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = add i8 [[T0]], 1
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp eq i8 [[T2]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %t0 = shl i8 1, %y
+  call void @use8(i8 %t0)
+  %t1 = add i8 %t0, 1 ; not -1
+  %t2 = and i8 %t1, %x
+  %ret = icmp eq i8 %t2, %x
+  ret i1 %ret
+}
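
The v3 files build the same low-bit mask a third way, as `(1 << y) + (-1)`; the extra `@use8(%t0)` call keeps the `shl` alive, which is why the comment notes that the pattern itself cannot be re-canonicalized, yet the compare still folds to `(x >> y) == 0`, or to `icmp uge mask, x` when the mask has a further use as in the @oneuse* tests. A small illustrative C loop (not part of the commit) confirms the underlying identity:

#include <assert.h>
#include <stdio.h>

int main(void) {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 8; ++y) {
      unsigned mask = ((1u << y) - 1u) & 0xFFu;     /* add (shl i8 1, %y), -1  */
      int pattern = ((x & mask) == x);              /* x & ((1 << y) - 1) == x */
      int relaxed = (x <= mask);                    /* x u<= mask              */
      int final   = ((x >> y) == 0);
      assert(pattern == relaxed && relaxed == final);
    }
  puts("(1 << y) - 1 mask form, u<= form and (x >> y) == 0 agree for i8");
  return 0;
}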

Added: llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,282 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+;   x & ((1 << y) + (-1)) != x
+; Should be transformed into:
+;   x u> ((1 << y) + (-1))
+; That is then later transformed into:
+;   (x >> y) != 0
+
+; This pattern is non-canonical, but we cannot canonicalize it due to extra uses.
+
+declare void @use8(i8)
+declare void @use2i8(<2 x i8>)
+declare void @use3i8(<3 x i8>)
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x, i8 %y) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr i8 [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 1, %y
+  call void @use8(i8 %t0)
+  %t1 = add i8 %t0, -1
+  %t2 = and i8 %t1, %x
+  %ret = icmp ne i8 %t2, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @p1_vec(
+; CHECK-NEXT:    [[T0:%.*]] = shl <2 x i8> <i8 1, i8 1>, [[Y:%.*]]
+; CHECK-NEXT:    call void @use2i8(<2 x i8> [[T0]])
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr <2 x i8> [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <2 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %t0 = shl <2 x i8> <i8 1, i8 1>, %y
+  call void @use2i8(<2 x i8> %t0)
+  %t1 = add <2 x i8> %t0, <i8 -1, i8 -1>
+  %t2 = and <2 x i8> %t1, %x
+  %ret = icmp ne <2 x i8> %t2, %x
+  ret <2 x i1> %ret
+}
+
+define <3 x i1> @p2_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p2_vec_undef0(
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i8> <i8 1, i8 undef, i8 1>, [[Y:%.*]]
+; CHECK-NEXT:    call void @use3i8(<3 x i8> [[T0]])
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <3 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %t0 = shl <3 x i8> <i8 1, i8 undef, i8 1>, %y
+  call void @use3i8(<3 x i8> %t0)
+  %t1 = add <3 x i8> %t0, <i8 -1, i8 -1, i8 -1>
+  %t2 = and <3 x i8> %t1, %x
+  %ret = icmp ne <3 x i8> %t2, %x
+  ret <3 x i1> %ret
+}
+
+define <3 x i1> @p3_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p3_vec_undef0(
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i8> <i8 1, i8 1, i8 1>, [[Y:%.*]]
+; CHECK-NEXT:    call void @use3i8(<3 x i8> [[T0]])
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <3 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %t0 = shl <3 x i8> <i8 1, i8 1, i8 1>, %y
+  call void @use3i8(<3 x i8> %t0)
+  %t1 = add <3 x i8> %t0, <i8 -1, i8 undef, i8 -1>
+  %t2 = and <3 x i8> %t1, %x
+  %ret = icmp ne <3 x i8> %t2, %x
+  ret <3 x i1> %ret
+}
+
+define <3 x i1> @p4_vec_undef2(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p4_vec_undef2(
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i8> <i8 1, i8 undef, i8 1>, [[Y:%.*]]
+; CHECK-NEXT:    call void @use3i8(<3 x i8> [[T0]])
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <3 x i8> [[X_HIGHBITS]], zeroinitializer
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %t0 = shl <3 x i8> <i8 1, i8 undef, i8 1>, %y
+  call void @use3i8(<3 x i8> %t0)
+  %t1 = add <3 x i8> %t0, <i8 -1, i8 undef, i8 -1>
+  %t2 = and <3 x i8> %t1, %x
+  %ret = icmp ne <3 x i8> %t2, %x
+  ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0(i8 %y) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 1, %y
+  call void @use8(i8 %t0)
+  %t1 = add i8 %t0, -1
+  %x = call i8 @gen8()
+  %t2 = and i8 %x, %t1 ; swapped order
+  %ret = icmp ne i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @c1(i8 %y) {
+; CHECK-LABEL: @c1(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 1, %y
+  call void @use8(i8 %t0)
+  %t1 = add i8 %t0, -1
+  %x = call i8 @gen8()
+  %t2 = and i8 %t1, %x
+  %ret = icmp ne i8 %x, %t2 ; swapped order
+  ret i1 %ret
+}
+
+define i1 @c2(i8 %y) {
+; CHECK-LABEL: @c2(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i8 [[X_HIGHBITS]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 1, %y
+  call void @use8(i8 %t0)
+  %t1 = add i8 %t0, -1
+  %x = call i8 @gen8()
+  %t2 = and i8 %x, %t1 ; swapped order
+  %ret = icmp ne i8 %x, %t2 ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+define i1 @oneuse0(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT:    call void @use8(i8 [[T1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 1, %y
+  call void @use8(i8 %t0) ; needed anyway
+  %t1 = add i8 %t0, -1
+  call void @use8(i8 %t1)
+  %t2 = and i8 %t1, %x
+  %ret = icmp ne i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @oneuse1(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse1(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T2]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[T1]], [[X]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 1, %y
+  call void @use8(i8 %t0) ; needed anyway
+  %t1 = add i8 %t0, -1
+  %t2 = and i8 %t1, %x
+  call void @use8(i8 %t2)
+  %ret = icmp ne i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @oneuse2(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse2(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT:    call void @use8(i8 [[T1]])
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T2]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[T1]], [[X]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 1, %y
+  call void @use8(i8 %t0)
+  %t1 = add i8 %t0, -1
+  call void @use8(i8 %t1)
+  %t2 = and i8 %t1, %x
+  call void @use8(i8 %t2)
+  %ret = icmp ne i8 %t2, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp ne i8 [[T2]], [[NOTX:%.*]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %t0 = shl i8 1, %y
+  call void @use8(i8 %t0)
+  %t1 = add i8 %t0, -1
+  %t2 = and i8 %t1, %x
+  %ret = icmp ne i8 %t2, %notx ; not %x
+  ret i1 %ret
+}
+
+define i1 @n1(i8 %x, i8 %y) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = add i8 [[T0]], -1
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp ne i8 [[T2]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %t0 = shl i8 -1, %y ; not 1
+  call void @use8(i8 %t0)
+  %t1 = add i8 %t0, -1
+  %t2 = and i8 %t1, %x
+  %ret = icmp ne i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @n2(i8 %x, i8 %y) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = add i8 [[T0]], 1
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp ne i8 [[T2]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %t0 = shl i8 1, %y
+  call void @use8(i8 %t0)
+  %t1 = add i8 %t0, 1 ; not -1
+  %t2 = and i8 %t1, %x
+  %ret = icmp ne i8 %t2, %x
+  ret i1 %ret
+}
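
Its ne counterpart mirrors this: where every value folds away the compare becomes `(x >> y) != 0`, and when the mask has extra uses the @oneuse* CHECK lines keep the intermediate `icmp ult i8 [[T1]], [[X]]`, i.e. `mask u< x`. One more hypothetical i8 sweep (not from the tree) ties the forms together:

#include <assert.h>
#include <stdio.h>

int main(void) {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 8; ++y) {
      unsigned mask = ((1u << y) - 1u) & 0xFFu;     /* add (shl i8 1, %y), -1 */
      int pattern = ((x & mask) != x);
      int ugt     = (mask < x);                     /* icmp ult mask, x <=> x u> mask */
      int final   = ((x >> y) != 0);
      assert(pattern == ugt && ugt == final);
    }
  puts("ne mask form, u> form and (x >> y) != 0 agree for i8");
  return 0;
}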

Added: llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,248 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+;   x & ((-1 << y) >> y) == x
+; Should be transformed into:
+;   x u<= ((-1 << y) >> y)
+
+; This pattern is non-canonical, but we cannot canonicalize it due to extra uses.
+
+declare void @use8(i8)
+declare void @use2i8(<2 x i8>)
+declare void @use3i8(<3 x i8>)
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x, i8 %y) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  call void @use8(i8 %t0)
+  %t1 = lshr i8 %t0, %y
+  %t2 = and i8 %t1, %x
+  %ret = icmp eq i8 %t2, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @p1_vec(
+; CHECK-NEXT:    [[T0:%.*]] = shl <2 x i8> <i8 -1, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT:    call void @use2i8(<2 x i8> [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = lshr <2 x i8> [[T0]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge <2 x i8> [[T1]], [[X:%.*]]
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %t0 = shl <2 x i8> <i8 -1, i8 -1>, %y
+  call void @use2i8(<2 x i8> %t0)
+  %t1 = lshr <2 x i8> %t0, %y
+  %t2 = and <2 x i8> %t1, %x
+  %ret = icmp eq <2 x i8> %t2, %x
+  ret <2 x i1> %ret
+}
+
+define <3 x i1> @p2_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p2_vec_undef0(
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT:    call void @use3i8(<3 x i8> [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = lshr <3 x i8> [[T0]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge <3 x i8> [[T1]], [[X:%.*]]
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %t0 = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, %y
+  call void @use3i8(<3 x i8> %t0)
+  %t1 = lshr <3 x i8> %t0, %y
+  %t2 = and <3 x i8> %t1, %x
+  %ret = icmp eq <3 x i8> %t2, %x
+  ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0(i8 %y) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule i8 [[X]], [[T1]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  call void @use8(i8 %t0)
+  %t1 = lshr i8 %t0, %y
+  %x = call i8 @gen8()
+  %t2 = and i8 %x, %t1 ; swapped order
+  %ret = icmp eq i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @c1(i8 %y) {
+; CHECK-LABEL: @c1(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule i8 [[X]], [[T1]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  call void @use8(i8 %t0)
+  %t1 = lshr i8 %t0, %y
+  %x = call i8 @gen8()
+  %t2 = and i8 %t1, %x
+  %ret = icmp eq i8 %x, %t2 ; swapped order
+  ret i1 %ret
+}
+
+define i1 @c2(i8 %y) {
+; CHECK-LABEL: @c2(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule i8 [[X]], [[T1]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  call void @use8(i8 %t0)
+  %t1 = lshr i8 %t0, %y
+  %x = call i8 @gen8()
+  %t2 = and i8 %x, %t1 ; swapped order
+  %ret = icmp eq i8 %x, %t2 ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+define i1 @oneuse0(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT:    call void @use8(i8 [[T1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  call void @use8(i8 %t0) ; needed anyway
+  %t1 = lshr i8 %t0, %y
+  call void @use8(i8 %t1)
+  %t2 = and i8 %t1, %x
+  %ret = icmp eq i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @oneuse1(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse1(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T2]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge i8 [[T1]], [[X]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  call void @use8(i8 %t0) ; needed anyway
+  %t1 = lshr i8 %t0, %y
+  %t2 = and i8 %t1, %x
+  call void @use8(i8 %t2)
+  %ret = icmp eq i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @oneuse2(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse2(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT:    call void @use8(i8 [[T1]])
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T2]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge i8 [[T1]], [[X]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  call void @use8(i8 %t0)
+  %t1 = lshr i8 %t0, %y
+  call void @use8(i8 %t1)
+  %t2 = and i8 %t1, %x
+  call void @use8(i8 %t2)
+  %ret = icmp eq i8 %t2, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp eq i8 [[T2]], [[NOTX:%.*]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %t0 = shl i8 -1, %y
+  call void @use8(i8 %t0)
+  %t1 = lshr i8 %t0, %y
+  %t2 = and i8 %t1, %x
+  %ret = icmp eq i8 %t2, %notx ; not %x
+  ret i1 %ret
+}
+
+define i1 @n1(i8 %x, i8 %y) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp eq i8 [[T2]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %t0 = shl i8 1, %y ; not -1
+  call void @use8(i8 %t0)
+  %t1 = lshr i8 %t0, %y
+  %t2 = and i8 %t1, %x
+  %ret = icmp eq i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @n2(i8 %x, i8 %y1, i8 %y2) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y1:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = lshr i8 [[T0]], [[Y2:%.*]]
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp eq i8 [[T2]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %t0 = shl i8 -1, %y1 ; not %y2
+  call void @use8(i8 %t0)
+  %t1 = lshr i8 %t0, %y2 ; not %y1
+  %t2 = and i8 %t1, %x
+  %ret = icmp eq i8 %t2, %x
+  ret i1 %ret
+}
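
As a sanity check (not part of the committed test file), the equivalence this file canonicalizes, x & ((-1 << y) >> y) == x into x u<= ((-1 << y) >> y), can be brute-forced in plain C; a minimal sketch, assuming 8-bit operands as in the i8 tests (all names below are illustrative):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    for (unsigned y = 0; y < 8; ++y) {
        /* (-1 << y) >> y in 8 bits: a mask of the low (8 - y) bits */
        uint8_t mask = (uint8_t)((uint8_t)(0xFFu << y) >> y);
        for (unsigned xi = 0; xi < 256; ++xi) {
            uint8_t x = (uint8_t)xi;
            int original  = ((x & mask) == x);  /* x & ((-1 << y) >> y) == x */
            int canonical = (x <= mask);        /* x u<= ((-1 << y) >> y)    */
            assert(original == canonical);
        }
    }
    puts("eq form and u<= form agree for all i8 x and y in [0,7]");
    return 0;
}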

Added: llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,248 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=38123
+
+; Pattern:
+;   x & ((-1 << y) >> y) != x
+; Should be transformed into:
+;   x u> ((-1 << y) >> y)
+
+; This pattern is non-canonical, but we cannot canonicalize it due to extra uses.
+
+declare void @use8(i8)
+declare void @use2i8(<2 x i8>)
+declare void @use3i8(<3 x i8>)
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i1 @p0(i8 %x, i8 %y) {
+; CHECK-LABEL: @p0(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  call void @use8(i8 %t0)
+  %t1 = lshr i8 %t0, %y
+  %t2 = and i8 %t1, %x
+  %ret = icmp ne i8 %t2, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Vector tests
+; ============================================================================ ;
+
+define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @p1_vec(
+; CHECK-NEXT:    [[T0:%.*]] = shl <2 x i8> <i8 -1, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT:    call void @use2i8(<2 x i8> [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = lshr <2 x i8> [[T0]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i8> [[T1]], [[X:%.*]]
+; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
+;
+  %t0 = shl <2 x i8> <i8 -1, i8 -1>, %y
+  call void @use2i8(<2 x i8> %t0)
+  %t1 = lshr <2 x i8> %t0, %y
+  %t2 = and <2 x i8> %t1, %x
+  %ret = icmp ne <2 x i8> %t2, %x
+  ret <2 x i1> %ret
+}
+
+define <3 x i1> @p2_vec_undef0(<3 x i8> %x, <3 x i8> %y) {
+; CHECK-LABEL: @p2_vec_undef0(
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT:    call void @use3i8(<3 x i8> [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = lshr <3 x i8> [[T0]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <3 x i8> [[T1]], [[X:%.*]]
+; CHECK-NEXT:    ret <3 x i1> [[TMP1]]
+;
+  %t0 = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, %y
+  call void @use3i8(<3 x i8> %t0)
+  %t1 = lshr <3 x i8> %t0, %y
+  %t2 = and <3 x i8> %t1, %x
+  %ret = icmp ne <3 x i8> %t2, %x
+  ret <3 x i1> %ret
+}
+
+; ============================================================================ ;
+; Commutativity tests.
+; ============================================================================ ;
+
+declare i8 @gen8()
+
+define i1 @c0(i8 %y) {
+; CHECK-LABEL: @c0(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i8 [[X]], [[T1]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  call void @use8(i8 %t0)
+  %t1 = lshr i8 %t0, %y
+  %x = call i8 @gen8()
+  %t2 = and i8 %x, %t1 ; swapped order
+  %ret = icmp ne i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @c1(i8 %y) {
+; CHECK-LABEL: @c1(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i8 [[X]], [[T1]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  call void @use8(i8 %t0)
+  %t1 = lshr i8 %t0, %y
+  %x = call i8 @gen8()
+  %t2 = and i8 %t1, %x
+  %ret = icmp ne i8 %x, %t2 ; swapped order
+  ret i1 %ret
+}
+
+define i1 @c2(i8 %y) {
+; CHECK-LABEL: @c2(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT:    [[X:%.*]] = call i8 @gen8()
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i8 [[X]], [[T1]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  call void @use8(i8 %t0)
+  %t1 = lshr i8 %t0, %y
+  %x = call i8 @gen8()
+  %t2 = and i8 %x, %t1 ; swapped order
+  %ret = icmp ne i8 %x, %t2 ; swapped order
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; One-use tests. We don't care about multi-uses here.
+; ============================================================================ ;
+
+define i1 @oneuse0(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse0(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT:    call void @use8(i8 [[T1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  call void @use8(i8 %t0) ; needed anyway
+  %t1 = lshr i8 %t0, %y
+  call void @use8(i8 %t1)
+  %t2 = and i8 %t1, %x
+  %ret = icmp ne i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @oneuse1(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse1(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T2]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[T1]], [[X]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  call void @use8(i8 %t0) ; needed anyway
+  %t1 = lshr i8 %t0, %y
+  %t2 = and i8 %t1, %x
+  call void @use8(i8 %t2)
+  %ret = icmp ne i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @oneuse2(i8 %x, i8 %y) {
+; CHECK-LABEL: @oneuse2(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT:    call void @use8(i8 [[T1]])
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T2]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i8 [[T1]], [[X]]
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %t0 = shl i8 -1, %y
+  call void @use8(i8 %t0)
+  %t1 = lshr i8 %t0, %y
+  call void @use8(i8 %t1)
+  %t2 = and i8 %t1, %x
+  call void @use8(i8 %t2)
+  %ret = icmp ne i8 %t2, %x
+  ret i1 %ret
+}
+
+; ============================================================================ ;
+; Negative tests
+; ============================================================================ ;
+
+define i1 @n0(i8 %x, i8 %y, i8 %notx) {
+; CHECK-LABEL: @n0(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp ne i8 [[T2]], [[NOTX:%.*]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %t0 = shl i8 -1, %y
+  call void @use8(i8 %t0)
+  %t1 = lshr i8 %t0, %y
+  %t2 = and i8 %t1, %x
+  %ret = icmp ne i8 %t2, %notx ; not %x
+  ret i1 %ret
+}
+
+define i1 @n1(i8 %x, i8 %y) {
+; CHECK-LABEL: @n1(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 1, [[Y:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = lshr i8 [[T0]], [[Y]]
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp ne i8 [[T2]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %t0 = shl i8 1, %y ; not -1
+  call void @use8(i8 %t0)
+  %t1 = lshr i8 %t0, %y
+  %t2 = and i8 %t1, %x
+  %ret = icmp ne i8 %t2, %x
+  ret i1 %ret
+}
+
+define i1 @n2(i8 %x, i8 %y1, i8 %y2) {
+; CHECK-LABEL: @n2(
+; CHECK-NEXT:    [[T0:%.*]] = shl i8 -1, [[Y1:%.*]]
+; CHECK-NEXT:    call void @use8(i8 [[T0]])
+; CHECK-NEXT:    [[T1:%.*]] = lshr i8 [[T0]], [[Y2:%.*]]
+; CHECK-NEXT:    [[T2:%.*]] = and i8 [[T1]], [[X:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = icmp ne i8 [[T2]], [[X]]
+; CHECK-NEXT:    ret i1 [[RET]]
+;
+  %t0 = shl i8 -1, %y1 ; not %y2
+  call void @use8(i8 %t0)
+  %t1 = lshr i8 %t0, %y2 ; not %y1
+  %t2 = and i8 %t1, %x
+  %ret = icmp ne i8 %t2, %x
+  ret i1 %ret
+}
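
The 'ne' variant in the file above is the logical negation of the preceding 'eq' variant, so the expected canonical comparison flips from u<= to u>. A matching brute-force sketch (again illustrative only, assuming 8-bit operands as in the i8 tests):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    for (unsigned y = 0; y < 8; ++y) {
        /* (-1 << y) >> y in 8 bits: a mask of the low (8 - y) bits */
        uint8_t mask = (uint8_t)((uint8_t)(0xFFu << y) >> y);
        for (unsigned xi = 0; xi < 256; ++xi) {
            uint8_t x = (uint8_t)xi;
            assert(((x & mask) != x) == (x > mask)); /* icmp ne  <=>  icmp ugt */
        }
    }
    puts("ne form and u> form agree for all i8 x and y in [0,7]");
    return 0;
}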

Added: llvm/trunk/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll?rev=358552&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll Tue Apr 16 21:52:47 2019
@@ -0,0 +1,359 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=37603
+; https://reviews.llvm.org/D46760#1123713
+
+; Pattern:
+;   x >> y << y
+; Should be transformed into:
+;   x & (-1 << y)
+
+; ============================================================================ ;
+; Basic positive tests
+; ============================================================================ ;
+
+define i32 @positive_samevar(i32 %x, i32 %y) {
+; CHECK-LABEL: @positive_samevar(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = and i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %tmp0 = lshr i32 %x, %y
+  %ret = shl i32 %tmp0, %y
+  ret i32 %ret
+}
+
+define i32 @positive_sameconst(i32 %x) {
+; CHECK-LABEL: @positive_sameconst(
+; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[X:%.*]], -32
+; CHECK-NEXT:    ret i32 [[TMP0]]
+;
+  %tmp0 = lshr i32 %x, 5
+  %ret = shl i32 %tmp0, 5
+  ret i32 %ret
+}
+
+define i32 @positive_biggerlshr(i32 %x) {
+; CHECK-LABEL: @positive_biggerlshr(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i32 [[X:%.*]], 10
+; CHECK-NEXT:    [[RET:%.*]] = shl nuw nsw i32 [[TMP0]], 5
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %tmp0 = lshr i32 %x, 10
+  %ret = shl i32 %tmp0, 5
+  ret i32 %ret
+}
+
+define i32 @positive_biggershl(i32 %x) {
+; CHECK-LABEL: @positive_biggershl(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i32 [[X:%.*]], 5
+; CHECK-NEXT:    [[RET:%.*]] = shl i32 [[TMP0]], 10
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %tmp0 = lshr i32 %x, 5
+  %ret = shl i32 %tmp0, 10
+  ret i32 %ret
+}
+
+; ============================================================================ ;
+; EXACT on the first shift
+; ============================================================================ ;
+
+define i32 @positive_samevar_lshrexact(i32 %x, i32 %y) {
+; CHECK-LABEL: @positive_samevar_lshrexact(
+; CHECK-NEXT:    ret i32 [[X:%.*]]
+;
+  %tmp0 = lshr exact i32 %x, %y
+  %ret = shl i32 %tmp0, %y ; this one is obviously 'nuw'.
+  ret i32 %ret
+}
+
+define i32 @positive_sameconst_lshrexact(i32 %x) {
+; CHECK-LABEL: @positive_sameconst_lshrexact(
+; CHECK-NEXT:    ret i32 [[X:%.*]]
+;
+  %tmp0 = lshr exact i32 %x, 5
+  %ret = shl i32 %tmp0, 5 ; this one is obviously 'nuw'.
+  ret i32 %ret
+}
+
+define i32 @positive_biggerlshr_lshrexact(i32 %x) {
+; CHECK-LABEL: @positive_biggerlshr_lshrexact(
+; CHECK-NEXT:    [[RET:%.*]] = lshr exact i32 [[X:%.*]], 5
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %tmp0 = lshr exact i32 %x, 10
+  %ret = shl i32 %tmp0, 5 ; this one is obviously 'nuw'.
+  ret i32 %ret
+}
+
+define i32 @positive_biggershl_lshrexact(i32 %x) {
+; CHECK-LABEL: @positive_biggershl_lshrexact(
+; CHECK-NEXT:    [[RET:%.*]] = shl i32 [[X:%.*]], 5
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %tmp0 = lshr exact i32 %x, 5
+  %ret = shl i32 %tmp0, 10
+  ret i32 %ret
+}
+
+define i32 @positive_biggershl_lshrexact_shlnuw(i32 %x) {
+; CHECK-LABEL: @positive_biggershl_lshrexact_shlnuw(
+; CHECK-NEXT:    [[RET:%.*]] = shl nuw i32 [[X:%.*]], 5
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %tmp0 = lshr exact i32 %x, 5
+  %ret = shl nuw i32 %tmp0, 10
+  ret i32 %ret
+}
+
+; ============================================================================ ;
+; Vector
+; ============================================================================ ;
+
+define <2 x i32> @positive_samevar_vec(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @positive_samevar_vec(
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> <i32 -1, i32 -1>, [[Y:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = and <2 x i32> [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    ret <2 x i32> [[RET]]
+;
+  %tmp0 = lshr <2 x i32> %x, %y
+  %ret = shl <2 x i32> %tmp0, %y
+  ret <2 x i32> %ret
+}
+
+; ============================================================================ ;
+; Constant Vectors
+; ============================================================================ ;
+
+define <2 x i32> @positive_sameconst_vec(<2 x i32> %x) {
+; CHECK-LABEL: @positive_sameconst_vec(
+; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i32> [[X:%.*]], <i32 -32, i32 -32>
+; CHECK-NEXT:    ret <2 x i32> [[TMP0]]
+;
+  %tmp0 = lshr <2 x i32> %x, <i32 5, i32 5>
+  %ret = shl <2 x i32> %tmp0, <i32 5, i32 5>
+  ret <2 x i32> %ret
+}
+
+define <3 x i32> @positive_sameconst_vec_undef0(<3 x i32> %x) {
+; CHECK-LABEL: @positive_sameconst_vec_undef0(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT:    ret <3 x i32> [[RET]]
+;
+  %tmp0 = lshr <3 x i32> %x, <i32 5, i32 undef, i32 5>
+  %ret = shl <3 x i32> %tmp0, <i32 5, i32 5, i32 5>
+  ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_sameconst_vec_undef1(<3 x i32> %x) {
+; CHECK-LABEL: @positive_sameconst_vec_undef1(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT:    ret <3 x i32> [[RET]]
+;
+  %tmp0 = lshr <3 x i32> %x, <i32 5, i32 5, i32 5>
+  %ret = shl <3 x i32> %tmp0, <i32 5, i32 undef, i32 5>
+  ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_sameconst_vec_undef2(<3 x i32> %x) {
+; CHECK-LABEL: @positive_sameconst_vec_undef2(
+; CHECK-NEXT:    [[RET:%.*]] = and <3 x i32> [[X:%.*]], <i32 -32, i32 undef, i32 -32>
+; CHECK-NEXT:    ret <3 x i32> [[RET]]
+;
+  %tmp0 = lshr <3 x i32> %x, <i32 5, i32 undef, i32 5>
+  %ret = shl <3 x i32> %tmp0, <i32 5, i32 undef, i32 5>
+  ret <3 x i32> %ret
+}
+
+define <2 x i32> @positive_biggerlshr_vec(<2 x i32> %x) {
+; CHECK-LABEL: @positive_biggerlshr_vec(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 10, i32 10>
+; CHECK-NEXT:    [[RET:%.*]] = shl nuw nsw <2 x i32> [[TMP0]], <i32 5, i32 5>
+; CHECK-NEXT:    ret <2 x i32> [[RET]]
+;
+  %tmp0 = lshr <2 x i32> %x, <i32 10, i32 10>
+  %ret = shl <2 x i32> %tmp0, <i32 5, i32 5>
+  ret <2 x i32> %ret
+}
+
+define <3 x i32> @positive_biggerlshr_vec_undef0(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggerlshr_vec_undef0(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 10, i32 undef, i32 10>
+; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT:    ret <3 x i32> [[RET]]
+;
+  %tmp0 = lshr <3 x i32> %x, <i32 10, i32 undef, i32 10>
+  %ret = shl <3 x i32> %tmp0, <i32 5, i32 5, i32 5>
+  ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_biggerlshr_vec_undef1(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggerlshr_vec_undef1(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 10, i32 10, i32 10>
+; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT:    ret <3 x i32> [[RET]]
+;
+  %tmp0 = lshr <3 x i32> %x, <i32 10, i32 10, i32 10>
+  %ret = shl <3 x i32> %tmp0, <i32 5, i32 undef, i32 5>
+  ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_biggerlshr_vec_undef2(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggerlshr_vec_undef2(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 10, i32 undef, i32 10>
+; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT:    ret <3 x i32> [[RET]]
+;
+  %tmp0 = lshr <3 x i32> %x, <i32 10, i32 undef, i32 10>
+  %ret = shl <3 x i32> %tmp0, <i32 5, i32 undef, i32 5>
+  ret <3 x i32> %ret
+}
+
+define <2 x i32> @positive_biggershl_vec(<2 x i32> %x) {
+; CHECK-LABEL: @positive_biggershl_vec(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 5, i32 5>
+; CHECK-NEXT:    [[RET:%.*]] = shl <2 x i32> [[TMP0]], <i32 10, i32 10>
+; CHECK-NEXT:    ret <2 x i32> [[RET]]
+;
+  %tmp0 = lshr <2 x i32> %x, <i32 5, i32 5>
+  %ret = shl <2 x i32> %tmp0, <i32 10, i32 10>
+  ret <2 x i32> %ret
+}
+
+define <3 x i32> @positive_biggershl_vec_undef0(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggershl_vec_undef0(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 10, i32 10, i32 10>
+; CHECK-NEXT:    ret <3 x i32> [[RET]]
+;
+  %tmp0 = lshr <3 x i32> %x, <i32 5, i32 undef, i32 5>
+  %ret = shl <3 x i32> %tmp0, <i32 10, i32 10, i32 10>
+  ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_biggershl_vec_undef1(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggershl_vec_undef1(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 10, i32 undef, i32 10>
+; CHECK-NEXT:    ret <3 x i32> [[RET]]
+;
+  %tmp0 = lshr <3 x i32> %x, <i32 5, i32 5, i32 5>
+  %ret = shl <3 x i32> %tmp0, <i32 10, i32 undef, i32 10>
+  ret <3 x i32> %ret
+}
+
+define <3 x i32> @positive_biggershl_vec_undef2(<3 x i32> %x) {
+; CHECK-LABEL: @positive_biggershl_vec_undef2(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i32> [[TMP0]], <i32 10, i32 undef, i32 10>
+; CHECK-NEXT:    ret <3 x i32> [[RET]]
+;
+  %tmp0 = lshr <3 x i32> %x, <i32 5, i32 undef, i32 5>
+  %ret = shl <3 x i32> %tmp0, <i32 10, i32 undef, i32 10>
+  ret <3 x i32> %ret
+}
+
+; ============================================================================ ;
+; Positive multi-use tests with constant
+; ============================================================================ ;
+
+; FIXME: drop 'exact' once it is no longer needed.
+
+define i32 @positive_sameconst_multiuse(i32 %x) {
+; CHECK-LABEL: @positive_sameconst_multiuse(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr exact i32 [[X:%.*]], 5
+; CHECK-NEXT:    call void @use32(i32 [[TMP0]])
+; CHECK-NEXT:    ret i32 [[X]]
+;
+  %tmp0 = lshr exact i32 %x, 5
+  call void @use32(i32 %tmp0)
+  %ret = shl i32 %tmp0, 5
+  ret i32 %ret
+}
+
+define i32 @positive_biggerlshr_multiuse(i32 %x) {
+; CHECK-LABEL: @positive_biggerlshr_multiuse(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr exact i32 [[X:%.*]], 10
+; CHECK-NEXT:    call void @use32(i32 [[TMP0]])
+; CHECK-NEXT:    [[RET:%.*]] = lshr exact i32 [[X]], 5
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %tmp0 = lshr exact i32 %x, 10
+  call void @use32(i32 %tmp0)
+  %ret = shl i32 %tmp0, 5
+  ret i32 %ret
+}
+
+define i32 @positive_biggershl_multiuse(i32 %x) {
+; CHECK-LABEL: @positive_biggershl_multiuse(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr exact i32 [[X:%.*]], 5
+; CHECK-NEXT:    call void @use32(i32 [[TMP0]])
+; CHECK-NEXT:    [[RET:%.*]] = shl i32 [[X]], 5
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %tmp0 = lshr exact i32 %x, 5
+  call void @use32(i32 %tmp0)
+  %ret = shl i32 %tmp0, 10
+  ret i32 %ret
+}
+
+; ============================================================================ ;
+; Constant Non-Splat Vectors
+; ============================================================================ ;
+
+define <2 x i32> @positive_biggerlshr_vec_nonsplat(<2 x i32> %x) {
+; CHECK-LABEL: @positive_biggerlshr_vec_nonsplat(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 5, i32 5>
+; CHECK-NEXT:    [[RET:%.*]] = shl <2 x i32> [[TMP0]], <i32 5, i32 10>
+; CHECK-NEXT:    ret <2 x i32> [[RET]]
+;
+  %tmp0 = lshr <2 x i32> %x, <i32 5, i32 5>
+  %ret = shl <2 x i32> %tmp0, <i32 5, i32 10>
+  ret <2 x i32> %ret
+}
+
+define <2 x i32> @positive_biggerLlshr_vec_nonsplat(<2 x i32> %x) {
+; CHECK-LABEL: @positive_biggerLlshr_vec_nonsplat(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 5, i32 10>
+; CHECK-NEXT:    [[RET:%.*]] = shl <2 x i32> [[TMP0]], <i32 5, i32 5>
+; CHECK-NEXT:    ret <2 x i32> [[RET]]
+;
+  %tmp0 = lshr <2 x i32> %x, <i32 5, i32 10>
+  %ret = shl <2 x i32> %tmp0, <i32 5, i32 5>
+  ret <2 x i32> %ret
+}
+
+; ============================================================================ ;
+; Negative tests. Should not be folded.
+; ============================================================================ ;
+
+define i32 @negative_twovars(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @negative_twovars(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = shl i32 [[TMP0]], [[Z:%.*]]
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %tmp0 = lshr i32 %x, %y
+  %ret = shl i32 %tmp0, %z ; %z, not %y
+  ret i32 %ret
+}
+
+declare void @use32(i32)
+
+; The fold only applies when the inner lshr has a single use.
+define i32 @negative_oneuse(i32 %x, i32 %y) {
+; CHECK-LABEL: @negative_oneuse(
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    call void @use32(i32 [[TMP0]])
+; CHECK-NEXT:    [[RET:%.*]] = shl i32 [[TMP0]], [[Y]]
+; CHECK-NEXT:    ret i32 [[RET]]
+;
+  %tmp0 = lshr i32 %x, %y
+  call void @use32(i32 %tmp0)
+  %ret = shl i32 %tmp0, %y
+  ret i32 %ret
+}
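
The masking canonicalization exercised above, (x >> y) << y into x & (-1 << y), and the 'exact'-lshr special case (the low y bits of x are already zero, so the round trip returns x unchanged) can both be spot-checked in C. A small illustrative sketch over a handful of sample values chosen here, not taken from the tests:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    const uint32_t samples[] = {0u, 1u, 31u, 0x12345678u, 0xDEADBEEFu,
                                0xFFFF0000u, 0x80000000u, 0xFFFFFFFFu};
    for (unsigned y = 0; y < 32; ++y) {
        uint32_t mask = 0xFFFFFFFFu << y;                   /* -1 << y          */
        for (size_t i = 0; i < sizeof samples / sizeof *samples; ++i) {
            uint32_t x = samples[i];
            assert(((x >> y) << y) == (x & mask));          /* canonical form   */
            uint32_t aligned = x & mask;                    /* low y bits clear */
            assert(((aligned >> y) << y) == aligned);       /* 'exact' lshr case */
        }
    }
    puts("(x >> y) << y == x & (-1 << y) holds for all sampled values");
    return 0;
}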



