[llvm] r318323 - [InstCombine] trunc (binop X, C) --> binop (trunc X, C')
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 8 06:34:50 PST 2017
Filed here:
Looks like it's stuck toggling between mul and shl nodes. :)
I'm still not sure what the policy is for this kind of situation. Clearly,
the bug exists independently of this commit, but if you think this commit
makes the bug much more likely to be hit, do we still want to revert?
On Fri, Dec 8, 2017 at 7:20 AM, Sanjay Patel <spatel at rotateright.com> wrote:
> Just started looking at the example - this hangs somewhere in the backend,
> not in instcombine (IR below). Does that affect the decision about
> reverting?
> $ ./llc -o - inf.ll -mtriple=x86_64
> .text
> .file "inf.c"
> ^C
> $ cat inf.ll
> target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
> target triple = "x86_64--linux-gnu"
> %struct.c = type { i32, [0 x i8] }
> @d = common local_unnamed_addr global i32 0, align 4
> @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32,
> void ()*, i8* } { i32 0, void ()* @msan.module_ctor, i8* null }]
> @__msan_retval_tls = external thread_local(initialexec) global [100 x i64]
> @__msan_retval_origin_tls = external thread_local(initialexec) global i32
> @__msan_param_tls = external thread_local(initialexec) global [100 x i64]
> @__msan_param_origin_tls = external thread_local(initialexec) global [200
> x i32]
> @__msan_va_arg_tls = external thread_local(initialexec) global [100 x i64]
> @__msan_va_arg_overflow_size_tls = external thread_local(initialexec)
> global i64
> @__msan_origin_tls = external thread_local(initialexec) global i32
> ; Function Attrs: norecurse nounwind sanitize_memory
> define i32 @e(%struct.c* %f) local_unnamed_addr #0 {
> entry:
> %0 = load i64, i64* getelementptr inbounds ([100 x i64], [100 x i64]*
> @__msan_param_tls, i64 0, i64 0), align 8
> %1 = ptrtoint %struct.c* %f to i64
> %_msprop = trunc i64 %0 to i32
> %2 = trunc i64 %1 to i32
> store i32 0, i32* inttoptr (i64 xor (i64 ptrtoint (i32* @d to i64), i64
> 87960930222080) to i32*), align 4
> store i32 0, i32* @d, align 4, !tbaa !2
> %3 = icmp eq i32 %_msprop, 0
> br i1 %3, label %5, label %4, !prof !6
> ; <label>:4: ; preds = %entry
> call void @__msan_warning_noreturn() #1
> call void asm sideeffect "", ""() #1
> unreachable
> ; <label>:5: ; preds = %entry
> %cmp9 = icmp sgt i32 %2, 0
> br i1 %cmp9, label %for.body.lr.ph, label %for.end
> for.body.lr.ph: ; preds = %5
> %_msprop23 = trunc i64 %0 to i8
> %6 = trunc i64 %1 to i8
> %arrayidx.phi.trans.insert = getelementptr inbounds %struct.c,
> %struct.c* %f, i64 0, i32 1, i64 0
> %_mscmp54 = icmp eq i64 %0, 0
> br i1 %_mscmp54, label %8, label %7, !prof !6
> ; <label>:7: ; preds = %
> for.body.lr.ph
> call void @__msan_warning_noreturn() #1
> call void asm sideeffect "", ""() #1
> unreachable
> ; <label>:8: ; preds = %
> for.body.lr.ph
> %.pre = load i8, i8* %arrayidx.phi.trans.insert, align 1, !tbaa !7
> %9 = ptrtoint i8* %arrayidx.phi.trans.insert to i64
> %10 = xor i64 %9, 87960930222080
> %11 = inttoptr i64 %10 to i8*
> %_msld = load i8, i8* %11, align 1
> %wide.trip.count = and i64 %1, 4294967295
> br i1 false, label %12, label %13, !prof !8
> ; <label>:12: ; preds = %8
> unreachable
> ; <label>:13: ; preds = %8
> %14 = icmp ult i64 %wide.trip.count, 32
> br i1 %14, label %for.body.preheader, label %vector.ph
> vector.ph: ; preds = %13
> %n.mod.vf = and i64 %1, 31
> %n.vec = sub nsw i64 %wide.trip.count, %n.mod.vf
> %cast.crd = trunc i64 %n.vec to i8
> %15 = mul i8 %cast.crd, %6
> %_msprop33 = or i8 %_msld, %_msprop23
> %ind.end = add i8 %.pre, %15
> %_msprop34 = insertelement <16 x i8> undef, i8 %_msld, i32 0
> %.splatinsert = insertelement <16 x i8> undef, i8 %.pre, i32 0
> %_msprop35 = shufflevector <16 x i8> %_msprop34, <16 x i8> undef, <16 x
> i32> zeroinitializer
> %.splat = shufflevector <16 x i8> %.splatinsert, <16 x i8> undef, <16 x
> i32> zeroinitializer
> %_msprop36 = insertelement <16 x i8> undef, i8 %_msprop23, i32 0
> %.splatinsert12 = insertelement <16 x i8> undef, i8 %6, i32 0
> %_msprop37 = shufflevector <16 x i8> %_msprop36, <16 x i8> undef, <16 x
> i32> zeroinitializer
> %.splat13 = shufflevector <16 x i8> %.splatinsert12, <16 x i8> undef,
> <16 x i32> zeroinitializer
> %msprop_mul_cst = mul <16 x i8> %_msprop37, <i8 0, i8 1, i8 2, i8 1, i8
> 4, i8 1, i8 2, i8 1, i8 8, i8 1, i8 2, i8 1, i8 4, i8 1, i8 2, i8 1>
> %16 = mul <16 x i8> %.splat13, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8
> 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>
> %_msprop38 = or <16 x i8> %_msprop35, %msprop_mul_cst
> %induction = add <16 x i8> %.splat, %16
> %17 = shl i8 %_msprop23, 4
> %18 = shl i8 %6, 4
> %_msprop39 = insertelement <16 x i8> undef, i8 %17, i32 0
> %.splatinsert14 = insertelement <16 x i8> undef, i8 %18, i32 0
> %_msprop40 = shufflevector <16 x i8> %_msprop39, <16 x i8> undef, <16 x
> i32> zeroinitializer
> %.splat15 = shufflevector <16 x i8> %.splatinsert14, <16 x i8> undef,
> <16 x i32> zeroinitializer
> br label %vector.body
> vector.body: ; preds = %37, %
> vector.ph
> %index = phi i64 [ 0, %vector.ph ], [ %index.next, %37 ]
> %_msphi_s46 = phi <16 x i8> [ %_msprop38, %vector.ph ], [ %_msprop47,
> %37 ]
> %vec.ind = phi <16 x i8> [ %induction, %vector.ph ], [ %vec.ind.next,
> %37 ]
> %_msprop47 = or <16 x i8> %_msphi_s46, %_msprop40
> %step.add = add <16 x i8> %vec.ind, %.splat15
> %_msprop49 = or <16 x i8> %_msprop47, %_msprop37
> %19 = add <16 x i8> %step.add, %.splat13
> %20 = or i64 %index, 1
> %21 = getelementptr inbounds %struct.c, %struct.c* %f, i64 0, i32 1, i64
> %20
> br i1 false, label %22, label %23, !prof !8
> ; <label>:22: ; preds = %vector.body
> unreachable
> ; <label>:23: ; preds = %vector.body
> %24 = ptrtoint i8* %21 to i64
> %25 = xor i64 %24, 87960930222080
> %26 = inttoptr i64 %25 to <16 x i8>*
> %27 = bitcast i8* %21 to <16 x i8>*
> %28 = add <16 x i8> %vec.ind, %.splat13
> %_msprop48 = or <16 x i8> %_msphi_s46, %_msprop37
> store <16 x i8> %_msprop48, <16 x i8>* %26, align 1
> store <16 x i8> %28, <16 x i8>* %27, align 1, !tbaa !7
> %29 = getelementptr i8, i8* %21, i64 16
> br i1 false, label %30, label %31, !prof !8
> ; <label>:30: ; preds = %23
> unreachable
> ; <label>:31: ; preds = %23
> %32 = ptrtoint i8* %29 to i64
> %33 = xor i64 %32, 87960930222080
> %34 = inttoptr i64 %33 to <16 x i8>*
> %35 = bitcast i8* %29 to <16 x i8>*
> store <16 x i8> %_msprop49, <16 x i8>* %34, align 1
> store <16 x i8> %19, <16 x i8>* %35, align 1, !tbaa !7
> %index.next = add i64 %index, 32
> br i1 false, label %36, label %37, !prof !8
> ; <label>:36: ; preds = %31
> unreachable
> ; <label>:37: ; preds = %31
> %38 = icmp eq i64 %index.next, %n.vec
> %vec.ind.next = add <16 x i8> %step.add, %.splat15
> br i1 %38, label %middle.block, label %vector.body, !llvm.loop !9
> middle.block: ; preds = %37
> br i1 false, label %39, label %40, !prof !8
> ; <label>:39: ; preds = %middle.block
> unreachable
> ; <label>:40: ; preds = %middle.block
> %41 = icmp eq i64 %n.mod.vf, 0
> br i1 %41, label %for.cond.for.end_crit_edge, label %for.body.preheader
> for.body.preheader: ; preds = %40, %13
> %_msphi_s = phi i8 [ %_msld, %13 ], [ %_msprop33, %40 ]
> %.ph = phi i8 [ %.pre, %13 ], [ %ind.end, %40 ]
> %indvars.iv.ph = phi i64 [ 0, %13 ], [ %n.vec, %40 ]
> br label %for.body
> for.body: ; preds =
> %for.body.preheader, %49
> %_msphi_s25 = phi i8 [ %_msprop27, %49 ], [ %_msphi_s,
> %for.body.preheader ]
> %42 = phi i8 [ %conv1, %49 ], [ %.ph, %for.body.preheader ]
> %indvars.iv = phi i64 [ %indvars.iv.next, %49 ], [ %indvars.iv.ph,
> %for.body.preheader ]
> %_msprop27 = or i8 %_msphi_s25, %_msprop23
> %conv1 = add i8 %42, %6
> %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
> %arrayidx5 = getelementptr inbounds %struct.c, %struct.c* %f, i64 0, i32
> 1, i64 %indvars.iv.next
> br i1 false, label %43, label %44, !prof !8
> ; <label>:43: ; preds = %for.body
> unreachable
> ; <label>:44: ; preds = %for.body
> %45 = ptrtoint i8* %arrayidx5 to i64
> %46 = xor i64 %45, 87960930222080
> %47 = inttoptr i64 %46 to i8*
> store i8 %_msprop27, i8* %47, align 1
> store i8 %conv1, i8* %arrayidx5, align 1, !tbaa !7
> br i1 false, label %48, label %49, !prof !8
> ; <label>:48: ; preds = %44
> unreachable
> ; <label>:49: ; preds = %44
> %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
> br i1 %exitcond, label %for.cond.for.end_crit_edge.loopexit, label
> %for.body, !llvm.loop !11
> for.cond.for.end_crit_edge.loopexit: ; preds = %49
> br label %for.cond.for.end_crit_edge
> for.cond.for.end_crit_edge: ; preds =
> %for.cond.for.end_crit_edge.loopexit, %40
> store i32 0, i32* inttoptr (i64 xor (i64 ptrtoint (i32* @d to i64), i64
> 87960930222080) to i32*), align 4
> store i32 %2, i32* @d, align 4, !tbaa !2
> br label %for.end
> for.end: ; preds =
> %for.cond.for.end_crit_edge, %5
> store i32 -1, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*),
> align 8
> ret i32 undef
> }
> declare void @__msan_init() local_unnamed_addr
> define internal void @msan.module_ctor() {
> tail call void @__msan_init()
> ret void
> }
> declare void @__msan_warning_noreturn()
> declare void @__msan_maybe_warning_1(i8, i32)
> declare void @__msan_maybe_store_origin_1(i8, i8*, i32)
> declare void @__msan_maybe_warning_2(i16, i32)
> declare void @__msan_maybe_store_origin_2(i16, i8*, i32)
> declare void @__msan_maybe_warning_4(i32, i32)
> declare void @__msan_maybe_store_origin_4(i32, i8*, i32)
> declare void @__msan_maybe_warning_8(i64, i32)
> declare void @__msan_maybe_store_origin_8(i64, i8*, i32)
> declare void @__msan_set_alloca_origin4(i8*, i64, i8*, i64)
> declare void @__msan_poison_stack(i8*, i64)
> declare i32 @__msan_chain_origin(i32)
> declare i8* @__msan_memmove(i8*, i8*, i64)
> declare i8* @__msan_memcpy(i8*, i8*, i64)
> declare i8* @__msan_memset(i8*, i32, i64)
> attributes #0 = { norecurse nounwind sanitize_memory
> "correctly-rounded-divide-sqrt-fp-math"="false"
> "disable-tail-calls"="false" "less-precise-fpmad"="false"
> "no-frame-pointer-elim"="false" "no-infs-fp-math"="false"
> "no-jump-tables"="false" "no-nans-fp-math"="false"
> "no-signed-zeros-fp-math"="false" "no-trapping-math"="false"
> "stack-protector-buffer-size"="8" "target-features"="+mmx,+
> popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
> "unsafe-fp-math"="false" "use-soft-float"="false" }
> attributes #1 = { nounwind }
> !llvm.module.flags = !{!0}
> !llvm.ident = !{!1}
> !0 = !{i32 1, !"wchar_size", i32 4}
> !1 = !{!"clang version 6.0.0 (trunk 320064) (llvm/trunk 320063)"}
> !2 = !{!3, !3, i64 0}
> !3 = !{!"int", !4, i64 0}
> !4 = !{!"omnipotent char", !5, i64 0}
> !5 = !{!"Simple C/C++ TBAA"}
> !6 = !{!"branch_weights", i32 1000, i32 1}
> !7 = !{!4, !4, i64 0}
> !8 = !{!"branch_weights", i32 1, i32 1000}
> !9 = distinct !{!9, !10}
> !10 = !{!"llvm.loop.isvectorized", i32 1}
> !11 = distinct !{!11, !12, !10}
> !12 = !{!"llvm.loop.unroll.runtime.disable"}
> On Fri, Dec 8, 2017 at 3:24 AM, Chandler Carruth <chandlerc at gmail.com>
> wrote:
>> I think this is still enough of a test case to revert while we sort it
>> out.
>> On Fri, Dec 8, 2017 at 4:38 AM David Blaikie <dblaikie at gmail.com> wrote:
>>> Haven't looked too far yet, but here's my current repro:
>>> typedef struct {
>>> int a;
>>> char b[]
>>> } c;
>>> d;
>>> e(c *f) {
>>> int g, h = g = f;
>>> d = 0;
>>> for (; d < h; d++)
>>> f->b[d + 1] = f->b[d] + g;
>>> }
>>> clang -cc1 -emit-obj -triple x86_64-linux-gnu -target-feature +sse4.2
>>> -O2 -w -fsanitize=memory -vectorize-loops -o /dev/null foo.ii -x c
>>> Appears to run forever/a long time.
>>> Interestingly, a nearby test case seems to fail with or without the
>>> patch:
>>> typedef struct {
>>> int a;
>>> char b[]
>>> } c;
>>> d;
>>> e(c *f) {
>>> int g = f;
>>> for (; d; d++)
>>> f->b[d + 1] = f->b[d] + g;
>>> }
>>> So I guess this might be one of those unfortunate cases of an
>>> optimization causing further exposure to an existing bug, but still..
>>> On Thu, Dec 7, 2017 at 1:56 PM Sanjay Patel <spatel at rotateright.com>
>>> wrote:
>>>> Thanks for letting me know.
>>>> This might be a good time to check out (though I haven't gotten to it
>>>> yet):
>>>> https://github.com/rutgers-apl/alive-loops
>>>> This was mentioned on llvm-dev:
>>>> http://lists.llvm.org/pipermail/llvm-dev/2017-September/117466.html
>>>> On Thu, Dec 7, 2017 at 12:48 PM, Chandler Carruth <chandlerc at gmail.com>
>>>> wrote:
>>>>> FYI, we've root-caused a compile timeout to this revision. It seems
>>>>> quite likely this is fighting another instcombine.
>>>>> We're still working on a test case, but wanted to go ahead and give a
>>>>> heads-up in case you can spot the place where we reverse this transform.
>>>>> On Wed, Nov 15, 2017 at 8:12 PM Sanjay Patel via llvm-commits <
>>>>> llvm-commits at lists.llvm.org> wrote:
>>>>>> Author: spatel
>>>>>> Date: Wed Nov 15 11:12:01 2017
>>>>>> New Revision: 318323
>>>>>> URL: http://llvm.org/viewvc/llvm-project?rev=318323&view=rev
>>>>>> Log:
>>>>>> [InstCombine] trunc (binop X, C) --> binop (trunc X, C')
>>>>>> Note that one-use and shouldChangeType() are checked ahead of the
>>>>>> switch.
>>>>>> Without the narrowing folds, we can produce inferior vector code as
>>>>>> shown in PR35299:
>>>>>> https://bugs.llvm.org/show_bug.cgi?id=35299
>>>>>> Modified:
>>>>>> llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp
>>>>>> llvm/trunk/test/Transforms/InstCombine/pr33765.ll
>>>>>> llvm/trunk/test/Transforms/InstCombine/trunc-binop-ext.ll
>>>>>> Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp
>>>>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp?rev=318323&r1=318322&r2=318323&view=diff
>>>>>> ============================================================
>>>>>> ==================
>>>>>> --- llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp
>>>>>> (original)
>>>>>> +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp Wed
>>>>>> Nov 15 11:12:01 2017
>>>>>> @@ -545,6 +545,8 @@ Instruction *InstCombiner::narrowBinOp(T
>>>>>> if (!match(Trunc.getOperand(0), m_OneUse(m_BinOp(BinOp))))
>>>>>> return nullptr;
>>>>>> + Value *BinOp0 = BinOp->getOperand(0);
>>>>>> + Value *BinOp1 = BinOp->getOperand(1);
>>>>>> switch (BinOp->getOpcode()) {
>>>>>> case Instruction::And:
>>>>>> case Instruction::Or:
>>>>>> @@ -552,20 +554,31 @@ Instruction *InstCombiner::narrowBinOp(T
>>>>>> case Instruction::Add:
>>>>>> case Instruction::Mul: {
>>>>>> Constant *C;
>>>>>> - if (match(BinOp->getOperand(1), m_Constant(C))) {
>>>>>> + if (match(BinOp1, m_Constant(C))) {
>>>>>> // trunc (binop X, C) --> binop (trunc X, C')
>>>>>> Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
>>>>>> - Value *TruncX = Builder.CreateTrunc(BinOp->getOperand(0),
>>>>>> DestTy);
>>>>>> + Value *TruncX = Builder.CreateTrunc(BinOp0, DestTy);
>>>>>> return BinaryOperator::Create(BinOp->getOpcode(), TruncX,
>>>>>> NarrowC);
>>>>>> }
>>>>>> + Value *X;
>>>>>> + if (match(BinOp0, m_ZExtOrSExt(m_Value(X))) && X->getType() ==
>>>>>> DestTy) {
>>>>>> + // trunc (binop (ext X), Y) --> binop X, (trunc Y)
>>>>>> + Value *NarrowOp1 = Builder.CreateTrunc(BinOp1, DestTy);
>>>>>> + return BinaryOperator::Create(BinOp->getOpcode(), X,
>>>>>> NarrowOp1);
>>>>>> + }
>>>>>> + if (match(BinOp1, m_ZExtOrSExt(m_Value(X))) && X->getType() ==
>>>>>> DestTy) {
>>>>>> + // trunc (binop Y, (ext X)) --> binop (trunc Y), X
>>>>>> + Value *NarrowOp0 = Builder.CreateTrunc(BinOp0, DestTy);
>>>>>> + return BinaryOperator::Create(BinOp->getOpcode(), NarrowOp0,
>>>>>> X);
>>>>>> + }
>>>>>> break;
>>>>>> }
>>>>>> case Instruction::Sub: {
>>>>>> Constant *C;
>>>>>> - if (match(BinOp->getOperand(0), m_Constant(C))) {
>>>>>> + if (match(BinOp0, m_Constant(C))) {
>>>>>> // trunc (binop C, X) --> binop (trunc C', X)
>>>>>> Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
>>>>>> - Value *TruncX = Builder.CreateTrunc(BinOp->getOperand(1),
>>>>>> DestTy);
>>>>>> + Value *TruncX = Builder.CreateTrunc(BinOp1, DestTy);
>>>>>> return BinaryOperator::Create(BinOp->getOpcode(), NarrowC,
>>>>>> TruncX);
>>>>>> }
>>>>>> break;
>>>>>> Modified: llvm/trunk/test/Transforms/InstCombine/pr33765.ll
>>>>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/pr33765.ll?rev=318323&r1=318322&r2=318323&view=diff
>>>>>> ============================================================
>>>>>> ==================
>>>>>> --- llvm/trunk/test/Transforms/InstCombine/pr33765.ll (original)
>>>>>> +++ llvm/trunk/test/Transforms/InstCombine/pr33765.ll Wed Nov 15
>>>>>> 11:12:01 2017
>>>>>> @@ -10,9 +10,8 @@ define void @patatino(i8 %beth) {
>>>>>> ; CHECK: if.then9:
>>>>>> ; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[CONV]], [[CONV]]
>>>>>> ; CHECK-NEXT: [[TINKY:%.*]] = load i16, i16* @glob, align 2
>>>>>> -; CHECK-NEXT: [[CONV131:%.*]] = zext i16 [[TINKY]] to i32
>>>>>> -; CHECK-NEXT: [[AND:%.*]] = and i32 [[MUL]], [[CONV131]]
>>>>>> -; CHECK-NEXT: [[CONV14:%.*]] = trunc i32 [[AND]] to i16
>>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[MUL]] to i16
>>>>>> +; CHECK-NEXT: [[CONV14:%.*]] = and i16 [[TINKY]], [[TMP1]]
>>>>>> ; CHECK-NEXT: store i16 [[CONV14]], i16* @glob, align 2
>>>>>> ; CHECK-NEXT: ret void
>>>>>> ;
>>>>>> Modified: llvm/trunk/test/Transforms/InstCombine/trunc-binop-ext.ll
>>>>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/trunc-binop-ext.ll?rev=318323&r1=318322&r2=318323&view=diff
>>>>>> ============================================================
>>>>>> ==================
>>>>>> --- llvm/trunk/test/Transforms/InstCombine/trunc-binop-ext.ll
>>>>>> (original)
>>>>>> +++ llvm/trunk/test/Transforms/InstCombine/trunc-binop-ext.ll Wed
>>>>>> Nov 15 11:12:01 2017
>>>>>> @@ -2,9 +2,8 @@
>>>>>> define i16 @narrow_sext_and(i16 %x16, i32 %y32) {
>>>>>> ; CHECK-LABEL: @narrow_sext_and(
>>>>>> -; CHECK-NEXT: [[X321:%.*]] = zext i16 %x16 to i32
>>>>>> -; CHECK-NEXT: [[B:%.*]] = and i32 [[X321]], %y32
>>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc i32 [[B]] to i16
>>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
>>>>>> +; CHECK-NEXT: [[R:%.*]] = and i16 [[TMP1]], %x16
>>>>>> ; CHECK-NEXT: ret i16 [[R]]
>>>>>> ;
>>>>>> %x32 = sext i16 %x16 to i32
>>>>>> @@ -15,9 +14,8 @@ define i16 @narrow_sext_and(i16 %x16, i3
>>>>>> define i16 @narrow_zext_and(i16 %x16, i32 %y32) {
>>>>>> ; CHECK-LABEL: @narrow_zext_and(
>>>>>> -; CHECK-NEXT: [[X32:%.*]] = zext i16 %x16 to i32
>>>>>> -; CHECK-NEXT: [[B:%.*]] = and i32 [[X32]], %y32
>>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc i32 [[B]] to i16
>>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
>>>>>> +; CHECK-NEXT: [[R:%.*]] = and i16 [[TMP1]], %x16
>>>>>> ; CHECK-NEXT: ret i16 [[R]]
>>>>>> ;
>>>>>> %x32 = zext i16 %x16 to i32
>>>>>> @@ -28,9 +26,8 @@ define i16 @narrow_zext_and(i16 %x16, i3
>>>>>> define i16 @narrow_sext_or(i16 %x16, i32 %y32) {
>>>>>> ; CHECK-LABEL: @narrow_sext_or(
>>>>>> -; CHECK-NEXT: [[X321:%.*]] = zext i16 %x16 to i32
>>>>>> -; CHECK-NEXT: [[B:%.*]] = or i32 [[X321]], %y32
>>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc i32 [[B]] to i16
>>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
>>>>>> +; CHECK-NEXT: [[R:%.*]] = or i16 [[TMP1]], %x16
>>>>>> ; CHECK-NEXT: ret i16 [[R]]
>>>>>> ;
>>>>>> %x32 = sext i16 %x16 to i32
>>>>>> @@ -41,9 +38,8 @@ define i16 @narrow_sext_or(i16 %x16, i32
>>>>>> define i16 @narrow_zext_or(i16 %x16, i32 %y32) {
>>>>>> ; CHECK-LABEL: @narrow_zext_or(
>>>>>> -; CHECK-NEXT: [[X32:%.*]] = zext i16 %x16 to i32
>>>>>> -; CHECK-NEXT: [[B:%.*]] = or i32 [[X32]], %y32
>>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc i32 [[B]] to i16
>>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
>>>>>> +; CHECK-NEXT: [[R:%.*]] = or i16 [[TMP1]], %x16
>>>>>> ; CHECK-NEXT: ret i16 [[R]]
>>>>>> ;
>>>>>> %x32 = zext i16 %x16 to i32
>>>>>> @@ -54,9 +50,8 @@ define i16 @narrow_zext_or(i16 %x16, i32
>>>>>> define i16 @narrow_sext_xor(i16 %x16, i32 %y32) {
>>>>>> ; CHECK-LABEL: @narrow_sext_xor(
>>>>>> -; CHECK-NEXT: [[X321:%.*]] = zext i16 %x16 to i32
>>>>>> -; CHECK-NEXT: [[B:%.*]] = xor i32 [[X321]], %y32
>>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc i32 [[B]] to i16
>>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
>>>>>> +; CHECK-NEXT: [[R:%.*]] = xor i16 [[TMP1]], %x16
>>>>>> ; CHECK-NEXT: ret i16 [[R]]
>>>>>> ;
>>>>>> %x32 = sext i16 %x16 to i32
>>>>>> @@ -67,9 +62,8 @@ define i16 @narrow_sext_xor(i16 %x16, i3
>>>>>> define i16 @narrow_zext_xor(i16 %x16, i32 %y32) {
>>>>>> ; CHECK-LABEL: @narrow_zext_xor(
>>>>>> -; CHECK-NEXT: [[X32:%.*]] = zext i16 %x16 to i32
>>>>>> -; CHECK-NEXT: [[B:%.*]] = xor i32 [[X32]], %y32
>>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc i32 [[B]] to i16
>>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
>>>>>> +; CHECK-NEXT: [[R:%.*]] = xor i16 [[TMP1]], %x16
>>>>>> ; CHECK-NEXT: ret i16 [[R]]
>>>>>> ;
>>>>>> %x32 = zext i16 %x16 to i32
>>>>>> @@ -80,9 +74,8 @@ define i16 @narrow_zext_xor(i16 %x16, i3
>>>>>> define i16 @narrow_sext_add(i16 %x16, i32 %y32) {
>>>>>> ; CHECK-LABEL: @narrow_sext_add(
>>>>>> -; CHECK-NEXT: [[X321:%.*]] = zext i16 %x16 to i32
>>>>>> -; CHECK-NEXT: [[B:%.*]] = add i32 [[X321]], %y32
>>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc i32 [[B]] to i16
>>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
>>>>>> +; CHECK-NEXT: [[R:%.*]] = add i16 [[TMP1]], %x16
>>>>>> ; CHECK-NEXT: ret i16 [[R]]
>>>>>> ;
>>>>>> %x32 = sext i16 %x16 to i32
>>>>>> @@ -93,9 +86,8 @@ define i16 @narrow_sext_add(i16 %x16, i3
>>>>>> define i16 @narrow_zext_add(i16 %x16, i32 %y32) {
>>>>>> ; CHECK-LABEL: @narrow_zext_add(
>>>>>> -; CHECK-NEXT: [[X32:%.*]] = zext i16 %x16 to i32
>>>>>> -; CHECK-NEXT: [[B:%.*]] = add i32 [[X32]], %y32
>>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc i32 [[B]] to i16
>>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
>>>>>> +; CHECK-NEXT: [[R:%.*]] = add i16 [[TMP1]], %x16
>>>>>> ; CHECK-NEXT: ret i16 [[R]]
>>>>>> ;
>>>>>> %x32 = zext i16 %x16 to i32
>>>>>> @@ -106,9 +98,8 @@ define i16 @narrow_zext_add(i16 %x16, i3
>>>>>> define i16 @narrow_sext_mul(i16 %x16, i32 %y32) {
>>>>>> ; CHECK-LABEL: @narrow_sext_mul(
>>>>>> -; CHECK-NEXT: [[X32:%.*]] = sext i16 %x16 to i32
>>>>>> -; CHECK-NEXT: [[B:%.*]] = mul i32 [[X32]], %y32
>>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc i32 [[B]] to i16
>>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
>>>>>> +; CHECK-NEXT: [[R:%.*]] = mul i16 [[TMP1]], %x16
>>>>>> ; CHECK-NEXT: ret i16 [[R]]
>>>>>> ;
>>>>>> %x32 = sext i16 %x16 to i32
>>>>>> @@ -119,9 +110,8 @@ define i16 @narrow_sext_mul(i16 %x16, i3
>>>>>> define i16 @narrow_zext_mul(i16 %x16, i32 %y32) {
>>>>>> ; CHECK-LABEL: @narrow_zext_mul(
>>>>>> -; CHECK-NEXT: [[X32:%.*]] = zext i16 %x16 to i32
>>>>>> -; CHECK-NEXT: [[B:%.*]] = mul i32 [[X32]], %y32
>>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc i32 [[B]] to i16
>>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %y32 to i16
>>>>>> +; CHECK-NEXT: [[R:%.*]] = mul i16 [[TMP1]], %x16
>>>>>> ; CHECK-NEXT: ret i16 [[R]]
>>>>>> ;
>>>>>> %x32 = zext i16 %x16 to i32
>>>>>> @@ -130,15 +120,14 @@ define i16 @narrow_zext_mul(i16 %x16, i3
>>>>>> ret i16 %r
>>>>>> }
>>>>>> -; Verify that the commuted patterns work. The div is to ensure that
>>>>>> complexity-based
>>>>>> +; Verify that the commuted patterns work. The div is to ensure that
>>>>>> complexity-based
>>>>>> ; canonicalization doesn't swap the binop operands. Use vector types
>>>>>> to show those work too.
>>>>>> define <2 x i16> @narrow_sext_and_commute(<2 x i16> %x16, <2 x i32>
>>>>>> %y32) {
>>>>>> ; CHECK-LABEL: @narrow_sext_and_commute(
>>>>>> ; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32
>>>>>> -17>
>>>>>> -; CHECK-NEXT: [[X321:%.*]] = zext <2 x i16> %x16 to <2 x i32>
>>>>>> -; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[Y32OP0]], [[X321]]
>>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc <2 x i32> [[B]] to <2 x i16>
>>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x
>>>>>> i16>
>>>>>> +; CHECK-NEXT: [[R:%.*]] = and <2 x i16> [[TMP1]], %x16
>>>>>> ; CHECK-NEXT: ret <2 x i16> [[R]]
>>>>>> ;
>>>>>> %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>>> @@ -151,9 +140,8 @@ define <2 x i16> @narrow_sext_and_commut
>>>>>> define <2 x i16> @narrow_zext_and_commute(<2 x i16> %x16, <2 x i32>
>>>>>> %y32) {
>>>>>> ; CHECK-LABEL: @narrow_zext_and_commute(
>>>>>> ; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32
>>>>>> -17>
>>>>>> -; CHECK-NEXT: [[X32:%.*]] = zext <2 x i16> %x16 to <2 x i32>
>>>>>> -; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[Y32OP0]], [[X32]]
>>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc <2 x i32> [[B]] to <2 x i16>
>>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x
>>>>>> i16>
>>>>>> +; CHECK-NEXT: [[R:%.*]] = and <2 x i16> [[TMP1]], %x16
>>>>>> ; CHECK-NEXT: ret <2 x i16> [[R]]
>>>>>> ;
>>>>>> %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>>> @@ -166,9 +154,8 @@ define <2 x i16> @narrow_zext_and_commut
>>>>>> define <2 x i16> @narrow_sext_or_commute(<2 x i16> %x16, <2 x i32>
>>>>>> %y32) {
>>>>>> ; CHECK-LABEL: @narrow_sext_or_commute(
>>>>>> ; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32
>>>>>> -17>
>>>>>> -; CHECK-NEXT: [[X321:%.*]] = zext <2 x i16> %x16 to <2 x i32>
>>>>>> -; CHECK-NEXT: [[B:%.*]] = or <2 x i32> [[Y32OP0]], [[X321]]
>>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc <2 x i32> [[B]] to <2 x i16>
>>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x
>>>>>> i16>
>>>>>> +; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[TMP1]], %x16
>>>>>> ; CHECK-NEXT: ret <2 x i16> [[R]]
>>>>>> ;
>>>>>> %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>>> @@ -181,9 +168,8 @@ define <2 x i16> @narrow_sext_or_commute
>>>>>> define <2 x i16> @narrow_zext_or_commute(<2 x i16> %x16, <2 x i32>
>>>>>> %y32) {
>>>>>> ; CHECK-LABEL: @narrow_zext_or_commute(
>>>>>> ; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32
>>>>>> -17>
>>>>>> -; CHECK-NEXT: [[X32:%.*]] = zext <2 x i16> %x16 to <2 x i32>
>>>>>> -; CHECK-NEXT: [[B:%.*]] = or <2 x i32> [[Y32OP0]], [[X32]]
>>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc <2 x i32> [[B]] to <2 x i16>
>>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x
>>>>>> i16>
>>>>>> +; CHECK-NEXT: [[R:%.*]] = or <2 x i16> [[TMP1]], %x16
>>>>>> ; CHECK-NEXT: ret <2 x i16> [[R]]
>>>>>> ;
>>>>>> %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>>> @@ -196,9 +182,8 @@ define <2 x i16> @narrow_zext_or_commute
>>>>>> define <2 x i16> @narrow_sext_xor_commute(<2 x i16> %x16, <2 x i32>
>>>>>> %y32) {
>>>>>> ; CHECK-LABEL: @narrow_sext_xor_commute(
>>>>>> ; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32
>>>>>> -17>
>>>>>> -; CHECK-NEXT: [[X321:%.*]] = zext <2 x i16> %x16 to <2 x i32>
>>>>>> -; CHECK-NEXT: [[B:%.*]] = xor <2 x i32> [[Y32OP0]], [[X321]]
>>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc <2 x i32> [[B]] to <2 x i16>
>>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x
>>>>>> i16>
>>>>>> +; CHECK-NEXT: [[R:%.*]] = xor <2 x i16> [[TMP1]], %x16
>>>>>> ; CHECK-NEXT: ret <2 x i16> [[R]]
>>>>>> ;
>>>>>> %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>>> @@ -211,9 +196,8 @@ define <2 x i16> @narrow_sext_xor_commut
>>>>>> define <2 x i16> @narrow_zext_xor_commute(<2 x i16> %x16, <2 x i32>
>>>>>> %y32) {
>>>>>> ; CHECK-LABEL: @narrow_zext_xor_commute(
>>>>>> ; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32
>>>>>> -17>
>>>>>> -; CHECK-NEXT: [[X32:%.*]] = zext <2 x i16> %x16 to <2 x i32>
>>>>>> -; CHECK-NEXT: [[B:%.*]] = xor <2 x i32> [[Y32OP0]], [[X32]]
>>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc <2 x i32> [[B]] to <2 x i16>
>>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x
>>>>>> i16>
>>>>>> +; CHECK-NEXT: [[R:%.*]] = xor <2 x i16> [[TMP1]], %x16
>>>>>> ; CHECK-NEXT: ret <2 x i16> [[R]]
>>>>>> ;
>>>>>> %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>>> @@ -226,9 +210,8 @@ define <2 x i16> @narrow_zext_xor_commut
>>>>>> define <2 x i16> @narrow_sext_add_commute(<2 x i16> %x16, <2 x i32>
>>>>>> %y32) {
>>>>>> ; CHECK-LABEL: @narrow_sext_add_commute(
>>>>>> ; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32
>>>>>> -17>
>>>>>> -; CHECK-NEXT: [[X321:%.*]] = zext <2 x i16> %x16 to <2 x i32>
>>>>>> -; CHECK-NEXT: [[B:%.*]] = add <2 x i32> [[Y32OP0]], [[X321]]
>>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc <2 x i32> [[B]] to <2 x i16>
>>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x
>>>>>> i16>
>>>>>> +; CHECK-NEXT: [[R:%.*]] = add <2 x i16> [[TMP1]], %x16
>>>>>> ; CHECK-NEXT: ret <2 x i16> [[R]]
>>>>>> ;
>>>>>> %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>>> @@ -241,9 +224,8 @@ define <2 x i16> @narrow_sext_add_commut
>>>>>> define <2 x i16> @narrow_zext_add_commute(<2 x i16> %x16, <2 x i32>
>>>>>> %y32) {
>>>>>> ; CHECK-LABEL: @narrow_zext_add_commute(
>>>>>> ; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32
>>>>>> -17>
>>>>>> -; CHECK-NEXT: [[X32:%.*]] = zext <2 x i16> %x16 to <2 x i32>
>>>>>> -; CHECK-NEXT: [[B:%.*]] = add <2 x i32> [[Y32OP0]], [[X32]]
>>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc <2 x i32> [[B]] to <2 x i16>
>>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x
>>>>>> i16>
>>>>>> +; CHECK-NEXT: [[R:%.*]] = add <2 x i16> [[TMP1]], %x16
>>>>>> ; CHECK-NEXT: ret <2 x i16> [[R]]
>>>>>> ;
>>>>>> %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>>> @@ -256,9 +238,8 @@ define <2 x i16> @narrow_zext_add_commut
>>>>>> define <2 x i16> @narrow_sext_mul_commute(<2 x i16> %x16, <2 x i32>
>>>>>> %y32) {
>>>>>> ; CHECK-LABEL: @narrow_sext_mul_commute(
>>>>>> ; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32
>>>>>> -17>
>>>>>> -; CHECK-NEXT: [[X32:%.*]] = sext <2 x i16> %x16 to <2 x i32>
>>>>>> -; CHECK-NEXT: [[B:%.*]] = mul <2 x i32> [[Y32OP0]], [[X32]]
>>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc <2 x i32> [[B]] to <2 x i16>
>>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x
>>>>>> i16>
>>>>>> +; CHECK-NEXT: [[R:%.*]] = mul <2 x i16> [[TMP1]], %x16
>>>>>> ; CHECK-NEXT: ret <2 x i16> [[R]]
>>>>>> ;
>>>>>> %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>>> @@ -271,9 +252,8 @@ define <2 x i16> @narrow_sext_mul_commut
>>>>>> define <2 x i16> @narrow_zext_mul_commute(<2 x i16> %x16, <2 x i32>
>>>>>> %y32) {
>>>>>> ; CHECK-LABEL: @narrow_zext_mul_commute(
>>>>>> ; CHECK-NEXT: [[Y32OP0:%.*]] = sdiv <2 x i32> %y32, <i32 7, i32
>>>>>> -17>
>>>>>> -; CHECK-NEXT: [[X32:%.*]] = zext <2 x i16> %x16 to <2 x i32>
>>>>>> -; CHECK-NEXT: [[B:%.*]] = mul <2 x i32> [[Y32OP0]], [[X32]]
>>>>>> -; CHECK-NEXT: [[R:%.*]] = trunc <2 x i32> [[B]] to <2 x i16>
>>>>>> +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[Y32OP0]] to <2 x
>>>>>> i16>
>>>>>> +; CHECK-NEXT: [[R:%.*]] = mul <2 x i16> [[TMP1]], %x16
>>>>>> ; CHECK-NEXT: ret <2 x i16> [[R]]
>>>>>> ;
>>>>>> %y32op0 = sdiv <2 x i32> %y32, <i32 7, i32 -17>
>>>>>> _______________________________________________
>>>>>> llvm-commits mailing list
>>>>>> llvm-commits at lists.llvm.org
>>>>>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20171208/0eb0cc52/attachment.html>
More information about the llvm-commits
mailing list