[llvm] r212493 - [FastISel][X86] Fix smul.with.overflow.i8 lowering.
Eric Christopher
echristo at gmail.com
Mon Jul 7 15:25:45 PDT 2014
On Mon, Jul 7, 2014 at 3:24 PM, Juergen Ributzka <juergen at apple.com> wrote:
> The pattern is actually there, but TableGen currently doesn’t generate code for it (X86ISD::SMUL) at all for FastISel.
Hrm. Bah.
> Using ISD::MUL was a mistake to begin with, because it doesn’t guarantee that the signed multiply will be used nor is it required. The only way I can make sure right now that I will get the signed multiply is by explicitly emitting it.
>
Fair point.
Thanks.
-eric
> -Juergen
>
> On Jul 7, 2014, at 3:14 PM, Eric Christopher <echristo at gmail.com> wrote:
>
>> I might be missing something, but why not just define the pattern?
>> imul supports i8.
>>
>> -eric
>>
>> On Mon, Jul 7, 2014 at 2:52 PM, Juergen Ributzka <juergen at apple.com> wrote:
>>> Author: ributzka
>>> Date: Mon Jul 7 16:52:21 2014
>>> New Revision: 212493
>>>
>>> URL: http://llvm.org/viewvc/llvm-project?rev=212493&view=rev
>>> Log:
>>> [FastISel][X86] Fix smul.with.overflow.i8 lowering.
>>>
>>> Add custom lowering code for signed multiply instruction selection, because the
>>> default FastISel instruction selection for ISD::MUL will use unsigned multiply
>>> for the i8 type and signed multiply for all other types. This would set the
>>> incorrect flags for the overflow check.
>>>
>>> This fixes <rdar://problem/17549300>
>>>
>>> Modified:
>>> llvm/trunk/lib/Target/X86/X86FastISel.cpp
>>> llvm/trunk/test/CodeGen/X86/xaluo.ll
>>>
>>> Modified: llvm/trunk/lib/Target/X86/X86FastISel.cpp
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FastISel.cpp?rev=212493&r1=212492&r2=212493&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/lib/Target/X86/X86FastISel.cpp (original)
>>> +++ llvm/trunk/lib/Target/X86/X86FastISel.cpp Mon Jul 7 16:52:21 2014
>>> @@ -2402,7 +2402,7 @@ bool X86FastISel::X86VisitIntrinsicCall(
>>> case Intrinsic::usub_with_overflow:
>>> BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
>>> case Intrinsic::smul_with_overflow:
>>> - BaseOpc = ISD::MUL; CondOpc = X86::SETOr; break;
>>> + BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break;
>>> case Intrinsic::umul_with_overflow:
>>> BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break;
>>> }
>>> @@ -2430,10 +2430,11 @@ bool X86FastISel::X86VisitIntrinsicCall(
>>> RHSIsKill);
>>> }
>>>
>>> - // FastISel doesn't have a pattern for X86::MUL*r. Emit it manually.
>>> + // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
>>> + // it manually.
>>> if (BaseOpc == X86ISD::UMUL && !ResultReg) {
>>> static const unsigned MULOpc[] =
>>> - { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
>>> + { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
>>> static const unsigned Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
>>> // First copy the first operand into RAX, which is an implicit input to
>>> // the X86::MUL*r instruction.
>>> @@ -2442,6 +2443,21 @@ bool X86FastISel::X86VisitIntrinsicCall(
>>> .addReg(LHSReg, getKillRegState(LHSIsKill));
>>> ResultReg = FastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
>>> TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
>>> + } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
>>> + static const unsigned MULOpc[] =
>>> + { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
>>> + if (VT == MVT::i8) {
>>> + // Copy the first operand into AL, which is an implicit input to the
>>> + // X86::IMUL8r instruction.
>>> + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
>>> + TII.get(TargetOpcode::COPY), X86::AL)
>>> + .addReg(LHSReg, getKillRegState(LHSIsKill));
>>> + ResultReg = FastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
>>> + RHSIsKill);
>>> + } else
>>> + ResultReg = FastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
>>> + TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
>>> + RHSReg, RHSIsKill);
>>> }
>>>
>>> if (!ResultReg)
>>>
>>> Modified: llvm/trunk/test/CodeGen/X86/xaluo.ll
>>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xaluo.ll?rev=212493&r1=212492&r2=212493&view=diff
>>> ==============================================================================
>>> --- llvm/trunk/test/CodeGen/X86/xaluo.ll (original)
>>> +++ llvm/trunk/test/CodeGen/X86/xaluo.ll Mon Jul 7 16:52:21 2014
>>> @@ -261,6 +261,34 @@ entry:
>>> }
>>>
>>> ; SMULO
>>> +define zeroext i1 @smulo.i8(i8 %v1, i8 %v2, i8* %res) {
>>> +entry:
>>> +; FAST-LABEL: smulo.i8
>>> +; FAST: movb %dil, %al
>>> +; FAST-NEXT: imulb %sil
>>> +; FAST-NEXT: seto %cl
>>> + %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
>>> + %val = extractvalue {i8, i1} %t, 0
>>> + %obit = extractvalue {i8, i1} %t, 1
>>> + store i8 %val, i8* %res
>>> + ret i1 %obit
>>> +}
>>> +
>>> +define zeroext i1 @smulo.i16(i16 %v1, i16 %v2, i16* %res) {
>>> +entry:
>>> +; DAG-LABEL: smulo.i16
>>> +; DAG: imulw %si, %di
>>> +; DAG-NEXT: seto %al
>>> +; FAST-LABEL: smulo.i16
>>> +; FAST: imulw %si, %di
>>> +; FAST-NEXT: seto %al
>>> + %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
>>> + %val = extractvalue {i16, i1} %t, 0
>>> + %obit = extractvalue {i16, i1} %t, 1
>>> + store i16 %val, i16* %res
>>> + ret i1 %obit
>>> +}
>>> +
>>> define zeroext i1 @smulo.i32(i32 %v1, i32 %v2, i32* %res) {
>>> entry:
>>> ; DAG-LABEL: smulo.i32
>>> @@ -292,6 +320,34 @@ entry:
>>> }
>>>
>>> ; UMULO
>>> +define zeroext i1 @umulo.i8(i8 %v1, i8 %v2, i8* %res) {
>>> +entry:
>>> +; FAST-LABEL: umulo.i8
>>> +; FAST: movb %dil, %al
>>> +; FAST-NEXT: mulb %sil
>>> +; FAST-NEXT: seto %cl
>>> + %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
>>> + %val = extractvalue {i8, i1} %t, 0
>>> + %obit = extractvalue {i8, i1} %t, 1
>>> + store i8 %val, i8* %res
>>> + ret i1 %obit
>>> +}
>>> +
>>> +define zeroext i1 @umulo.i16(i16 %v1, i16 %v2, i16* %res) {
>>> +entry:
>>> +; DAG-LABEL: umulo.i16
>>> +; DAG: mulw %si
>>> +; DAG-NEXT: seto
>>> +; FAST-LABEL: umulo.i16
>>> +; FAST: mulw %si
>>> +; FAST-NEXT: seto
>>> + %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
>>> + %val = extractvalue {i16, i1} %t, 0
>>> + %obit = extractvalue {i16, i1} %t, 1
>>> + store i16 %val, i16* %res
>>> + ret i1 %obit
>>> +}
>>> +
>>> define zeroext i1 @umulo.i32(i32 %v1, i32 %v2, i32* %res) {
>>> entry:
>>> ; DAG-LABEL: umulo.i32
>>> @@ -665,7 +721,7 @@ continue:
>>> ret i1 true
>>> }
>>>
>>> -declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8) nounwind readnone
>>> +declare {i8, i1} @llvm.sadd.with.overflow.i8 (i8, i8 ) nounwind readnone
>>> declare {i16, i1} @llvm.sadd.with.overflow.i16(i16, i16) nounwind readnone
>>> declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
>>> declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
>>> @@ -675,8 +731,12 @@ declare {i32, i1} @llvm.ssub.with.overfl
>>> declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
>>> declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
>>> declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
>>> +declare {i8, i1} @llvm.smul.with.overflow.i8 (i8, i8 ) nounwind readnone
>>> +declare {i16, i1} @llvm.smul.with.overflow.i16(i16, i16) nounwind readnone
>>> declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
>>> declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
>>> +declare {i8, i1} @llvm.umul.with.overflow.i8 (i8, i8 ) nounwind readnone
>>> +declare {i16, i1} @llvm.umul.with.overflow.i16(i16, i16) nounwind readnone
>>> declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
>>> declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone
>>>
>>>
>>>
>>> _______________________________________________
>>> llvm-commits mailing list
>>> llvm-commits at cs.uiuc.edu
>>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
More information about the llvm-commits
mailing list