[llvm] r212493 - [FastISel][X86] Fix smul.with.overflow.i8 lowering.

Mon Jul 7 15:14:40 PDT 2014

I might be missing something, but why not just define the missing FastISel
pattern for the i8 case instead of emitting the instruction manually?
imul supports i8.

-eric

On Mon, Jul 7, 2014 at 2:52 PM, Juergen Ributzka <juergen at apple.com> wrote:
> Author: ributzka
> Date: Mon Jul  7 16:52:21 2014
> New Revision: 212493
>
> URL: http://llvm.org/viewvc/llvm-project?rev=212493&view=rev
> Log:
> [FastISel][X86] Fix smul.with.overflow.i8 lowering.
>
> Add custom lowering code for signed multiply instruction selection, because the
> default FastISel instruction selection for ISD::MUL will use unsigned multiply
> for the i8 type and signed multiply for all other types. This would set the
> incorrect flags for the overflow check.
>
> This fixes <rdar://problem/17549300>
>
> Modified:
>     llvm/trunk/lib/Target/X86/X86FastISel.cpp
>     llvm/trunk/test/CodeGen/X86/xaluo.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86FastISel.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FastISel.cpp?rev=212493&r1=212492&r2=212493&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86FastISel.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86FastISel.cpp Mon Jul  7 16:52:21 2014
> @@ -2402,7 +2402,7 @@ bool X86FastISel::X86VisitIntrinsicCall(
>      case Intrinsic::usub_with_overflow:
>        BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
>      case Intrinsic::smul_with_overflow:
> -      BaseOpc = ISD::MUL; CondOpc = X86::SETOr; break;
> +      BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break;
>      case Intrinsic::umul_with_overflow:
>        BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break;
>      }
> @@ -2430,10 +2430,11 @@ bool X86FastISel::X86VisitIntrinsicCall(
>                                RHSIsKill);
>      }
>
> -    // FastISel doesn't have a pattern for X86::MUL*r. Emit it manually.
> +    // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
> +    // it manually.
>      if (BaseOpc == X86ISD::UMUL && !ResultReg) {
>        static const unsigned MULOpc[] =
> -      { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
> +        { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
>        static const unsigned Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
>        // First copy the first operand into RAX, which is an implicit input to
>        // the X86::MUL*r instruction.
> @@ -2442,6 +2443,21 @@ bool X86FastISel::X86VisitIntrinsicCall(
>          .addReg(LHSReg, getKillRegState(LHSIsKill));
>        ResultReg = FastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
>                                   TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
> +    } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
> +      static const unsigned MULOpc[] =
> +        { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
> +      if (VT == MVT::i8) {
> +        // Copy the first operand into AL, which is an implicit input to the
> +        // X86::IMUL8r instruction.
> +        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
> +               TII.get(TargetOpcode::COPY), X86::AL)
> +          .addReg(LHSReg, getKillRegState(LHSIsKill));
> +        ResultReg = FastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
> +                                   RHSIsKill);
> +      } else
> +        ResultReg = FastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
> +                                    TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
> +                                    RHSReg, RHSIsKill);
>      }
>
>      if (!ResultReg)
>
> Modified: llvm/trunk/test/CodeGen/X86/xaluo.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xaluo.ll?rev=212493&r1=212492&r2=212493&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/xaluo.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/xaluo.ll Mon Jul  7 16:52:21 2014
> @@ -261,6 +261,34 @@ entry:
>  }
>
>  ; SMULO
> +define zeroext i1 @smulo.i8(i8 %v1, i8 %v2, i8* %res) {
> +entry:
> +; FAST-LABEL:   smulo.i8
> +; FAST:         movb %dil, %al
> +; FAST-NEXT:    imulb %sil
> +; FAST-NEXT:    seto %cl
> +  %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
> +  %val = extractvalue {i8, i1} %t, 0
> +  %obit = extractvalue {i8, i1} %t, 1
> +  store i8 %val, i8* %res
> +  ret i1 %obit
> +}
> +
> +define zeroext i1 @smulo.i16(i16 %v1, i16 %v2, i16* %res) {
> +entry:
> +; DAG-LABEL:    smulo.i16
> +; DAG:          imulw %si, %di
> +; DAG-NEXT:     seto %al
> +; FAST-LABEL:   smulo.i16
> +; FAST:         imulw %si, %di
> +; FAST-NEXT:    seto %al
> +  %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
> +  %val = extractvalue {i16, i1} %t, 0
> +  %obit = extractvalue {i16, i1} %t, 1
> +  store i16 %val, i16* %res
> +  ret i1 %obit
> +}
> +
>  define zeroext i1 @smulo.i32(i32 %v1, i32 %v2, i32* %res) {
>  entry:
>  ; DAG-LABEL:    smulo.i32
> @@ -292,6 +320,34 @@ entry:
>  }
>
>  ; UMULO
> +define zeroext i1 @umulo.i8(i8 %v1, i8 %v2, i8* %res) {
> +entry:
> +; FAST-LABEL:   umulo.i8
> +; FAST:         movb %dil, %al
> +; FAST-NEXT:    mulb %sil
> +; FAST-NEXT:    seto %cl
> +  %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
> +  %val = extractvalue {i8, i1} %t, 0
> +  %obit = extractvalue {i8, i1} %t, 1
> +  store i8 %val, i8* %res
> +  ret i1 %obit
> +}
> +
> +define zeroext i1 @umulo.i16(i16 %v1, i16 %v2, i16* %res) {
> +entry:
> +; DAG-LABEL:    umulo.i16
> +; DAG:          mulw %si
> +; DAG-NEXT:     seto
> +; FAST-LABEL:   umulo.i16
> +; FAST:         mulw %si
> +; FAST-NEXT:    seto
> +  %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
> +  %val = extractvalue {i16, i1} %t, 0
> +  %obit = extractvalue {i16, i1} %t, 1
> +  store i16 %val, i16* %res
> +  ret i1 %obit
> +}
> +
>  define zeroext i1 @umulo.i32(i32 %v1, i32 %v2, i32* %res) {
>  entry:
>  ; DAG-LABEL:    umulo.i32
> @@ -665,7 +721,7 @@ continue:
>    ret i1 true
>  }
>
> -declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8) nounwind readnone
> +declare {i8,  i1} @llvm.sadd.with.overflow.i8 (i8,  i8 ) nounwind readnone
>  declare {i16, i1} @llvm.sadd.with.overflow.i16(i16, i16) nounwind readnone
>  declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
>  declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
> @@ -675,8 +731,12 @@ declare {i32, i1} @llvm.ssub.with.overfl
>  declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
>  declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
>  declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
> +declare {i8,  i1} @llvm.smul.with.overflow.i8 (i8,  i8 ) nounwind readnone
> +declare {i16, i1} @llvm.smul.with.overflow.i16(i16, i16) nounwind readnone
>  declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
>  declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
> +declare {i8,  i1} @llvm.umul.with.overflow.i8 (i8,  i8 ) nounwind readnone
> +declare {i16, i1} @llvm.umul.with.overflow.i16(i16, i16) nounwind readnone
>  declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
>  declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone
>
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits