[llvm-commits] [llvm] r54223 - in /llvm/trunk: lib/Target/X86/X86Instr64bit.td lib/Target/X86/X86InstrInfo.td test/CodeGen/X86/zext-inreg-0.ll test/CodeGen/X86/zext-inreg-1.ll test/CodeGen/X86/zext-inreg-2.ll

Evan Cheng evan.cheng at apple.com
Thu Jul 31 17:39:50 PDT 2008


Can we handle them the same way as truncate, by inserting
MOV16to16_ or MOV32to32_?
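
Something like this at the assembly level (a sketch of the idea only,
with made-up registers; MOV32to32_ is just a plain mov constrained to
the classes that have 8-bit subregs, if I remember right):

  movl   %esi, %eax      # MOV32to32_: force the value into eax/ebx/ecx/edx
  movzbl %al, %eax       # then the zext-in-reg is an ordinary movzbl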

Evan

On Jul 30, 2008, at 11:09 AM, Dan Gohman wrote:

> Author: djg
> Date: Wed Jul 30 13:09:17 2008
> New Revision: 54223
>
> URL: http://llvm.org/viewvc/llvm-project?rev=54223&view=rev
> Log:
> Reapply r54147 with a constraint to only use the 8-bit
> subreg form on x86-64, to avoid the problem with x86-32
> having GPRs that don't have 8-bit subregs.
>
> Also, change several 16-bit instructions to use
> equivalent 32-bit instructions. These have a smaller
> encoding and avoid partial-register updates.
>
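(For concreteness, the encoding sizes involved -- an illustrative aside,
not from the patch, with byte counts from the standard opcode tables:

  movzbw %al, %ax      # 66 0F B6 C0 -- 4 bytes; writes only %ax, so a
                       # later read of %eax pays a partial-register merge
  movzbl %al, %eax     # 0F B6 C0    -- 3 bytes; writes all of %eax
  movzbq %al, %rax     # 48 0F B6 C0 -- 4 bytes; same result as movzbl,
                       # since a 32-bit write zeroes bits 63:32 anyway
)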
> Added:
>    llvm/trunk/test/CodeGen/X86/zext-inreg-0.ll
>    llvm/trunk/test/CodeGen/X86/zext-inreg-1.ll
>    llvm/trunk/test/CodeGen/X86/zext-inreg-2.ll
> Modified:
>    llvm/trunk/lib/Target/X86/X86Instr64bit.td
>    llvm/trunk/lib/Target/X86/X86InstrInfo.td
>
> Modified: llvm/trunk/lib/Target/X86/X86Instr64bit.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Instr64bit.td?rev=54223&r1=54222&r2=54223&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86Instr64bit.td (original)
> +++ llvm/trunk/lib/Target/X86/X86Instr64bit.td Wed Jul 30 13:09:17 2008
> @@ -241,18 +241,22 @@
>                     "movs{lq|xd}\t{$src, $dst|$dst, $src}",
>                     [(set GR64:$dst, (sextloadi64i32 addr:$src))]>;
>
> -def MOVZX64rr8 : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
> -                    "movz{bq|x}\t{$src, $dst|$dst, $src}",
> -                    [(set GR64:$dst, (zext GR8:$src))]>, TB;
> -def MOVZX64rm8 : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
> -                    "movz{bq|x}\t{$src, $dst|$dst, $src}",
> -                    [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB;
> -def MOVZX64rr16: RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
> -                    "movz{wq|x}\t{$src, $dst|$dst, $src}",
> -                    [(set GR64:$dst, (zext GR16:$src))]>, TB;
> -def MOVZX64rm16: RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
> -                    "movz{wq|x}\t{$src, $dst|$dst, $src}",
> -                    [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
> +// Use movzbl instead of movzbq when the destination is a register; it's
> +// equivalent due to implicit zero-extending, and it has a smaller encoding.
> +def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
> +                   "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
> +                   [(set GR64:$dst, (zext GR8:$src))]>, TB;
> +def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
> +                   "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
> +                   [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB;
> +// Use movzwl instead of movzwq when the destination is a register; it's
> +// equivalent due to implicit zero-extending, and it has a smaller encoding.
> +def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
> +                   "movz{wl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
> +                   [(set GR64:$dst, (zext GR16:$src))]>, TB;
> +def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
> +                   "movz{wl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
> +                   [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
>
> let neverHasSideEffects = 1 in {
>   let Defs = [RAX], Uses = [EAX] in
> @@ -1093,9 +1097,9 @@
> // when we have a better way to specify isel priority.
> let Defs = [EFLAGS], AddedComplexity = 1,
>     isReMaterializable = 1, isAsCheapAsAMove = 1 in
> -def MOV64r0  : RI<0x31, MRMInitReg,  (outs GR64:$dst), (ins),
> -                 "xor{l}\t${dst:subreg32}, ${dst:subreg32}",
> -                 [(set GR64:$dst, 0)]>;
> +def MOV64r0  : I<0x31, MRMInitReg,  (outs GR64:$dst), (ins),
> +                "xor{l}\t${dst:subreg32}, ${dst:subreg32}",
> +                [(set GR64:$dst, 0)]>;
>
> // Materialize i64 constant where top 32-bits are zero.
> let AddedComplexity = 1, isReMaterializable = 1 in
> @@ -1240,6 +1244,26 @@
>           (SUBREG_TO_REG (i64 0),
>             (i32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)),
>             x86_subreg_32bit)>;
> +// r & (2^16-1) ==> movz
> +def : Pat<(and GR64:$src, 0xffff),
> +          (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)))>;
> +// r & (2^8-1) ==> movz
> +def : Pat<(and GR64:$src, 0xff),
> +          (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)))>;
> +
> +// TODO: The following two patterns could be adapted to apply to x86-32, except
> +// that they'll need some way to deal with the fact that in x86-32 not all GPRs
> +// have 8-bit subregs. The GR32_ and GR16_ classes are a step in this direction,
> +// but they aren't ready for this purpose yet.
> +
> +// r & (2^8-1) ==> movz
> +def : Pat<(and GR32:$src1, 0xff),
> +           (MOVZX32rr8 (i8 (EXTRACT_SUBREG GR32:$src1, x86_subreg_8bit)))>,
> +      Requires<[In64BitMode]>;
> +// r & (2^8-1) ==> movz
> +def : Pat<(and GR16:$src1, 0xff),
> +           (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, x86_subreg_8bit)))>,
> +      Requires<[In64BitMode]>;
>
> // (shl x, 1) ==> (add x, x)
> def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=54223&r1=54222&r2=54223&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Wed Jul 30 13:09:17 2008
> @@ -2424,12 +2424,15 @@
> } // Defs = [EFLAGS]
>
> // Sign/Zero extenders
> +// Use movsbl instead of movsbw; we don't care about the high 16 bits
> +// of the register here. This has a smaller encoding and avoids a
> +// partial-register update.
> def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
> -                   "movs{bw|x}\t{$src, $dst|$dst, $src}",
> -                   [(set GR16:$dst, (sext GR8:$src))]>, TB, OpSize;
> +                   "movs{bl|x}\t{$src, ${dst:subreg32}|$ 
> {dst:subreg32}, $src}",
> +                   [(set GR16:$dst, (sext GR8:$src))]>, TB;
> def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem : 
> $src),
> -                   "movs{bw|x}\t{$src, $dst|$dst, $src}",
> -                   [(set GR16:$dst, (sextloadi16i8 addr:$src))]>,  
> TB, OpSize;
> +                   "movs{bl|x}\t{$src, ${dst:subreg32}|$ 
> {dst:subreg32}, $src}",
> +                   [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB;
> def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
>                    "movs{bl|x}\t{$src, $dst|$dst, $src}",
>                    [(set GR32:$dst, (sext GR8:$src))]>, TB;
> @@ -2443,12 +2446,15 @@
>                    "movs{wl|x}\t{$src, $dst|$dst, $src}",
>                    [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB;
>
> +// Use movzbl instead of movzbw; we don't care about the high 16 bits
> +// of the register here. This has a smaller encoding and avoids a
> +// partial-register update.
> def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
> -                   "movz{bw|x}\t{$src, $dst|$dst, $src}",
> -                   [(set GR16:$dst, (zext GR8:$src))]>, TB, OpSize;
> +                   "movz{bl|x}\t{$src, ${dst:subreg32}|$ 
> {dst:subreg32}, $src}",
> +                   [(set GR16:$dst, (zext GR8:$src))]>, TB;
> def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem : 
> $src),
> -                   "movz{bw|x}\t{$src, $dst|$dst, $src}",
> -                   [(set GR16:$dst, (zextloadi16i8 addr:$src))]>,  
> TB, OpSize;
> +                   "movz{bl|x}\t{$src, ${dst:subreg32}|$ 
> {dst:subreg32}, $src}",
> +                   [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB;
> def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
>                    "movz{bl|x}\t{$src, $dst|$dst, $src}",
>                    [(set GR32:$dst, (zext GR8:$src))]>, TB;
> @@ -2488,9 +2494,11 @@
> def MOV8r0   : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins),
>                  "xor{b}\t$dst, $dst",
>                  [(set GR8:$dst, 0)]>;
> +// Use xorl instead of xorw since we don't care about the high 16 bits,
> +// it's smaller, and it avoids a partial-register update.
> def MOV16r0  : I<0x31, MRMInitReg,  (outs GR16:$dst), (ins),
> -                 "xor{w}\t$dst, $dst",
> -                 [(set GR16:$dst, 0)]>, OpSize;
> +                 "xor{l}\t${dst:subreg32}, ${dst:subreg32}",
> +                 [(set GR16:$dst, 0)]>;
> def MOV32r0  : I<0x31, MRMInitReg,  (outs GR32:$dst), (ins),
>                  "xor{l}\t$dst, $dst",
>                  [(set GR32:$dst, 0)]>;
> @@ -2763,6 +2771,10 @@
> // Some peepholes
> //===----------------------------------------------------------------------===//
>
> +// r & (2^16-1) ==> movz
> +def : Pat<(and GR32:$src1, 0xffff),
> +           (MOVZX32rr16 (i16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit)))>;
> +
> // (shl x, 1) ==> (add x, x)
> def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr  GR8 :$src1, GR8 :$src1)>;
> def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
>
> Added: llvm/trunk/test/CodeGen/X86/zext-inreg-0.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/zext-inreg-0.ll?rev=54223&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/zext-inreg-0.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/zext-inreg-0.ll Wed Jul 30 13:09:17 2008
> @@ -0,0 +1,51 @@
> +; RUN: llvm-as < %s | llc -march=x86 | not grep and
> +; RUN: llvm-as < %s | llc -march=x86-64 > %t
> +; RUN: not grep and %t
> +; RUN: not grep movzbq %t
> +; RUN: not grep movzwq %t
> +; RUN: not grep movzlq %t
> +
> +; These should use movzbl instead of 'and 255'.
> +; This is related to not having a ZERO_EXTEND_REG opcode.
> +
> +define i32 @c(i32 %d) nounwind  {
> +        %e = add i32 %d, 1
> +        %retval = and i32 %e, 65535
> +        ret i32 %retval
> +}
> +define i64 @e(i64 %d) nounwind  {
> +        %e = add i64 %d, 1
> +        %retval = and i64 %e, 65535
> +        ret i64 %retval
> +}
> +define i64 @f(i64 %d) nounwind  {
> +        %e = add i64 %d, 1
> +        %retval = and i64 %e, 4294967295
> +        ret i64 %retval
> +}
> +
> +define i32 @g(i8 %d) nounwind  {
> +        %e = add i8 %d, 1
> +        %retval = zext i8 %e to i32
> +        ret i32 %retval
> +}
> +define i32 @h(i16 %d) nounwind  {
> +        %e = add i16 %d, 1
> +        %retval = zext i16 %e to i32
> +        ret i32 %retval
> +}
> +define i64 @i(i8 %d) nounwind  {
> +        %e = add i8 %d, 1
> +        %retval = zext i8 %e to i64
> +        ret i64 %retval
> +}
> +define i64 @j(i16 %d) nounwind  {
> +        %e = add i16 %d, 1
> +        %retval = zext i16 %e to i64
> +        ret i64 %retval
> +}
> +define i64 @k(i32 %d) nounwind  {
> +        %e = add i32 %d, 1
> +        %retval = zext i32 %e to i64
> +        ret i64 %retval
> +}
>
> Added: llvm/trunk/test/CodeGen/X86/zext-inreg-1.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/zext-inreg-1.ll?rev=54223&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/zext-inreg-1.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/zext-inreg-1.ll Wed Jul 30 13:09:17 2008
> @@ -0,0 +1,13 @@
> +; RUN: llvm-as < %s | llc -march=x86 | not grep and
> +
> +; These tests differ from the ones in zext-inreg-0.ll in that
> +; on x86-64 they do require 'and' instructions.
> +
> +; These should use movzbl instead of 'and 255'.
> +; This is related to not having a ZERO_EXTEND_REG node.
> +
> +define i64 @h(i64 %d) nounwind  {
> +        %e = add i64 %d, 1
> +        %retval = and i64 %e, 281474976710655
> +        ret i64 %retval
> +}
>
> Added: llvm/trunk/test/CodeGen/X86/zext-inreg-2.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/zext-inreg-2.ll?rev=54223&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/zext-inreg-2.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/zext-inreg-2.ll Wed Jul 30 13:09:17 2008
> @@ -0,0 +1,28 @@
> +; RUN: llvm-as < %s | llc -march=x86-64 > %t
> +; RUN: not grep and %t
> +; RUN: not grep movzbq %t
> +; RUN: not grep movzwq %t
> +; RUN: not grep movzlq %t
> +
> +; These should use movzbl instead of 'and 255'.
> +; This is related to not having a ZERO_EXTEND_REG opcode.
> +
> +; This test was split out of zext-inreg-0.ll because these
> +; cases don't yet work on x86-32 due to the 8-bit subreg
> +; issue.
> +
> +define i32 @a(i32 %d) nounwind  {
> +        %e = add i32 %d, 1
> +        %retval = and i32 %e, 255
> +        ret i32 %retval
> +}
> +define i32 @b(float %d) nounwind  {
> +        %tmp12 = fptoui float %d to i8
> +        %retval = zext i8 %tmp12 to i32
> +        ret i32 %retval
> +}
> +define i64 @d(i64 %d) nounwind  {
> +        %e = add i64 %d, 1
> +        %retval = and i64 %e, 255
> +        ret i64 %retval
> +}
>
>
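(For reference, the shape these tests expect for something like @c in
zext-inreg-0.ll on x86-64 -- a sketch of the intended output, not
verified llc output:

  addl   $1, %edi        # the add from the IR
  movzwl %di, %eax       # via the new pattern, instead of andl $65535
  ret
)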
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits



