[llvm] r357743 - [X86] Promote i16 SRA instructions to i32

Thu Apr 4 23:32:51 PDT 2019

Author: ctopper
Date: Thu Apr  4 23:32:50 2019
New Revision: 357743

URL: http://llvm.org/viewvc/llvm-project?rev=357743&view=rev
Log:
[X86] Promote i16 SRA instructions to i32

We already promote SRL and SHL to i32.

This will introduce sign extends sometimes which might be harder to deal with than the zero we use for promoting SRL. I ran this through some of our internal benchmark lists and didn't see any major regressions.

I think there might be some DAG combine improvement opportunities in the test changes here.

Differential Revision: https://reviews.llvm.org/D60278

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/dagcombine-shifts.ll
    llvm/trunk/test/CodeGen/X86/iabs.ll
    llvm/trunk/test/CodeGen/X86/load-scalar-as-vector.ll
    llvm/trunk/test/CodeGen/X86/pr32420.ll
    llvm/trunk/test/CodeGen/X86/speculative-load-hardening.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=357743&r1=357742&r2=357743&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Apr  4 23:32:50 2019
@@ -42796,6 +42796,7 @@ bool X86TargetLowering::isTypeDesirableF
     case ISD::ZERO_EXTEND:
     case ISD::ANY_EXTEND:
     case ISD::SHL:
+    case ISD::SRA:
     case ISD::SRL:
     case ISD::SUB:
     case ISD::ADD:
@@ -42871,6 +42872,7 @@ bool X86TargetLowering::IsDesirableToPro
   case ISD::ANY_EXTEND:
     break;
   case ISD::SHL:
+  case ISD::SRA:
   case ISD::SRL: {
     SDValue N0 = Op.getOperand(0);
     // Look out for (store (shl (load), x)).

Modified: llvm/trunk/test/CodeGen/X86/dagcombine-shifts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/dagcombine-shifts.ll?rev=357743&r1=357742&r2=357743&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/dagcombine-shifts.ll (original)
+++ llvm/trunk/test/CodeGen/X86/dagcombine-shifts.ll Thu Apr  4 23:32:50 2019
@@ -108,8 +108,9 @@ entry:
 define i64 @fun8(i16 zeroext %v) {
 ; CHECK-LABEL: fun8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sarw $4, %di
-; CHECK-NEXT:    movzwl %di, %eax
+; CHECK-NEXT:    movswl %di, %eax
+; CHECK-NEXT:    shrl $4, %eax
+; CHECK-NEXT:    movzwl %ax, %eax
 ; CHECK-NEXT:    shlq $4, %rax
 ; CHECK-NEXT:    retq
 entry:

Modified: llvm/trunk/test/CodeGen/X86/iabs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/iabs.ll?rev=357743&r1=357742&r2=357743&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/iabs.ll (original)
+++ llvm/trunk/test/CodeGen/X86/iabs.ll Thu Apr  4 23:32:50 2019
@@ -37,9 +37,9 @@ define i8 @test_i8(i8 %a) nounwind {
 define i16 @test_i16(i16 %a) nounwind {
 ; X86-NO-CMOV-LABEL: test_i16:
 ; X86-NO-CMOV:       # %bb.0:
-; X86-NO-CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NO-CMOV-NEXT:    movswl {{[0-9]+}}(%esp), %eax
 ; X86-NO-CMOV-NEXT:    movl %eax, %ecx
-; X86-NO-CMOV-NEXT:    sarw $15, %cx
+; X86-NO-CMOV-NEXT:    sarl $15, %ecx
 ; X86-NO-CMOV-NEXT:    addl %ecx, %eax
 ; X86-NO-CMOV-NEXT:    xorl %ecx, %eax
 ; X86-NO-CMOV-NEXT:    # kill: def $ax killed $ax killed $eax

Modified: llvm/trunk/test/CodeGen/X86/load-scalar-as-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/load-scalar-as-vector.ll?rev=357743&r1=357742&r2=357743&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/load-scalar-as-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/load-scalar-as-vector.ll Thu Apr  4 23:32:50 2019
@@ -297,16 +297,16 @@ define <8 x i16> @ashr_op0_constant(i16*
 ; SSE-LABEL: ashr_op0_constant:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movb (%rdi), %cl
-; SSE-NEXT:    movw $-42, %ax
-; SSE-NEXT:    sarw %cl, %ax
+; SSE-NEXT:    movl $-42, %eax
+; SSE-NEXT:    sarl %cl, %eax
 ; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: ashr_op0_constant:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    movb (%rdi), %cl
-; AVX-NEXT:    movw $-42, %ax
-; AVX-NEXT:    sarw %cl, %ax
+; AVX-NEXT:    movl $-42, %eax
+; AVX-NEXT:    sarl %cl, %eax
 ; AVX-NEXT:    vmovd %eax, %xmm0
 ; AVX-NEXT:    retq
   %x = load i16, i16* %p
@@ -318,15 +318,15 @@ define <8 x i16> @ashr_op0_constant(i16*
 define <8 x i16> @ashr_op1_constant(i16* %p) nounwind {
 ; SSE-LABEL: ashr_op1_constant:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movzwl (%rdi), %eax
-; SSE-NEXT:    sarw $7, %ax
+; SSE-NEXT:    movswl (%rdi), %eax
+; SSE-NEXT:    sarl $7, %eax
 ; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: ashr_op1_constant:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movzwl (%rdi), %eax
-; AVX-NEXT:    sarw $7, %ax
+; AVX-NEXT:    movswl (%rdi), %eax
+; AVX-NEXT:    sarl $7, %eax
 ; AVX-NEXT:    vmovd %eax, %xmm0
 ; AVX-NEXT:    retq
   %x = load i16, i16* %p
@@ -365,10 +365,11 @@ define <8 x i16> @sdiv_op1_constant(i16*
 ; SSE-NEXT:    shrl $16, %ecx
 ; SSE-NEXT:    addl %eax, %ecx
 ; SSE-NEXT:    movzwl %cx, %eax
-; SSE-NEXT:    sarw $5, %cx
+; SSE-NEXT:    movswl %ax, %ecx
 ; SSE-NEXT:    shrl $15, %eax
-; SSE-NEXT:    addl %ecx, %eax
-; SSE-NEXT:    movd %eax, %xmm0
+; SSE-NEXT:    sarl $5, %ecx
+; SSE-NEXT:    addl %eax, %ecx
+; SSE-NEXT:    movd %ecx, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: sdiv_op1_constant:
@@ -378,10 +379,11 @@ define <8 x i16> @sdiv_op1_constant(i16*
 ; AVX-NEXT:    shrl $16, %ecx
 ; AVX-NEXT:    addl %eax, %ecx
 ; AVX-NEXT:    movzwl %cx, %eax
-; AVX-NEXT:    sarw $5, %cx
+; AVX-NEXT:    movswl %ax, %ecx
 ; AVX-NEXT:    shrl $15, %eax
-; AVX-NEXT:    addl %ecx, %eax
-; AVX-NEXT:    vmovd %eax, %xmm0
+; AVX-NEXT:    sarl $5, %ecx
+; AVX-NEXT:    addl %eax, %ecx
+; AVX-NEXT:    vmovd %ecx, %xmm0
 ; AVX-NEXT:    retq
   %x = load i16, i16* %p
   %b = sdiv i16 %x, 42

Modified: llvm/trunk/test/CodeGen/X86/pr32420.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr32420.ll?rev=357743&r1=357742&r2=357743&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr32420.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr32420.ll Thu Apr  4 23:32:50 2019
@@ -14,12 +14,14 @@ define i32 @PR32420() {
 ; CHECK-NEXT:    movzwl (%rcx), %eax
 ; CHECK-NEXT:    movl %eax, %edx
 ; CHECK-NEXT:    shll $12, %edx
-; CHECK-NEXT:    sarw $12, %dx
+; CHECK-NEXT:    movswl %dx, %edx
+; CHECK-NEXT:    shrl $12, %edx
 ; CHECK-NEXT:    movq _b@{{.*}}(%rip), %rsi
 ; CHECK-NEXT:    orw (%rsi), %dx
 ; CHECK-NEXT:    movl (%rcx), %ecx
 ; CHECK-NEXT:    shll $12, %ecx
-; CHECK-NEXT:    sarw $12, %cx
+; CHECK-NEXT:    movswl %cx, %ecx
+; CHECK-NEXT:    shrl $12, %ecx
 ; CHECK-NEXT:    andl %edx, %ecx
 ; CHECK-NEXT:    movw %cx, (%rsi)
 ; CHECK-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/speculative-load-hardening.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/speculative-load-hardening.ll?rev=357743&r1=357742&r2=357743&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/speculative-load-hardening.ll (original)
+++ llvm/trunk/test/CodeGen/X86/speculative-load-hardening.ll Thu Apr  4 23:32:50 2019
@@ -1045,10 +1045,10 @@ define void @test_deferred_hardening(i32
 ; X64-NEXT:    sarq $63, %rax
 ; X64-NEXT:    cmpq $.Lslh_ret_addr23, %rcx
 ; X64-NEXT:    cmovneq %r15, %rax
-; X64-NEXT:    movzwl (%rbx), %ecx
-; X64-NEXT:    sarw $7, %cx
-; X64-NEXT:    movzwl %cx, %edi
+; X64-NEXT:    movswl (%rbx), %edi
+; X64-NEXT:    shrl $7, %edi
 ; X64-NEXT:    notl %edi
+; X64-NEXT:    orl $-65536, %edi # imm = 0xFFFF0000
 ; X64-NEXT:    orl %eax, %edi
 ; X64-NEXT:    shlq $47, %rax
 ; X64-NEXT:    orq %rax, %rsp
@@ -1098,10 +1098,10 @@ define void @test_deferred_hardening(i32
 ; X64-LFENCE-NEXT:    movl (%rbx), %edi
 ; X64-LFENCE-NEXT:    shll $7, %edi
 ; X64-LFENCE-NEXT:    callq sink
-; X64-LFENCE-NEXT:    movzwl (%rbx), %eax
-; X64-LFENCE-NEXT:    sarw $7, %ax
-; X64-LFENCE-NEXT:    movzwl %ax, %edi
+; X64-LFENCE-NEXT:    movswl (%rbx), %edi
+; X64-LFENCE-NEXT:    shrl $7, %edi
 ; X64-LFENCE-NEXT:    notl %edi
+; X64-LFENCE-NEXT:    orl $-65536, %edi # imm = 0xFFFF0000
 ; X64-LFENCE-NEXT:    callq sink
 ; X64-LFENCE-NEXT:    movzwl (%rbx), %eax
 ; X64-LFENCE-NEXT:    rolw $9, %ax