[llvm] r357912 - [x86] make 8-bit shl undesirable
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 8 06:58:50 PDT 2019
Author: spatel
Date: Mon Apr 8 06:58:50 2019
New Revision: 357912
URL: http://llvm.org/viewvc/llvm-project?rev=357912&view=rev
Log:
[x86] make 8-bit shl undesirable
I was looking at a potential DAGCombiner fix for one of the regressions in D60278, and it caused severe regression test pain because x86 TLI lies about the desirability of 8-bit shift ops.
We've hinted at making all 8-bit ops undesirable for the reason in the code comment:
// TODO: Almost no 8-bit ops are desirable because they have no actual
// size/speed advantages vs. 32-bit ops, but they do have a major
// potential disadvantage by causing partial register stalls.
...but making all 8-bit ops undesirable at once leads to massive diffs and itself exposes all kinds of optimization holes.
Differential Revision: https://reviews.llvm.org/D60286
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/bt.ll
llvm/trunk/test/CodeGen/X86/btc_bts_btr.ll
llvm/trunk/test/CodeGen/X86/rotate4.ll
llvm/trunk/test/CodeGen/X86/scheduler-backtracking.ll
llvm/trunk/test/CodeGen/X86/select_const.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=357912&r1=357911&r2=357912&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Apr 8 06:58:50 2019
@@ -42798,11 +42798,15 @@ bool X86TargetLowering::isTypeDesirableF
if (Opc == ISD::SHL && VT.isVector() && VT.getVectorElementType() == MVT::i8)
return false;
- // 8-bit multiply is probably not much cheaper than 32-bit multiply, and
- // we have specializations to turn 32-bit multiply into LEA or other ops.
+ // TODO: Almost no 8-bit ops are desirable because they have no actual
+ // size/speed advantages vs. 32-bit ops, but they do have a major
+ // potential disadvantage by causing partial register stalls.
+ //
+ // 8-bit multiply/shl is probably not cheaper than 32-bit multiply/shl, and
+ // we have specializations to turn 32-bit multiply/shl into LEA or other ops.
// Also, see the comment in "IsDesirableToPromoteOp" - where we additionally
// check for a constant operand to the multiply.
- if (Opc == ISD::MUL && VT == MVT::i8)
+ if ((Opc == ISD::MUL || Opc == ISD::SHL) && VT == MVT::i8)
return false;
// i16 instruction encodings are longer and some i16 instructions are slow,
Modified: llvm/trunk/test/CodeGen/X86/bt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bt.ll?rev=357912&r1=357911&r2=357912&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bt.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bt.ll Mon Apr 8 06:58:50 2019
@@ -1150,19 +1150,18 @@ define void @demanded_i32(i32* nocapture
define zeroext i1 @demanded_with_known_zeroes(i32 %bit, i32 %bits) {
; X86-LABEL: demanded_with_known_zeroes:
; X86: # %bb.0: # %entry
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: shlb $2, %cl
-; X86-NEXT: movzbl %cl, %ecx
-; X86-NEXT: btl %ecx, %eax
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: shlb $2, %al
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: btl %eax, %ecx
; X86-NEXT: setb %al
; X86-NEXT: retl
;
; X64-LABEL: demanded_with_known_zeroes:
; X64: # %bb.0: # %entry
-; X64-NEXT: shlb $2, %dil
-; X64-NEXT: movzbl %dil, %eax
-; X64-NEXT: btl %eax, %esi
+; X64-NEXT: shll $2, %edi
+; X64-NEXT: btl %edi, %esi
; X64-NEXT: setb %al
; X64-NEXT: retq
entry:
Modified: llvm/trunk/test/CodeGen/X86/btc_bts_btr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/btc_bts_btr.ll?rev=357912&r1=357911&r2=357912&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/btc_bts_btr.ll (original)
+++ llvm/trunk/test/CodeGen/X86/btc_bts_btr.ll Mon Apr 8 06:58:50 2019
@@ -954,15 +954,15 @@ define i32 @btr_32_mask_zeros(i32 %x, i3
; X64-LABEL: btr_32_mask_zeros:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shlb $2, %sil
+; X64-NEXT: shll $2, %esi
; X64-NEXT: btrl %esi, %eax
; X64-NEXT: retq
;
; X86-LABEL: btr_32_mask_zeros:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: shlb $2, %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: btrl %ecx, %eax
; X86-NEXT: retl
%1 = shl i32 %n, 2
@@ -977,15 +977,15 @@ define i32 @bts_32_mask_zeros(i32 %x, i3
; X64-LABEL: bts_32_mask_zeros:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shlb $2, %sil
+; X64-NEXT: shll $2, %esi
; X64-NEXT: btsl %esi, %eax
; X64-NEXT: retq
;
; X86-LABEL: bts_32_mask_zeros:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: shlb $2, %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: btsl %ecx, %eax
; X86-NEXT: retl
%1 = shl i32 %n, 2
@@ -999,15 +999,15 @@ define i32 @btc_32_mask_zeros(i32 %x, i3
; X64-LABEL: btc_32_mask_zeros:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shlb $2, %sil
+; X64-NEXT: shll $2, %esi
; X64-NEXT: btcl %esi, %eax
; X64-NEXT: retq
;
; X86-LABEL: btc_32_mask_zeros:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: shlb $2, %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: btcl %ecx, %eax
; X86-NEXT: retl
%1 = shl i32 %n, 2
@@ -1021,14 +1021,14 @@ define i64 @btr_64_mask_zeros(i64 %x, i6
; X64-LABEL: btr_64_mask_zeros:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shlb $2, %sil
+; X64-NEXT: shlq $2, %rsi
; X64-NEXT: btrq %rsi, %rax
; X64-NEXT: retq
;
; X86-LABEL: btr_64_mask_zeros:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: shlb $2, %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: shll $2, %ecx
; X86-NEXT: movl $1, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: shldl %cl, %eax, %edx
@@ -1056,14 +1056,14 @@ define i64 @bts_64_mask_zeros(i64 %x, i6
; X64-LABEL: bts_64_mask_zeros:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shlb $2, %sil
+; X64-NEXT: shlq $2, %rsi
; X64-NEXT: btsq %rsi, %rax
; X64-NEXT: retq
;
; X86-LABEL: bts_64_mask_zeros:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: shlb $2, %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: shll $2, %ecx
; X86-NEXT: movl $1, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: shldl %cl, %eax, %edx
@@ -1088,14 +1088,14 @@ define i64 @btc_64_mask_zeros(i64 %x, i6
; X64-LABEL: btc_64_mask_zeros:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shlb $2, %sil
+; X64-NEXT: shlq $2, %rsi
; X64-NEXT: btcq %rsi, %rax
; X64-NEXT: retq
;
; X86-LABEL: btc_64_mask_zeros:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: shlb $2, %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: shll $2, %ecx
; X86-NEXT: movl $1, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: shldl %cl, %eax, %edx
Modified: llvm/trunk/test/CodeGen/X86/rotate4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/rotate4.ll?rev=357912&r1=357911&r2=357912&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/rotate4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/rotate4.ll Mon Apr 8 06:58:50 2019
@@ -633,9 +633,9 @@ define i32 @rotate_demanded_bits_2(i32,
define i32 @rotate_demanded_bits_3(i32, i32) {
; X86-LABEL: rotate_demanded_bits_3:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: addb %cl, %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: roll %cl, %eax
; X86-NEXT: retl
;
Modified: llvm/trunk/test/CodeGen/X86/scheduler-backtracking.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/scheduler-backtracking.ll?rev=357912&r1=357911&r2=357912&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/scheduler-backtracking.ll (original)
+++ llvm/trunk/test/CodeGen/X86/scheduler-backtracking.ll Mon Apr 8 06:58:50 2019
@@ -16,7 +16,7 @@ define i256 @test1(i256 %a) nounwind {
; ILP-NEXT: pushq %rbx
; ILP-NEXT: movq %rdi, %rax
; ILP-NEXT: xorl %r8d, %r8d
-; ILP-NEXT: addb %sil, %sil
+; ILP-NEXT: addq %rsi, %rsi
; ILP-NEXT: addb $2, %sil
; ILP-NEXT: orb $1, %sil
; ILP-NEXT: movl $1, %r10d
@@ -61,7 +61,7 @@ define i256 @test1(i256 %a) nounwind {
; HYBRID-LABEL: test1:
; HYBRID: # %bb.0:
; HYBRID-NEXT: movq %rdi, %rax
-; HYBRID-NEXT: addb %sil, %sil
+; HYBRID-NEXT: addq %rsi, %rsi
; HYBRID-NEXT: addb $2, %sil
; HYBRID-NEXT: orb $1, %sil
; HYBRID-NEXT: movb $-128, %cl
@@ -104,7 +104,7 @@ define i256 @test1(i256 %a) nounwind {
; BURR-LABEL: test1:
; BURR: # %bb.0:
; BURR-NEXT: movq %rdi, %rax
-; BURR-NEXT: addb %sil, %sil
+; BURR-NEXT: addq %rsi, %rsi
; BURR-NEXT: addb $2, %sil
; BURR-NEXT: orb $1, %sil
; BURR-NEXT: movb $-128, %cl
@@ -148,7 +148,7 @@ define i256 @test1(i256 %a) nounwind {
; SRC: # %bb.0:
; SRC-NEXT: pushq %rbx
; SRC-NEXT: movq %rdi, %rax
-; SRC-NEXT: addb %sil, %sil
+; SRC-NEXT: addq %rsi, %rsi
; SRC-NEXT: addb $2, %sil
; SRC-NEXT: orb $1, %sil
; SRC-NEXT: movb $-128, %cl
@@ -195,7 +195,7 @@ define i256 @test1(i256 %a) nounwind {
; LIN-NEXT: movq %rdi, %rax
; LIN-NEXT: xorl %r9d, %r9d
; LIN-NEXT: movl $1, %r8d
-; LIN-NEXT: addb %sil, %sil
+; LIN-NEXT: addq %rsi, %rsi
; LIN-NEXT: addb $2, %sil
; LIN-NEXT: orb $1, %sil
; LIN-NEXT: movl $1, %edx
Modified: llvm/trunk/test/CodeGen/X86/select_const.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/select_const.ll?rev=357912&r1=357911&r2=357912&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/select_const.ll (original)
+++ llvm/trunk/test/CodeGen/X86/select_const.ll Mon Apr 8 06:58:50 2019
@@ -353,9 +353,9 @@ define i16 @select_pow2_diff_invert(i1 z
define i32 @select_pow2_diff_neg(i1 zeroext %cond) {
; CHECK-LABEL: select_pow2_diff_neg:
; CHECK: # %bb.0:
-; CHECK-NEXT: shlb $4, %dil
-; CHECK-NEXT: movzbl %dil, %eax
-; CHECK-NEXT: orl $-25, %eax
+; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT: shll $4, %edi
+; CHECK-NEXT: leal -25(%rdi), %eax
; CHECK-NEXT: retq
%sel = select i1 %cond, i32 -9, i32 -25
ret i32 %sel
More information about the llvm-commits
mailing list