[llvm] r349285 - [X86] Lower to SHLD/SHRD on slow machines for optsize
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Dec 15 11:43:44 PST 2018
Author: rksimon
Date: Sat Dec 15 11:43:44 2018
New Revision: 349285
URL: http://llvm.org/viewvc/llvm-project?rev=349285&view=rev
Log:
[X86] Lower to SHLD/SHRD on slow machines for optsize
Use consistent rules for when to lower to SHLD/SHRD on slow machines. This fixes a weird issue where the funnel shift gets expanded, but then X86ISelLowering's combineOr sees the optsize attribute and combines the expansion back to SHLD/SHRD anyway, now with the redundant modulo shift-amount guard still attached.
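For illustration, here is a minimal standalone sketch (a hypothetical helper, not the actual LLVM sources) of the decision rule this patch makes consistent: expand the funnel shift into plain shifts and an OR only when the subtarget is slow at SHLD/SHRD and we are not optimizing for size; under optsize the single SHLD/SHRD instruction always wins on code size.

    // Hypothetical sketch of the rule now applied in LowerFunnelShift:
    // keep SHLD/SHRD under optsize, and expand only on slow-SHLD
    // subtargets when optimizing for speed.
    static bool shouldExpandFunnelShift(bool SubtargetSHLDSlow,
                                        bool OptForSize) {
      return !OptForSize && SubtargetSHLDSlow;
    }

Applying the rule up front in LowerFunnelShift means the optsize case is never expanded in the first place, so combineOr no longer re-forms SHLD/SHRD from an expansion that still carries the modulo-amount guard; the updated tests below check for the plain shldl/shrdl sequence.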
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/fshl.ll
llvm/trunk/test/CodeGen/X86/fshr.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=349285&r1=349284&r2=349285&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Dec 15 11:43:44 2018
@@ -17047,9 +17047,9 @@ static SDValue LowerFunnelShift(SDValue
   SDValue Op1 = Op.getOperand(1);
   SDValue Amt = Op.getOperand(2);
 
-  // Expand slow SHLD/SHRD cases.
-  // TODO - can we be more selective here: OptSize/RMW etc.?
-  if (Subtarget.isSHLDSlow())
+  // Expand slow SHLD/SHRD cases if we are not optimizing for size.
+  bool OptForSize = DAG.getMachineFunction().getFunction().optForSize();
+  if (!OptForSize && Subtarget.isSHLDSlow())
     return SDValue();
 
   bool IsFSHR = Op.getOpcode() == ISD::FSHR;
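As a concrete illustration of the user-visible effect (a hand-written example, not part of the patch), C++ source like the following can be pattern-matched by Clang to the llvm.fshl.i32 intrinsic; compiled at -Oz for a slow-SHLD x86 target, the expectation after this change is a single shldl instead of the shift/or/cmov expansion:

    // Hypothetical example: a variable 32-bit funnel shift written in C++.
    // Clang may recognize this pattern as llvm.fshl.i32(hi, lo, amt).
    unsigned funnel_shl(unsigned hi, unsigned lo, unsigned amt) {
      amt &= 31;                // shift amount modulo the bit width
      if (amt == 0)
        return hi;              // avoid a shift by 32 (undefined behavior)
      return (hi << amt) | (lo >> (32 - amt));
    }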
Modified: llvm/trunk/test/CodeGen/X86/fshl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fshl.ll?rev=349285&r1=349284&r2=349285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fshl.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fshl.ll Sat Dec 15 11:43:44 2018
@@ -179,46 +179,21 @@ define i32 @var_shift_i32(i32 %x, i32 %y
}
define i32 @var_shift_i32_optsize(i32 %x, i32 %y, i32 %z) nounwind optsize {
-; X86-FAST-LABEL: var_shift_i32_optsize:
-; X86-FAST: # %bb.0:
-; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: shldl %cl, %edx, %eax
-; X86-FAST-NEXT: retl
+; X86-LABEL: var_shift_i32_optsize:
+; X86: # %bb.0:
+; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: shldl %cl, %edx, %eax
+; X86-NEXT: retl
;
-; X86-SLOW-LABEL: var_shift_i32_optsize:
-; X86-SLOW: # %bb.0:
-; X86-SLOW-NEXT: pushl %esi
-; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: movl %eax, %edx
-; X86-SLOW-NEXT: shldl %cl, %esi, %edx
-; X86-SLOW-NEXT: andb $31, %cl
-; X86-SLOW-NEXT: je .LBB3_2
-; X86-SLOW-NEXT: # %bb.1:
-; X86-SLOW-NEXT: movl %edx, %eax
-; X86-SLOW-NEXT: .LBB3_2:
-; X86-SLOW-NEXT: popl %esi
-; X86-SLOW-NEXT: retl
-;
-; X64-FAST-LABEL: var_shift_i32_optsize:
-; X64-FAST: # %bb.0:
-; X64-FAST-NEXT: movl %edx, %ecx
-; X64-FAST-NEXT: movl %edi, %eax
-; X64-FAST-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-FAST-NEXT: shldl %cl, %esi, %eax
-; X64-FAST-NEXT: retq
-;
-; X64-SLOW-LABEL: var_shift_i32_optsize:
-; X64-SLOW: # %bb.0:
-; X64-SLOW-NEXT: movl %edx, %ecx
-; X64-SLOW-NEXT: movl %edi, %eax
-; X64-SLOW-NEXT: shldl %cl, %esi, %eax
-; X64-SLOW-NEXT: andb $31, %cl
-; X64-SLOW-NEXT: cmovel %edi, %eax
-; X64-SLOW-NEXT: retq
+; X64-LABEL: var_shift_i32_optsize:
+; X64: # %bb.0:
+; X64-NEXT: movl %edx, %ecx
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NEXT: shldl %cl, %esi, %eax
+; X64-NEXT: retq
%tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
ret i32 %tmp
}
Modified: llvm/trunk/test/CodeGen/X86/fshr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fshr.ll?rev=349285&r1=349284&r2=349285&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fshr.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fshr.ll Sat Dec 15 11:43:44 2018
@@ -178,46 +178,21 @@ define i32 @var_shift_i32(i32 %x, i32 %y
}
define i32 @var_shift_i32_optsize(i32 %x, i32 %y, i32 %z) nounwind optsize {
-; X86-FAST-LABEL: var_shift_i32_optsize:
-; X86-FAST: # %bb.0:
-; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT: shrdl %cl, %edx, %eax
-; X86-FAST-NEXT: retl
+; X86-LABEL: var_shift_i32_optsize:
+; X86: # %bb.0:
+; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: shrdl %cl, %edx, %eax
+; X86-NEXT: retl
;
-; X86-SLOW-LABEL: var_shift_i32_optsize:
-; X86-SLOW: # %bb.0:
-; X86-SLOW-NEXT: pushl %esi
-; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SLOW-NEXT: movl %eax, %edx
-; X86-SLOW-NEXT: shrdl %cl, %esi, %edx
-; X86-SLOW-NEXT: andb $31, %cl
-; X86-SLOW-NEXT: je .LBB3_2
-; X86-SLOW-NEXT: # %bb.1:
-; X86-SLOW-NEXT: movl %edx, %eax
-; X86-SLOW-NEXT: .LBB3_2:
-; X86-SLOW-NEXT: popl %esi
-; X86-SLOW-NEXT: retl
-;
-; X64-FAST-LABEL: var_shift_i32_optsize:
-; X64-FAST: # %bb.0:
-; X64-FAST-NEXT: movl %edx, %ecx
-; X64-FAST-NEXT: movl %esi, %eax
-; X64-FAST-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-FAST-NEXT: shrdl %cl, %edi, %eax
-; X64-FAST-NEXT: retq
-;
-; X64-SLOW-LABEL: var_shift_i32_optsize:
-; X64-SLOW: # %bb.0:
-; X64-SLOW-NEXT: movl %edx, %ecx
-; X64-SLOW-NEXT: movl %esi, %eax
-; X64-SLOW-NEXT: shrdl %cl, %edi, %eax
-; X64-SLOW-NEXT: andb $31, %cl
-; X64-SLOW-NEXT: cmovel %esi, %eax
-; X64-SLOW-NEXT: retq
+; X64-LABEL: var_shift_i32_optsize:
+; X64: # %bb.0:
+; X64-NEXT: movl %edx, %ecx
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-NEXT: shrdl %cl, %edi, %eax
+; X64-NEXT: retq
%tmp = tail call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
ret i32 %tmp
}