[llvm-commits] CVS: llvm/lib/Target/X86/X86ISelSimple.cpp
Chris Lattner
lattner at cs.uiuc.edu
Tue Oct 5 22:01:18 PDT 2004
Changes in directory llvm/lib/Target/X86:
X86ISelSimple.cpp updated: 1.282 -> 1.283
---
Log message:
Codegen signed modulo by 2 or -2 more efficiently. Instead of generating:
t:
mov %EDX, DWORD PTR [%ESP + 4]
mov %ECX, 2
mov %EAX, %EDX
sar %EDX, 31
idiv %ECX
mov %EAX, %EDX
ret
Generate:
t:
mov %ECX, DWORD PTR [%ESP + 4]
*** mov %EAX, %ECX
cdq
and %ECX, 1
xor %ECX, %EDX
sub %ECX, %EDX
*** mov %EAX, %ECX
ret
Note that the two marked moves are redundant and should be eliminated by the
register allocator, but currently are not.
Compare this to GCC, which generates:
t:
mov %eax, DWORD PTR [%esp+4]
mov %edx, %eax
shr %edx, 31
lea %ecx, [%edx+%eax]
and %ecx, -2
sub %eax, %ecx
ret
or ICC 8.0, which generates:
t:
movl 4(%esp), %ecx #3.5
movl $-2147483647, %eax #3.25
imull %ecx #3.25
movl %ecx, %eax #3.25
sarl $31, %eax #3.25
addl %ecx, %edx #3.25
subl %edx, %eax #3.25
addl %eax, %eax #3.25
negl %eax #3.25
subl %eax, %ecx #3.25
movl %ecx, %eax #3.25
ret #3.25
We would be in great shape if not for the two redundant moves marked above.
---
Diffs of the changes: (+39 -3)
Index: llvm/lib/Target/X86/X86ISelSimple.cpp
diff -u llvm/lib/Target/X86/X86ISelSimple.cpp:1.282 llvm/lib/Target/X86/X86ISelSimple.cpp:1.283
--- llvm/lib/Target/X86/X86ISelSimple.cpp:1.282 Tue Oct 5 23:19:43 2004
+++ llvm/lib/Target/X86/X86ISelSimple.cpp Wed Oct 6 00:01:07 2004
@@ -2672,14 +2672,14 @@
}
static const unsigned MovOpcode[]={ X86::MOV8rr, X86::MOV16rr, X86::MOV32rr };
- static const unsigned NEGOpcode[] = { X86::NEG8r, X86::NEG16r, X86::NEG32r };
+ static const unsigned NEGOpcode[]={ X86::NEG8r, X86::NEG16r, X86::NEG32r };
static const unsigned SAROpcode[]={ X86::SAR8ri, X86::SAR16ri, X86::SAR32ri };
static const unsigned SHROpcode[]={ X86::SHR8ri, X86::SHR16ri, X86::SHR32ri };
static const unsigned ADDOpcode[]={ X86::ADD8rr, X86::ADD16rr, X86::ADD32rr };
// Special case signed division by power of 2.
- if (isDiv)
- if (ConstantSInt *CI = dyn_cast<ConstantSInt>(Op1)) {
+ if (ConstantSInt *CI = dyn_cast<ConstantSInt>(Op1))
+ if (isDiv) {
assert(Class != cLong && "This doesn't handle 64-bit divides!");
int V = CI->getValue();
@@ -2742,6 +2742,42 @@
BuildMI(*BB, IP, NEGOpcode[Class], 1, ResultReg).addReg(TmpReg4);
return;
}
+ } else { // X % C
+ assert(Class != cLong && "This doesn't handle 64-bit remainder!");
+ int V = CI->getValue();
+
+ if (V == 2 || V == -2) { // X % 2, X % -2
+ std::cerr << "SREM 2\n";
+ static const unsigned SExtOpcode[] = { X86::CBW, X86::CWD, X86::CDQ };
+ static const unsigned BaseReg[] = { X86::AL , X86::AX , X86::EAX };
+ static const unsigned SExtReg[] = { X86::AH , X86::DX , X86::EDX };
+ static const unsigned ANDOpcode[] = {
+ X86::AND8ri, X86::AND16ri, X86::AND32ri
+ };
+ static const unsigned XOROpcode[] = {
+ X86::XOR8rr, X86::XOR16rr, X86::XOR32rr
+ };
+ static const unsigned SUBOpcode[] = {
+ X86::SUB8rr, X86::SUB16rr, X86::SUB32rr
+ };
+
+ // Sign extend result into reg of -1 or 0.
+ unsigned Op0Reg = getReg(Op0, BB, IP);
+ BuildMI(*BB, IP, MovOpcode[Class], 1, BaseReg[Class]).addReg(Op0Reg);
+ BuildMI(*BB, IP, SExtOpcode[Class], 0);
+ unsigned TmpReg0 = makeAnotherReg(Op0->getType());
+ BuildMI(*BB, IP, MovOpcode[Class], 1, TmpReg0).addReg(SExtReg[Class]);
+
+ unsigned TmpReg1 = makeAnotherReg(Op0->getType());
+ BuildMI(*BB, IP, ANDOpcode[Class], 2, TmpReg1).addReg(Op0Reg).addImm(1);
+
+ unsigned TmpReg2 = makeAnotherReg(Op0->getType());
+ BuildMI(*BB, IP, XOROpcode[Class], 2,
+ TmpReg2).addReg(TmpReg1).addReg(TmpReg0);
+ BuildMI(*BB, IP, SUBOpcode[Class], 2,
+ ResultReg).addReg(TmpReg2).addReg(TmpReg0);
+ return;
+ }
}
static const unsigned Regs[] ={ X86::AL , X86::AX , X86::EAX };
More information about the llvm-commits
mailing list