[llvm-commits] CVS: llvm/lib/Target/X86/InstSelectSimple.cpp
Chris Lattner
lattner at cs.uiuc.edu
Mon Apr 5 23:30:02 PDT 2004
Changes in directory llvm/lib/Target/X86:
InstSelectSimple.cpp updated: 1.213 -> 1.214
---
Log message:
Efficiently handle a long multiplication by a constant. For this testcase:
long %test(long %X) {
%Y = mul long %X, 123
ret long %Y
}
we used to generate:
test:
sub %ESP, 12
mov DWORD PTR [%ESP + 8], %ESI
mov DWORD PTR [%ESP + 4], %EDI
mov DWORD PTR [%ESP], %EBX
mov %ECX, DWORD PTR [%ESP + 16]
mov %ESI, DWORD PTR [%ESP + 20]
mov %EDI, 123
mov %EBX, 0
mov %EAX, %ECX
mul %EDI
imul %ESI, %EDI
add %ESI, %EDX
imul %ECX, %EBX
add %ESI, %ECX
mov %EDX, %ESI
mov %EBX, DWORD PTR [%ESP]
mov %EDI, DWORD PTR [%ESP + 4]
mov %ESI, DWORD PTR [%ESP + 8]
add %ESP, 12
ret
Now we emit:
test:
mov %EAX, DWORD PTR [%ESP + 4]
mov %ECX, DWORD PTR [%ESP + 8]
mov %EDX, 123
mul %EDX
imul %ECX, %ECX, 123
add %ECX, %EDX
mov %EDX, %ECX
ret
Which, incidentally, is substantially nicer than what GCC manages:
T:
sub %esp, 8
mov %eax, 123
mov DWORD PTR [%esp], %ebx
mov %ebx, DWORD PTR [%esp+16]
mov DWORD PTR [%esp+4], %esi
mov %esi, DWORD PTR [%esp+12]
imul %ecx, %ebx, 123
mov %ebx, DWORD PTR [%esp]
mul %esi
mov %esi, DWORD PTR [%esp+4]
add %esp, 8
lea %edx, [%ecx+%edx]
ret
---
Diffs of the changes: (+57 -24)
Index: llvm/lib/Target/X86/InstSelectSimple.cpp
diff -u llvm/lib/Target/X86/InstSelectSimple.cpp:1.213 llvm/lib/Target/X86/InstSelectSimple.cpp:1.214
--- llvm/lib/Target/X86/InstSelectSimple.cpp:1.213 Mon Apr 5 22:42:38 2004
+++ llvm/lib/Target/X86/InstSelectSimple.cpp Mon Apr 5 23:29:36 2004
@@ -1940,7 +1940,7 @@
unsigned DestReg = getReg(I);
// Simple scalar multiply?
- if (I.getType() != Type::LongTy && I.getType() != Type::ULongTy) {
+ if (getClass(I.getType()) != cLong) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1))) {
unsigned Val = (unsigned)CI->getRawValue(); // Cannot be 64-bit constant
MachineBasicBlock::iterator MBBI = BB->end();
@@ -1951,31 +1951,64 @@
doMultiply(BB, MBBI, DestReg, I.getType(), Op0Reg, Op1Reg);
}
} else {
- unsigned Op1Reg = getReg(I.getOperand(1));
-
// Long value. We have to do things the hard way...
- // Multiply the two low parts... capturing carry into EDX
- BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
- BuildMI(BB, X86::MUL32r, 1).addReg(Op1Reg); // AL*BL
-
- unsigned OverflowReg = makeAnotherReg(Type::UIntTy);
- BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL
- BuildMI(BB, X86::MOV32rr, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32
-
- MachineBasicBlock::iterator MBBI = BB->end();
- unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL
- BuildMI(*BB, MBBI, X86::IMUL32rr,2,AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1))) {
+ unsigned CLow = CI->getRawValue();
+ unsigned CHi = CI->getRawValue() >> 32;
- unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
- BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32)
- AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
-
- MBBI = BB->end();
- unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH
- BuildMI(*BB, MBBI, X86::IMUL32rr,2,ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1);
-
- BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32)
- DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
+ // Multiply the two low parts... capturing carry into EDX
+ unsigned Op1RegL = makeAnotherReg(Type::UIntTy);
+ BuildMI(BB, X86::MOV32ri, 1, Op1RegL).addImm(CLow);
+ BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
+ BuildMI(BB, X86::MUL32r, 1).addReg(Op1RegL); // AL*BL
+
+ unsigned OverflowReg = makeAnotherReg(Type::UIntTy);
+ BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL
+ BuildMI(BB, X86::MOV32rr, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32
+
+ unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL
+ BuildMI(BB, X86::IMUL32rri, 2, AHBLReg).addReg(Op0Reg+1).addImm(CLow);
+
+ unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
+ BuildMI(BB, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32)
+ AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
+
+ if (CHi != 0) {
+ unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH
+ BuildMI(BB, X86::IMUL32rri, 2, ALBHReg).addReg(Op0Reg).addImm(CHi);
+
+ BuildMI(BB, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32)
+ DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
+ } else {
+ BuildMI(BB, X86::MOV32rr, 1, DestReg+1).addReg(AHBLplusOverflowReg);
+ }
+ } else {
+ unsigned Op1Reg = getReg(I.getOperand(1));
+ // Multiply the two low parts... capturing carry into EDX
+ BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg);
+ BuildMI(BB, X86::MUL32r, 1).addReg(Op1Reg); // AL*BL
+
+ unsigned OverflowReg = makeAnotherReg(Type::UIntTy);
+ BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL
+ BuildMI(BB, X86::MOV32rr, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32
+
+ MachineBasicBlock::iterator MBBI = BB->end();
+ unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL
+ BuildMI(*BB, MBBI, X86::IMUL32rr, 2,
+ AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg);
+
+ unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy);
+ BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32)
+ AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg);
+
+ MBBI = BB->end();
+ unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH
+ BuildMI(*BB, MBBI, X86::IMUL32rr, 2,
+ ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1);
+
+ BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32)
+ DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg);
+ }
}
}
More information about the llvm-commits
mailing list