[llvm] r350272 - [X86] Add load folding support to the custom isel we do for X86ISD::UMUL/SMUL.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 2 15:24:09 PST 2019
Author: ctopper
Date: Wed Jan 2 15:24:08 2019
New Revision: 350272
URL: http://llvm.org/viewvc/llvm-project?rev=350272&view=rev
Log:
[X86] Add load folding support to the custom isel we do for X86ISD::UMUL/SMUL.
The peephole pass isn't always able to fold the load because it can't commute the implicit usage of AL/AX/EAX/RAX.
Modified:
llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
llvm/trunk/test/CodeGen/X86/umul-with-overflow.ll
llvm/trunk/test/CodeGen/X86/xmulo.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=350272&r1=350271&r2=350272&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Wed Jan 2 15:24:08 2019
@@ -3454,31 +3454,73 @@ void X86DAGToDAGISel::Select(SDNode *Nod
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
- unsigned LoReg, Opc;
+ unsigned LoReg, ROpc, MOpc;
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i8:
LoReg = X86::AL;
- Opc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r;
+ ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r;
+ MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m;
break;
- case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break;
- case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
- case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
+ case MVT::i16:
+ LoReg = X86::AX;
+ ROpc = X86::MUL16r;
+ MOpc = X86::MUL16m;
+ break;
+ case MVT::i32:
+ LoReg = X86::EAX;
+ ROpc = X86::MUL32r;
+ MOpc = X86::MUL32m;
+ break;
+ case MVT::i64:
+ LoReg = X86::RAX;
+ ROpc = X86::MUL64r;
+ MOpc = X86::MUL64m;
+ break;
+ }
+
+ SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+ bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ // Multiply is commutative.
+ if (!FoldedLoad) {
+ FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
+ if (FoldedLoad)
+ std::swap(N0, N1);
}
SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
N0, SDValue()).getValue(1);
- // i16/i32/i64 use an instruction that produces a low and high result even
- // though only the low result is used.
- SDVTList VTs;
- if (NVT == MVT::i8)
- VTs = CurDAG->getVTList(NVT, MVT::i32);
- else
- VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
+ MachineSDNode *CNode;
+ if (FoldedLoad) {
+ // i16/i32/i64 use an instruction that produces a low and high result even
+ // though only the low result is used.
+ SDVTList VTs;
+ if (NVT == MVT::i8)
+ VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
+ else
+ VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other);
+
+ SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
+ InFlag };
+ CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
+
+ // Update the chain.
+ ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3));
+ // Record the mem-refs
+ CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
+ } else {
+ // i16/i32/i64 use an instruction that produces a low and high result even
+ // though only the low result is used.
+ SDVTList VTs;
+ if (NVT == MVT::i8)
+ VTs = CurDAG->getVTList(NVT, MVT::i32);
+ else
+ VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
+
+ CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InFlag});
+ }
- SDValue Ops[] = {N1, InFlag};
- SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2));
CurDAG->RemoveDeadNode(Node);
Modified: llvm/trunk/test/CodeGen/X86/umul-with-overflow.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/umul-with-overflow.ll?rev=350272&r1=350271&r2=350272&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/umul-with-overflow.ll (original)
+++ llvm/trunk/test/CodeGen/X86/umul-with-overflow.ll Wed Jan 2 15:24:08 2019
@@ -7,9 +7,8 @@ declare {i32, i1} @llvm.umul.with.overfl
define zeroext i1 @a(i32 %x) nounwind {
; X86-LABEL: a:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl $3, %ecx
-; X86-NEXT: mull %ecx
+; X86-NEXT: movl $3, %eax
+; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: seto %al
; X86-NEXT: retl
;
Modified: llvm/trunk/test/CodeGen/X86/xmulo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xmulo.ll?rev=350272&r1=350271&r2=350272&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xmulo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xmulo.ll Wed Jan 2 15:24:08 2019
@@ -725,8 +725,9 @@ define i1 @bug27873(i64 %c1, i1 %c2) {
define zeroext i1 @smuloi8_load(i8* %ptr1, i8 %v2, i8* %res) {
; SDAG-LABEL: smuloi8_load:
; SDAG: ## %bb.0:
-; SDAG-NEXT: movb (%rdi), %al
-; SDAG-NEXT: imulb %sil
+; SDAG-NEXT: movl %esi, %eax
+; SDAG-NEXT: ## kill: def $al killed $al killed $eax
+; SDAG-NEXT: imulb (%rdi)
; SDAG-NEXT: seto %cl
; SDAG-NEXT: movb %al, (%rdx)
; SDAG-NEXT: movl %ecx, %eax
@@ -753,9 +754,8 @@ define zeroext i1 @smuloi8_load2(i8 %v1,
; SDAG-LABEL: smuloi8_load2:
; SDAG: ## %bb.0:
; SDAG-NEXT: movl %edi, %eax
-; SDAG-NEXT: movb (%rsi), %cl
; SDAG-NEXT: ## kill: def $al killed $al killed $eax
-; SDAG-NEXT: imulb %cl
+; SDAG-NEXT: imulb (%rsi)
; SDAG-NEXT: seto %cl
; SDAG-NEXT: movb %al, (%rdx)
; SDAG-NEXT: movl %ecx, %eax
@@ -926,8 +926,9 @@ define zeroext i1 @smuloi64_load2(i64 %v
define zeroext i1 @umuloi8_load(i8* %ptr1, i8 %v2, i8* %res) {
; SDAG-LABEL: umuloi8_load:
; SDAG: ## %bb.0:
-; SDAG-NEXT: movb (%rdi), %al
-; SDAG-NEXT: mulb %sil
+; SDAG-NEXT: movl %esi, %eax
+; SDAG-NEXT: ## kill: def $al killed $al killed $eax
+; SDAG-NEXT: mulb (%rdi)
; SDAG-NEXT: seto %cl
; SDAG-NEXT: movb %al, (%rdx)
; SDAG-NEXT: movl %ecx, %eax
@@ -954,9 +955,8 @@ define zeroext i1 @umuloi8_load2(i8 %v1,
; SDAG-LABEL: umuloi8_load2:
; SDAG: ## %bb.0:
; SDAG-NEXT: movl %edi, %eax
-; SDAG-NEXT: movb (%rsi), %cl
; SDAG-NEXT: ## kill: def $al killed $al killed $eax
-; SDAG-NEXT: mulb %cl
+; SDAG-NEXT: mulb (%rsi)
; SDAG-NEXT: seto %cl
; SDAG-NEXT: movb %al, (%rdx)
; SDAG-NEXT: movl %ecx, %eax
@@ -984,8 +984,9 @@ define zeroext i1 @umuloi16_load(i16* %p
; SDAG-LABEL: umuloi16_load:
; SDAG: ## %bb.0:
; SDAG-NEXT: movq %rdx, %rcx
-; SDAG-NEXT: movzwl (%rdi), %eax
-; SDAG-NEXT: mulw %si
+; SDAG-NEXT: movl %esi, %eax
+; SDAG-NEXT: ## kill: def $ax killed $ax killed $eax
+; SDAG-NEXT: mulw (%rdi)
; SDAG-NEXT: seto %dl
; SDAG-NEXT: movw %ax, (%rcx)
; SDAG-NEXT: movl %edx, %eax
@@ -1014,9 +1015,8 @@ define zeroext i1 @umuloi16_load2(i16 %v
; SDAG: ## %bb.0:
; SDAG-NEXT: movq %rdx, %rcx
; SDAG-NEXT: movl %edi, %eax
-; SDAG-NEXT: movzwl (%rsi), %edx
; SDAG-NEXT: ## kill: def $ax killed $ax killed $eax
-; SDAG-NEXT: mulw %dx
+; SDAG-NEXT: mulw (%rsi)
; SDAG-NEXT: seto %dl
; SDAG-NEXT: movw %ax, (%rcx)
; SDAG-NEXT: movl %edx, %eax
@@ -1045,8 +1045,8 @@ define zeroext i1 @umuloi32_load(i32* %p
; SDAG-LABEL: umuloi32_load:
; SDAG: ## %bb.0:
; SDAG-NEXT: movq %rdx, %rcx
-; SDAG-NEXT: movl (%rdi), %eax
-; SDAG-NEXT: mull %esi
+; SDAG-NEXT: movl %esi, %eax
+; SDAG-NEXT: mull (%rdi)
; SDAG-NEXT: seto %dl
; SDAG-NEXT: movl %eax, (%rcx)
; SDAG-NEXT: movl %edx, %eax
@@ -1075,8 +1075,7 @@ define zeroext i1 @umuloi32_load2(i32 %v
; SDAG: ## %bb.0:
; SDAG-NEXT: movq %rdx, %rcx
; SDAG-NEXT: movl %edi, %eax
-; SDAG-NEXT: movl (%rsi), %edx
-; SDAG-NEXT: mull %edx
+; SDAG-NEXT: mull (%rsi)
; SDAG-NEXT: seto %dl
; SDAG-NEXT: movl %eax, (%rcx)
; SDAG-NEXT: movl %edx, %eax
@@ -1104,8 +1103,8 @@ define zeroext i1 @umuloi64_load(i64* %p
; SDAG-LABEL: umuloi64_load:
; SDAG: ## %bb.0:
; SDAG-NEXT: movq %rdx, %rcx
-; SDAG-NEXT: movq (%rdi), %rax
-; SDAG-NEXT: mulq %rsi
+; SDAG-NEXT: movq %rsi, %rax
+; SDAG-NEXT: mulq (%rdi)
; SDAG-NEXT: seto %dl
; SDAG-NEXT: movq %rax, (%rcx)
; SDAG-NEXT: movl %edx, %eax
@@ -1134,8 +1133,7 @@ define zeroext i1 @umuloi64_load2(i64 %v
; SDAG: ## %bb.0:
; SDAG-NEXT: movq %rdx, %rcx
; SDAG-NEXT: movq %rdi, %rax
-; SDAG-NEXT: movq (%rsi), %rdx
-; SDAG-NEXT: mulq %rdx
+; SDAG-NEXT: mulq (%rsi)
; SDAG-NEXT: seto %dl
; SDAG-NEXT: movq %rax, (%rcx)
; SDAG-NEXT: movl %edx, %eax
More information about the llvm-commits mailing list