[llvm-commits] CVS: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp X86InstrSSE.td
Chris Lattner
sabre at nondot.org
Sat Oct 7 14:55:46 PDT 2006
Changes in directory llvm/lib/Target/X86:
X86ISelDAGToDAG.cpp updated: 1.110 -> 1.111
X86InstrSSE.td updated: 1.161 -> 1.162
---
Log message:
Completely disable folding of loads into scalar SSE instructions and provide
a framework for doing it right. This fixes
CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll.
Once X86DAGToDAGISel::SelectScalarSSELoad is implemented properly, load
folding can be re-enabled safely.
---
Diffs of the changes: (+61 -14)
X86ISelDAGToDAG.cpp | 25 +++++++++++++++++++++++++
X86InstrSSE.td | 50 ++++++++++++++++++++++++++++++++++++--------------
2 files changed, 61 insertions(+), 14 deletions(-)
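
Background on the miscompile: the old memory-form patterns folded a full
128-bit vector load into the 'ss'/'sd' instructions, but those instructions
only read the low element from memory and do not propagate the loaded vector's
upper elements into the result. The standalone C++ model below is illustrative
only (it uses rcpss as the example of an 'ss' form and treats the approximate
reciprocal as exact); it shows the value mismatch that folding introduces:

  // Illustrative model, not part of the patch.  Assumes the documented
  // semantics of llvm.x86.sse.rcp.ss and of rcpss with a memory operand.
  #include <array>
  #include <cstdio>

  typedef std::array<float, 4> v4f32;

  // Intrinsic on a loaded vector: upper lanes come from the loaded vector.
  static v4f32 rcp_ss_intrinsic(const v4f32 &A) {
    v4f32 R = A;
    R[0] = 1.0f / A[0];
    return R;
  }

  // Memory form (rcpss xmm, m32): only four bytes are read; the upper lanes
  // of the destination register are left as-is.
  static v4f32 rcpss_mem(const float *Mem, v4f32 StaleDest) {
    StaleDest[0] = 1.0f / Mem[0];
    return StaleDest;
  }

  int main() {
    v4f32 V = {2.0f, 5.0f, 6.0f, 7.0f};          // the vector in memory
    v4f32 Stale = {0.0f, -1.0f, -1.0f, -1.0f};   // garbage in the dest reg

    v4f32 Want = rcp_ss_intrinsic(V);             // {0.5, 5, 6, 7}
    v4f32 Got  = rcpss_mem(V.data(), Stale);      // {0.5, -1, -1, -1}
    std::printf("lane 1: want %g, folded form gives %g\n", Want[1], Got[1]);
    return 0;
  }

Folding is therefore only legal when the upper elements of the loaded vector
are known to be undef or zero, which is exactly the condition the new
SelectScalarSSELoad hook is meant to establish.
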
Index: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
diff -u llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.110 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.111
--- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp:1.110 Fri Sep 29 17:05:10 2006
+++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp Sat Oct 7 16:55:32 2006
@@ -147,6 +147,8 @@
SDOperand &Index, SDOperand &Disp);
bool SelectLEAAddr(SDOperand N, SDOperand &Base, SDOperand &Scale,
SDOperand &Index, SDOperand &Disp);
+ bool SelectScalarSSELoad(SDOperand N, SDOperand &Base, SDOperand &Scale,
+ SDOperand &Index, SDOperand &Disp);
bool TryFoldLoad(SDOperand P, SDOperand N,
SDOperand &Base, SDOperand &Scale,
SDOperand &Index, SDOperand &Disp);
@@ -724,6 +726,29 @@
return true;
}
+/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
+/// match a load whose top elements are either undef or zeros. The load flavor
+/// is derived from the type of N, which is either v4f32 or v2f64.
+bool X86DAGToDAGISel::SelectScalarSSELoad(SDOperand N, SDOperand &Base,
+ SDOperand &Scale,
+ SDOperand &Index, SDOperand &Disp) {
+#if 0
+ if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ if (N.getOperand(0).getOpcode() == ISD::LOAD) {
+ SDOperand LoadAddr = N.getOperand(0).getOperand(0);
+ if (!SelectAddr(LoadAddr, Base, Scale, Index, Disp))
+ return false;
+ return true;
+ }
+ }
+ // TODO: Also handle the case where we explicitly require zeros in the top
+ // elements. This is a vector shuffle from the zero vector.
+#endif
+
+ return false;
+}
+
+
/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LEA instruction.
bool X86DAGToDAGISel::SelectLEAAddr(SDOperand N, SDOperand &Base,
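
The #if 0 block above only covers the SCALAR_TO_VECTOR-of-load case. A minimal
sketch of what enabling it might look like, keeping the same structure and
adding a hasOneUse() guard (an assumption on my part, using the SDOperand
interface of this era: the full-width load should not be narrowed away while
other users still need it); the explicit-zero-top-elements case from the TODO
is left as a comment:

  // Sketch only, not the committed implementation.
  bool X86DAGToDAGISel::SelectScalarSSELoad(SDOperand N, SDOperand &Base,
                                            SDOperand &Scale,
                                            SDOperand &Index, SDOperand &Disp) {
    if (N.getOpcode() == ISD::SCALAR_TO_VECTOR &&
        N.getOperand(0).getOpcode() == ISD::LOAD &&
        N.getOperand(0).hasOneUse())   // assumed guard: load feeds only this node
      return SelectAddr(N.getOperand(0).getOperand(0), Base, Scale, Index, Disp);

    // TODO (from the patch): also handle the case where the top elements are
    // required to be zero, i.e. a vector shuffle from the zero vector.
    return false;
  }
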
Index: llvm/lib/Target/X86/X86InstrSSE.td
diff -u llvm/lib/Target/X86/X86InstrSSE.td:1.161 llvm/lib/Target/X86/X86InstrSSE.td:1.162
--- llvm/lib/Target/X86/X86InstrSSE.td:1.161 Sat Oct 7 16:17:13 2006
+++ llvm/lib/Target/X86/X86InstrSSE.td Sat Oct 7 16:55:32 2006
@@ -13,6 +13,7 @@
//
//===----------------------------------------------------------------------===//
+
//===----------------------------------------------------------------------===//
// SSE specific DAG Nodes.
//===----------------------------------------------------------------------===//
@@ -32,6 +33,27 @@
def X86pinsrw : SDNode<"X86ISD::PINSRW", SDTypeProfile<1, 3, []>, []>;
//===----------------------------------------------------------------------===//
+// SSE Complex Patterns
+//===----------------------------------------------------------------------===//
+
+// These are 'extloads' from a scalar to the low element of a vector, zeroing
+// the top elements. These are used for the SSE 'ss' and 'sd' instruction
+// forms.
+def sse_load_f32 : ComplexPattern<v4f32, 4, "SelectScalarSSELoad", []>;
+def sse_load_f64 : ComplexPattern<v2f64, 4, "SelectScalarSSELoad", []>;
+
+def ssmem : Operand<v4f32> {
+ let PrintMethod = "printf32mem";
+ let NumMIOperands = 4;
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm);
+}
+def sdmem : Operand<v2f64> {
+ let PrintMethod = "printf64mem";
+ let NumMIOperands = 4;
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc, i32imm);
+}
+
+//===----------------------------------------------------------------------===//
// SSE pattern fragments
//===----------------------------------------------------------------------===//
@@ -185,18 +207,18 @@
def r : SSI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src),
!strconcat(OpcodeStr, " {$src, $dst|$dst, $src"),
[(set VR128:$dst, (v4f32 (IntId VR128:$src)))]>;
- def m : SSI<o, MRMSrcMem, (ops VR128:$dst, f32mem:$src),
+ def m : SSI<o, MRMSrcMem, (ops VR128:$dst, ssmem:$src),
!strconcat(OpcodeStr, " {$src, $dst|$dst, $src"),
- [(set VR128:$dst, (v4f32 (IntId (load addr:$src))))]>;
+ [(set VR128:$dst, (v4f32 (IntId sse_load_f32:$src)))]>;
}
multiclass SD_IntUnary<bits<8> o, string OpcodeStr, Intrinsic IntId> {
def r : SDI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src),
!strconcat(OpcodeStr, " {$src, $dst|$dst, $src"),
[(set VR128:$dst, (v2f64 (IntId VR128:$src)))]>;
- def m : SDI<o, MRMSrcMem, (ops VR128:$dst, f64mem:$src),
+ def m : SDI<o, MRMSrcMem, (ops VR128:$dst, sdmem:$src),
!strconcat(OpcodeStr, " {$src, $dst|$dst, $src"),
- [(set VR128:$dst, (v2f64 (IntId (load addr:$src))))]>;
+ [(set VR128:$dst, (v2f64 (IntId sse_load_f64:$src)))]>;
}
class PS_Intr<bits<8> o, string OpcodeStr, Intrinsic IntId>
@@ -315,10 +337,10 @@
// Scalar operation, reg+mem.
def SSrm : SSI<opc, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
!strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2"),
- [(set FR32:$dst, (OpNode FR32:$src1, (loadf32 addr:$src2)))]>;
+ [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
def SDrm : SDI<opc, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
!strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2"),
- [(set FR64:$dst, (OpNode FR64:$src1, (loadf64 addr:$src2)))]>;
+ [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
// Vector intrinsic operation, reg+reg.
def SSrr_Int : SSI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
@@ -332,14 +354,14 @@
let isCommutable = Commutable;
}
// Vector intrinsic operation, reg+mem.
- def SSrm_Int : SSI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f32mem:$src2),
+ def SSrm_Int : SSI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, ssmem:$src2),
!strconcat(OpcodeStr, "ss {$src2, $dst|$dst, $src2"),
[(set VR128:$dst, (F32Int VR128:$src1,
- (load addr:$src2)))]>;
- def SDrm_Int : SDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
+ sse_load_f32:$src2))]>;
+ def SDrm_Int : SDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, sdmem:$src2),
!strconcat(OpcodeStr, "sd {$src2, $dst|$dst, $src2"),
[(set VR128:$dst, (F64Int VR128:$src1,
- (load addr:$src2)))]>;
+ sse_load_f64:$src2))]>;
}
}
@@ -373,17 +395,17 @@
!strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>;
class SS_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
- : SSI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f32mem:$src2),
+ : SSI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, ssmem:$src2),
!strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (v4f32 (IntId VR128:$src1, (load addr:$src2))))]>;
+ [(set VR128:$dst, (v4f32 (IntId VR128:$src1, sse_load_f32:$src2)))]>;
class SD_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
: SDI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>;
class SD_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
- : SDI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f64mem:$src2),
+ : SDI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, sdmem:$src2),
!strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (v2f64 (IntId VR128:$src1, (load addr:$src2))))]>;
+ [(set VR128:$dst, (v2f64 (IntId VR128:$src1, sse_load_f64:$src2)))]>;
// Aliases to match intrinsics which expect XMM operand(s).
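
How the new pieces fit together: ssmem/sdmem carry the same four machine
operands as an ordinary x86 memory reference (base register, scale, index
register, displacement), so the instructions themselves are unchanged; only the
pattern-matching side changes. The four values SelectScalarSSELoad has to
produce correspond to the usual x86 effective-address computation, roughly as
in this illustrative model (not from the patch):

  #include <cstdint>

  // [Base + Scale*Index + Disp] -- the quadruple that ssmem/sdmem describe
  // and that SelectScalarSSELoad must fill in.
  static uint32_t EffectiveAddress(uint32_t Base, uint8_t Scale,
                                   uint32_t Index, int32_t Disp) {
    return Base + Scale * Index + Disp;
  }

Since SelectScalarSSELoad currently rejects everything, every pattern written
against sse_load_f32/sse_load_f64 fails to match; isel falls back to an
explicit vector load plus the reg-reg form, which is enough to fix the
miscompile the testcase exposes.
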