[llvm] r261570 - AMDGPU/R600: Implement allowsMisalignedMemoryAccess
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 22 13:04:17 PST 2016
Author: arsenm
Date: Mon Feb 22 15:04:16 2016
New Revision: 261570
URL: http://llvm.org/viewvc/llvm-project?rev=261570&view=rev
Log:
AMDGPU/R600: Implement allowsMisalignedMemoryAccess
This avoids some test regressions in a future commit
when unaligned operations are expanded when they
have custom lowering.
Modified:
llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.h
llvm/trunk/test/CodeGen/AMDGPU/store.ll
Modified: llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp?rev=261570&r1=261569&r2=261570&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp Mon Feb 22 15:04:16 2016
@@ -1784,6 +1784,26 @@ EVT R600TargetLowering::getSetCCResultTy
return VT.changeVectorElementTypeToInteger();
}
+bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
+ unsigned AddrSpace,
+ unsigned Align,
+ bool *IsFast) const {
+ if (IsFast)
+ *IsFast = false;
+
+ if (!VT.isSimple() || VT == MVT::Other)
+ return false;
+
+ if (VT.bitsLT(MVT::i32))
+ return false;
+
+ // TODO: This is a rough estimate.
+ if (IsFast)
+ *IsFast = true;
+
+ return VT.bitsGT(MVT::i32) && Align % 4 == 0;
+}
+
static SDValue CompactSwizzlableVector(
SelectionDAG &DAG, SDValue VectorEntry,
DenseMap<unsigned, unsigned> &RemapSwizzle) {
Modified: llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.h?rev=261570&r1=261569&r2=261570&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.h Mon Feb 22 15:04:16 2016
@@ -41,6 +41,10 @@ public:
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &,
EVT VT) const override;
+ bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
+ unsigned Align,
+ bool *IsFast) const override;
+
private:
unsigned Gen;
/// Each OpenCL kernel has nine implicit parameters that are stored in the
Modified: llvm/trunk/test/CodeGen/AMDGPU/store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/store.ll?rev=261570&r1=261569&r2=261570&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/store.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/store.ll Mon Feb 22 15:04:16 2016
@@ -358,20 +358,13 @@ entry:
ret void
}
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
; When i128 was a legal type this program generated cannot select errors:
; FUNC-LABEL: {{^}}"i128-const-store":
-; FIXME: We should be able to to this with one store instruction
-; EG: STORE_RAW
-; EG: STORE_RAW
-; EG: STORE_RAW
-; EG: STORE_RAW
-; CM: STORE_DWORD
-; CM: STORE_DWORD
-; CM: STORE_DWORD
-; CM: STORE_DWORD
+; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 1
+
+; CM: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+}}, T{{[0-9]+}}.X
+
; SI: buffer_store_dwordx4
define void @i128-const-store(i32 addrspace(1)* %out) {
entry:
@@ -384,3 +377,5 @@ entry:
store i32 2, i32 addrspace(1)* %arrayidx6, align 4
ret void
}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
More information about the llvm-commits
mailing list