[PATCH] Make bitcast, extractelement, and insertelement considered cheap for speculation in SimplifyCFG.
Matt Arsenault
Matthew.Arsenault at amd.com
Mon Nov 18 19:00:36 PST 2013
Hi nadav,
This helps fold more branches into selects.
On R600, vectors are cheap and anything that helps
remove branches is very good.
I don't know why this doesn't use the TTI for this cost
calculation or if it should.
http://llvm-reviews.chandlerc.com/D2218
Files:
lib/Transforms/Utils/SimplifyCFG.cpp
test/Transforms/SimplifyCFG/speculate-vector-ops.ll
Index: lib/Transforms/Utils/SimplifyCFG.cpp
===================================================================
--- lib/Transforms/Utils/SimplifyCFG.cpp
+++ lib/Transforms/Utils/SimplifyCFG.cpp
@@ -224,6 +224,9 @@
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
+ case Instruction::BitCast:
+ case Instruction::ExtractElement:
+ case Instruction::InsertElement:
return 1; // These are all cheap.
case Instruction::Call:
Index: test/Transforms/SimplifyCFG/speculate-vector-ops.ll
===================================================================
--- /dev/null
+++ test/Transforms/SimplifyCFG/speculate-vector-ops.ll
@@ -0,0 +1,60 @@
+; RUN: opt -S -simplifycfg < %s | FileCheck %s
+
+define i32 @speculate_vector_extract(i32 %d, <4 x i32> %v) #0 { ; chain of three if/else diamonds; each conditional arm holds at most one extractelement of %v
+; CHECK-LABEL: @speculate_vector_extract(
+; CHECK-NOT: br
+entry:
+ %conv = insertelement <4 x i32> undef, i32 %d, i32 0 ; %conv..%conv4 write %d into lanes 0..3 in turn
+ %conv2 = insertelement <4 x i32> %conv, i32 %d, i32 1
+ %conv3 = insertelement <4 x i32> %conv2, i32 %d, i32 2
+ %conv4 = insertelement <4 x i32> %conv3, i32 %d, i32 3 ; all four lanes now hold %d
+ %tmp6 = add nsw <4 x i32> %conv4, <i32 0, i32 -1, i32 -2, i32 -3> ; lane i = %d - i
+ %cmp = icmp eq <4 x i32> %tmp6, zeroinitializer ; lane i is true when %d - i == 0
+ %cmp.ext = sext <4 x i1> %cmp to <4 x i32> ; widen each i1 lane to i32 by sign extension
+ %tmp8 = extractelement <4 x i32> %cmp.ext, i32 0 ; scalar condition for the first diamond: mask lane 0
+ %tobool = icmp eq i32 %tmp8, 0
+ br i1 %tobool, label %cond.else, label %cond.then ; mask lane 0 == 0 goes to cond.else
+
+return: ; preds = %cond.end28
+ ret i32 %cond32 ; returns the value chosen by the last phi (in cond.end28)
+
+cond.then: ; preds = %entry
+ %tmp10 = extractelement <4 x i32> %v, i32 0 ; only instruction in this arm besides the branch
+ br label %cond.end
+
+cond.else: ; preds = %entry
+ %tmp12 = extractelement <4 x i32> %v, i32 3 ; the else arm reads lane 3 of %v instead
+ br label %cond.end
+
+cond.end: ; preds = %cond.else, %cond.then
+ %cond = phi i32 [ %tmp10, %cond.then ], [ %tmp12, %cond.else ] ; %v lane 0 or lane 3, depending on the arm taken
+ %tmp14 = extractelement <4 x i32> %cmp.ext, i32 1 ; second diamond tests mask lane 1
+ %tobool15 = icmp eq i32 %tmp14, 0
+ br i1 %tobool15, label %cond.else17, label %cond.then16
+
+cond.then16: ; preds = %cond.end
+ %tmp20 = extractelement <4 x i32> %v, i32 1 ; replaces the running result with %v lane 1
+ br label %cond.end18
+
+cond.else17: ; preds = %cond.end
+ br label %cond.end18 ; empty arm: the phi in cond.end18 passes %cond through
+
+cond.end18: ; preds = %cond.else17, %cond.then16
+ %cond22 = phi i32 [ %tmp20, %cond.then16 ], [ %cond, %cond.else17 ]
+ %tmp24 = extractelement <4 x i32> %cmp.ext, i32 2 ; third diamond tests mask lane 2
+ %tobool25 = icmp eq i32 %tmp24, 0
+ br i1 %tobool25, label %cond.else27, label %cond.then26
+
+cond.then26: ; preds = %cond.end18
+ %tmp30 = extractelement <4 x i32> %v, i32 2 ; replaces the running result with %v lane 2
+ br label %cond.end28
+
+cond.else27: ; preds = %cond.end18
+ br label %cond.end28 ; empty arm: the phi in cond.end28 passes %cond22 through
+
+cond.end28: ; preds = %cond.else27, %cond.then26
+ %cond32 = phi i32 [ %tmp30, %cond.then26 ], [ %cond22, %cond.else27 ]
+ br label %return
+}
+
+attributes #0 = { nounwind }
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D2218.1.patch
Type: text/x-patch
Size: 3085 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20131118/63ad4d6f/attachment.bin>
More information about the llvm-commits
mailing list