[PATCH] D32344: InstCombine/AMDGPU: Fix constant folding of llvm.amdgcn.{icmp, fcmp}
Nicolai Hähnle via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 21 03:09:06 PDT 2017
nhaehnle created this revision.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, wdng, kzhuravl.
The return value of these intrinsics should always have 0 bits for
inactive threads. This means that when all arguments are constant
and the comparison evaluates to true, the intrinsic should return
the current exec mask rather than the constant -1 (all bits set)
that the previous constant folding produced.
Fixes some GL_ARB_shader_ballot tests.
https://reviews.llvm.org/D32344
Files:
lib/Transforms/InstCombine/InstCombineCalls.cpp
test/Transforms/InstCombine/amdgcn-intrinsics.ll
Index: test/Transforms/InstCombine/amdgcn-intrinsics.ll
===================================================================
--- test/Transforms/InstCombine/amdgcn-intrinsics.ll
+++ test/Transforms/InstCombine/amdgcn-intrinsics.ll
@@ -1259,7 +1259,7 @@
}
; CHECK-LABEL: @icmp_constant_inputs_true(
-; CHECK: ret i64 -1
+; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #4
define i64 @icmp_constant_inputs_true() {
%result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 8, i32 34)
ret i64 %result
@@ -1524,7 +1524,7 @@
}
; CHECK-LABEL: @fcmp_constant_inputs_true(
-; CHECK: ret i64 -1
+; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #4
define i64 @fcmp_constant_inputs_true() {
%result = call i64 @llvm.amdgcn.fcmp.f32(float 2.0, float 4.0, i32 4)
ret i64 %result
Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3400,8 +3400,22 @@
if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
- return replaceInstUsesWith(*II,
- ConstantExpr::getSExt(CCmp, II->getType()));
+ if (CCmp->isNullValue()) {
+ return replaceInstUsesWith(*II,
+ ConstantExpr::getSExt(CCmp, II->getType()));
+ } else {
+ Value *NewF = Intrinsic::getDeclaration(II->getModule(),
+ Intrinsic::read_register,
+ II->getType());
+ Metadata *MDArgs[] = { MDString::get(II->getContext(), "exec") };
+ MDNode *MD = MDNode::get(II->getContext(), MDArgs);
+ Value *Args[] = { MetadataAsValue::get(II->getContext(), MD) };
+ CallInst *NewCall = Builder->CreateCall(NewF, Args);
+ NewCall->addAttribute(AttributeList::FunctionIndex,
+ Attribute::Convergent);
+ NewCall->takeName(II);
+ return replaceInstUsesWith(*II, NewCall);
+ }
}
// Canonicalize constants to RHS.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D32344.96116.patch
Type: text/x-patch
Size: 2305 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170421/5a3f4f50/attachment.bin>
More information about the llvm-commits mailing list