[llvm] r224054 - [InstCombine][X86] Improved folding of calls to Intrinsic::x86_sse4a_insertqi.
Andrea Di Biagio
Andrea_DiBiagio at sn.scee.net
Thu Dec 11 12:44:59 PST 2014
Author: adibiagio
Date: Thu Dec 11 14:44:59 2014
New Revision: 224054
URL: http://llvm.org/viewvc/llvm-project?rev=224054&view=rev
Log:
[InstCombine][X86] Improved folding of calls to Intrinsic::x86_sse4a_insertqi.
This patch teaches the instruction combiner how to fold a call to 'insertqi' if
the 'length field' (3rd operand) is set to zero, and if the sum between
field 'length' and 'bit index' (4th operand) is bigger than 64.
>From the AMD64 Architecture Programmer's Manual:
1. If the sum of the bit index + length field is greater than 64, then the
results are undefined;
2. A value of zero in the field length is defined as a length of 64.
This patch improves the existing combining logic for intrinsic 'insertqi'
adding extra checks to address both point 1. and point 2.
Differential Revision: http://reviews.llvm.org/D6583
Modified:
llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=224054&r1=224053&r2=224054&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Thu Dec 11 14:44:59 2014
@@ -733,7 +733,22 @@ Instruction *InstCombiner::visitCallInst
// TODO: eventually we should lower this intrinsic to IR
if (auto CIWidth = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
if (auto CIStart = dyn_cast<ConstantInt>(II->getArgOperand(3))) {
- if (CIWidth->equalsInt(64) && CIStart->isZero()) {
+ unsigned Index = CIStart->getZExtValue();
+ // From AMD documentation: "a value of zero in the field length is
+ // defined as length of 64".
+ unsigned Length = CIWidth->equalsInt(0) ? 64 : CIWidth->getZExtValue();
+
+ // From AMD documentation: "If the sum of the bit index + length field
+ // is greater than 64, the results are undefined".
+
+ // Note that both field index and field length are 8-bit quantities.
+ // Since variables 'Index' and 'Length' are unsigned values
+ // obtained from zero-extending field index and field length
+ // respectively, their sum should never wrap around.
+ if ((Index + Length) > 64)
+ return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
+
+ if (Length == 64 && Index == 0) {
Value *Vec = II->getArgOperand(1);
Value *Undef = UndefValue::get(Vec->getType());
const uint32_t Mask[] = { 0, 2 };
Modified: llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll?rev=224054&r1=224053&r2=224054&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll Thu Dec 11 14:44:59 2014
@@ -303,6 +303,33 @@ define <2 x i64> @testInsertDisjointRang
ret <2 x i64> %2
}
+; CHECK: define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i)
+define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) {
+; CHECK: ret <2 x i64> %i
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0)
+ ret <2 x i64> %1
+}
+
+; CHECK: define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i)
+define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) {
+; CHECK: ret <2 x i64> undef
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16)
+ ret <2 x i64> %1
+}
+
+; CHECK: define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i)
+define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) {
+; CHECK: ret <2 x i64> undef
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32)
+ ret <2 x i64> %1
+}
+
+; CHECK: define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i)
+define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) {
+; CHECK: ret <2 x i64> undef
+ %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16)
+ ret <2 x i64> %1
+}
; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi
declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind
More information about the llvm-commits
mailing list