[llvm] r203553 - GVN: merge overflow intrinsics with non-overflow instructions.
Erik Verbruggen
erikjv at me.com
Tue Mar 11 02:36:49 PDT 2014
Author: erikjv
Date: Tue Mar 11 04:36:48 2014
New Revision: 203553
URL: http://llvm.org/viewvc/llvm-project?rev=203553&view=rev
Log:
GVN: merge overflow intrinsics with non-overflow instructions.
When an overflow intrinsic is followed by a non-overflow instruction,
replace the latter with an extract. For example:
%sadd = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
%sadd3 = add i32 %a, %b
Here the add statement will be replaced by an extract.
When an overflow intrinsic follows a non-overflow instruction, a clone
of the intrinsic is inserted before the normal instruction, which makes
it the same as the previous case. Subsequent runs of GVN can then clean
up the duplicate instructions and insert the extract.
This fixes PR8817.
Added:
llvm/trunk/test/Transforms/GVN/overflow.ll
Modified:
llvm/trunk/lib/Transforms/Scalar/GVN.cpp
Modified: llvm/trunk/lib/Transforms/Scalar/GVN.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/GVN.cpp?rev=203553&r1=203552&r2=203553&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/GVN.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/GVN.cpp Tue Mar 11 04:36:48 2014
@@ -111,10 +111,11 @@ namespace {
uint32_t nextValueNumber;
Expression create_expression(Instruction* I);
+ Expression create_intrinsic_expression(CallInst *C, uint32_t opcode,
+ bool IsCommutative);
Expression create_cmp_expression(unsigned Opcode,
CmpInst::Predicate Predicate,
Value *LHS, Value *RHS);
- Expression create_extractvalue_expression(ExtractValueInst* EI);
uint32_t lookup_or_add_call(CallInst* C);
public:
ValueTable() : nextValueNumber(1) { }
@@ -193,6 +194,29 @@ Expression ValueTable::create_expression
return e;
}
+Expression ValueTable::create_intrinsic_expression(CallInst *C, uint32_t opcode,
+ bool IsCommutative) {
+ Expression e;
+ e.opcode = opcode;
+ StructType *ST = cast<StructType>(C->getType());
+ assert(ST);
+ e.type = *ST->element_begin();
+
+ for (unsigned i = 0, ei = C->getNumArgOperands(); i < ei; ++i)
+ e.varargs.push_back(lookup_or_add(C->getArgOperand(i)));
+ if (IsCommutative) {
+ // Ensure that commutative instructions that only differ by a permutation
+ // of their operands get the same value number by sorting the operand value
+ // numbers. Since all commutative instructions have two operands it is more
+ // efficient to sort by hand rather than using, say, std::sort.
+ assert(C->getNumArgOperands() == 2 && "Unsupported commutative instruction!");
+ if (e.varargs[0] > e.varargs[1])
+ std::swap(e.varargs[0], e.varargs[1]);
+ }
+
+ return e;
+}
+
Expression ValueTable::create_cmp_expression(unsigned Opcode,
CmpInst::Predicate Predicate,
Value *LHS, Value *RHS) {
@@ -212,58 +236,6 @@ Expression ValueTable::create_cmp_expres
return e;
}
-Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) {
- assert(EI != 0 && "Not an ExtractValueInst?");
- Expression e;
- e.type = EI->getType();
- e.opcode = 0;
-
- IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
- if (I != 0 && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) {
- // EI might be an extract from one of our recognised intrinsics. If it
- // is we'll synthesize a semantically equivalent expression instead on
- // an extract value expression.
- switch (I->getIntrinsicID()) {
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::uadd_with_overflow:
- e.opcode = Instruction::Add;
- break;
- case Intrinsic::ssub_with_overflow:
- case Intrinsic::usub_with_overflow:
- e.opcode = Instruction::Sub;
- break;
- case Intrinsic::smul_with_overflow:
- case Intrinsic::umul_with_overflow:
- e.opcode = Instruction::Mul;
- break;
- default:
- break;
- }
-
- if (e.opcode != 0) {
- // Intrinsic recognized. Grab its args to finish building the expression.
- assert(I->getNumArgOperands() == 2 &&
- "Expect two args for recognised intrinsics.");
- e.varargs.push_back(lookup_or_add(I->getArgOperand(0)));
- e.varargs.push_back(lookup_or_add(I->getArgOperand(1)));
- return e;
- }
- }
-
- // Not a recognised intrinsic. Fall back to producing an extract value
- // expression.
- e.opcode = EI->getOpcode();
- for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end();
- OI != OE; ++OI)
- e.varargs.push_back(lookup_or_add(*OI));
-
- for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end();
- II != IE; ++II)
- e.varargs.push_back(*II);
-
- return e;
-}
-
//===----------------------------------------------------------------------===//
// ValueTable External Functions
//===----------------------------------------------------------------------===//
@@ -397,8 +369,29 @@ uint32_t ValueTable::lookup_or_add(Value
Instruction* I = cast<Instruction>(V);
Expression exp;
switch (I->getOpcode()) {
- case Instruction::Call:
- return lookup_or_add_call(cast<CallInst>(I));
+ case Instruction::Call: {
+ CallInst *C = cast<CallInst>(I);
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(C)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ exp = create_intrinsic_expression(C, Instruction::Add, true);
+ break;
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ exp = create_intrinsic_expression(C, Instruction::Sub, false);
+ break;
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ exp = create_intrinsic_expression(C, Instruction::Mul, true);
+ break;
+ default:
+ return lookup_or_add_call(C);
+ }
+ } else {
+ return lookup_or_add_call(C);
+ }
+ } break;
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
@@ -437,10 +430,8 @@ uint32_t ValueTable::lookup_or_add(Value
case Instruction::ShuffleVector:
case Instruction::InsertValue:
case Instruction::GetElementPtr:
- exp = create_expression(I);
- break;
case Instruction::ExtractValue:
- exp = create_extractvalue_expression(cast<ExtractValueInst>(I));
+ exp = create_expression(I);
break;
default:
valueNumbering[V] = nextValueNumber;
@@ -2189,6 +2180,54 @@ bool GVN::propagateEquality(Value *LHS,
return Changed;
}
+static bool normalOpAfterIntrinsic(Instruction *I, Value *Repl)
+{
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Repl))
+ return II->getIntrinsicID() == Intrinsic::sadd_with_overflow
+ || II->getIntrinsicID() == Intrinsic::uadd_with_overflow;
+ return false;
+ case Instruction::Sub:
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Repl))
+ return II->getIntrinsicID() == Intrinsic::ssub_with_overflow
+ || II->getIntrinsicID() == Intrinsic::usub_with_overflow;
+ return false;
+ case Instruction::Mul:
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Repl))
+ return II->getIntrinsicID() == Intrinsic::smul_with_overflow
+ || II->getIntrinsicID() == Intrinsic::umul_with_overflow;
+ return false;
+ default:
+ return false;
+ }
+}
+
+static bool intrinsicAterNormalOp(Instruction *I, Value *Repl)
+{
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
+ if (!II)
+ return false;
+
+ Instruction *RI = dyn_cast<Instruction>(Repl);
+ if (!RI)
+ return false;
+
+ switch (RI->getOpcode()) {
+ case Instruction::Add:
+ return II->getIntrinsicID() == Intrinsic::sadd_with_overflow
+ || II->getIntrinsicID() == Intrinsic::uadd_with_overflow;
+ case Instruction::Sub:
+ return II->getIntrinsicID() == Intrinsic::ssub_with_overflow
+ || II->getIntrinsicID() == Intrinsic::usub_with_overflow;
+ case Instruction::Mul:
+ return II->getIntrinsicID() == Intrinsic::smul_with_overflow
+ || II->getIntrinsicID() == Intrinsic::umul_with_overflow;
+ default:
+ return false;
+ }
+}
+
/// processInstruction - When calculating availability, handle an instruction
/// by inserting it into the appropriate sets
bool GVN::processInstruction(Instruction *I) {
@@ -2302,6 +2341,27 @@ bool GVN::processInstruction(Instruction
return false;
}
+ if (normalOpAfterIntrinsic(I, repl)) {
+ // An intrinsic followed by a normal operation (e.g. sadd_with_overflow
+ // followed by a sadd): replace the second instruction with an extract.
+ IntrinsicInst *II = cast<IntrinsicInst>(repl);
+ assert(II);
+ repl = ExtractValueInst::Create(II, 0, I->getName() + ".repl", I);
+ } else if (intrinsicAterNormalOp(I, repl)) {
+ // A normal operation followed by an intrinsic (e.g. sadd followed by a
+ // sadd_with_overflow).
+ // Clone the intrinsic, and insert it before the replacing instruction. Then
+ // replace the (current) instruction with the cloned one. In a subsequent
+ // run, the original replacement (the non-intrinsic) will be be replaced by
+ // the new intrinsic.
+ Instruction *RI = dyn_cast<Instruction>(repl);
+ assert(RI);
+ Instruction *newIntrinsic = I->clone();
+ newIntrinsic->setName(I->getName() + ".repl");
+ newIntrinsic->insertBefore(RI);
+ repl = newIntrinsic;
+ }
+
// Remove it!
patchAndReplaceAllUsesWith(I, repl);
if (MD && repl->getType()->getScalarType()->isPointerTy())
Added: llvm/trunk/test/Transforms/GVN/overflow.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GVN/overflow.ll?rev=203553&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/GVN/overflow.ll (added)
+++ llvm/trunk/test/Transforms/GVN/overflow.ll Tue Mar 11 04:36:48 2014
@@ -0,0 +1,43 @@
+; RUN: opt -S -gvn < %s | FileCheck %s
+
+define i32 @sadd1(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: @sadd1(
+entry:
+ %sadd = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
+ %cmp = extractvalue { i32, i1 } %sadd, 1
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ ret i32 42
+
+if.end: ; preds = %entry
+ %sadd3 = add i32 %a, %b
+ ret i32 %sadd3
+; CHECK-NOT: add i32 %a, %b
+; CHECK: %sadd3.repl = extractvalue { i32, i1 } %sadd, 0
+; CHECK: ret i32 %sadd3.repl
+}
+
+define i32 @sadd2(i32 %a, i32 %b) #0 {
+entry:
+ %sadd3 = add i32 %a, %b
+ %sadd = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
+ %cmp = extractvalue { i32, i1 } %sadd, 1
+ br i1 %cmp, label %if.then, label %if.end
+; CHECK-NOT: %sadd3 = add i32 %a, %b
+; CHECK: %sadd.repl = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
+; CHECK-NOT: %sadd = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
+; CHECK: %sadd3.repl = extractvalue { i32, i1 } %sadd.repl, 0
+
+if.then: ; preds = %entry
+ %sadd4 = add i32 %sadd3, 1
+ ret i32 %sadd4
+; CHECK: %sadd4 = add i32 %sadd3.repl, 1
+
+if.end: ; preds = %entry
+ ret i32 %sadd3
+; CHECK: ret i32 %sadd3.repl
+}
+
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
More information about the llvm-commits
mailing list