[llvm] r251849 - [CGP] widen switch condition and case constants to target's register width
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 2 14:46:24 PST 2015
Author: spatel
Date: Mon Nov 2 16:46:24 2015
New Revision: 251849
URL: http://llvm.org/viewvc/llvm-project?rev=251849&view=rev
Log:
[CGP] widen switch condition and case constants to target's register width
This is a follow-up from the discussion in D12965. The block-at-a-time limitation of
SelectionDAG also came up in D13297.
Without the InstCombine change from D12965, I don't expect this patch to make any
difference in the real world because InstCombine does not shrink cases like this in
visitSwitchInst(). But we need to have this CGP safety harness in place before
proceeding with any shrinkage in D12965, so we won't generate extra extends for compares.
I've opted for IR regression tests in the patch because that seems like a clearer way to
test the transform, but PowerPC CodeGen for an i16 widening test is shown below. x86
will need more work to solve: https://llvm.org/bugs/show_bug.cgi?id=22473
Before:
BB#0:
mr 4, 3
extsh. 3, 4
ble 0, .LBB0_5
BB#1:
cmpwi 3, 99
bgt 0, .LBB0_9
BB#2:
rlwinm 4, 4, 0, 16, 31 <--- 32-bit mask/extend
li 3, 0
cmplwi 4, 1
beqlr 0
BB#3:
cmplwi 4, 10
bne 0, .LBB0_12
BB#4:
li 3, 1
blr
.LBB0_5:
rlwinm 3, 4, 0, 16, 31 <--- 32-bit mask/extend
cmplwi 3, 65436
beq 0, .LBB0_13
BB#6:
cmplwi 3, 65526
beq 0, .LBB0_15
BB#7:
cmplwi 3, 65535
bne 0, .LBB0_12
BB#8:
li 3, 4
blr
.LBB0_9:
rlwinm 3, 4, 0, 16, 31 <--- 32-bit mask/extend
cmplwi 3, 100
beq 0, .LBB0_14
...
After:
BB#0:
rlwinm 4, 3, 0, 16, 31 <--- mask/extend to 32-bit and then use that for comparisons
cmpwi 4, 999
ble 0, .LBB0_5
BB#1:
lis 3, 0
ori 3, 3, 65525
cmpw 4, 3
bgt 0, .LBB0_9
BB#2:
cmplwi 4, 1000
beq 0, .LBB0_14
BB#3:
cmplwi 4, 65436
bne 0, .LBB0_13
BB#4:
li 3, 6
blr
.LBB0_5:
li 3, 0
cmplwi 4, 1
beqlr 0
BB#6:
cmplwi 4, 10
beq 0, .LBB0_12
BB#7:
cmplwi 4, 100
bne 0, .LBB0_13
BB#8:
li 3, 2
blr
.LBB0_9:
cmplwi 4, 65526
beq 0, .LBB0_15
BB#10:
cmplwi 4, 65535
bne 0, .LBB0_13
...
Differential Revision: http://reviews.llvm.org/D13532
Added:
llvm/trunk/test/Transforms/CodeGenPrepare/widen_switch.ll
Modified:
llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
Modified: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp?rev=251849&r1=251848&r2=251849&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp (original)
+++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp Mon Nov 2 16:46:24 2015
@@ -175,6 +175,7 @@ class TypePromotionTransaction;
bool optimizeExtUses(Instruction *I);
bool optimizeSelectInst(SelectInst *SI);
bool optimizeShuffleVectorInst(ShuffleVectorInst *SI);
+ /// Widen a switch's condition and case constants to the target register width.
+ bool optimizeSwitchInst(SwitchInst *SI);
bool optimizeExtractElementInst(Instruction *Inst);
bool dupRetToEnableTailCallOpts(BasicBlock *BB);
bool placeDbgValues(Function &F);
@@ -4399,6 +4400,49 @@ bool CodeGenPrepare::optimizeShuffleVect
return MadeChange;
}
+/// Widen the condition and case constants of \p SI to the width of the
+/// register the target uses for the condition's type, when that register is
+/// wider than the condition itself.
+///
+/// \returns true if the switch was rewritten; false if it was left untouched
+/// (no TLI/DL available, or the register is not wider than the condition).
+bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
+  if (!TLI || !DL)
+    return false;
+
+  Value *Cond = SI->getCondition();
+  Type *OldType = Cond->getType();
+  LLVMContext &Context = Cond->getContext();
+  MVT RegType = TLI->getRegisterType(Context, TLI->getValueType(*DL, OldType));
+  unsigned RegWidth = RegType.getSizeInBits();
+
+  if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
+    return false;
+
+  // If the register width is greater than the type width, expand the condition
+  // of the switch instruction and each case constant to the width of the
+  // register. By widening the type of the switch condition, subsequent
+  // comparisons (for case comparisons) will not need to be extended to the
+  // preferred register width, so we will potentially eliminate N-1 extends,
+  // where N is the number of cases in the switch.
+  auto *NewType = Type::getIntNTy(Context, RegWidth);
+
+  // Zero-extend the switch condition and case constants unless the switch
+  // condition is a function argument that is already being sign-extended.
+  // In that case, we can avoid an unnecessary mask/extension by sign-extending
+  // everything instead.
+  Instruction::CastOps ExtType = Instruction::ZExt;
+  if (auto *Arg = dyn_cast<Argument>(Cond))
+    if (Arg->hasSExtAttr())
+      ExtType = Instruction::SExt;
+
+  auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
+  ExtInst->insertBefore(SI);
+  // The extend exists only to feed the switch, so give it the switch's debug
+  // location instead of leaving the new instruction without one.
+  ExtInst->setDebugLoc(SI->getDebugLoc());
+  SI->setCondition(ExtInst);
+  for (SwitchInst::CaseIt Case : SI->cases()) {
+    // Bind to the existing constant's value rather than copying the APInt; the
+    // widened value is computed before the case is overwritten below.
+    const APInt &NarrowConst = Case.getCaseValue()->getValue();
+    // The constants must be extended the same way as the condition so that
+    // every case still matches the same original narrow value.
+    APInt WideConst = (ExtType == Instruction::ZExt) ?
+                      NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth);
+    Case.setValue(ConstantInt::get(Context, WideConst));
+  }
+
+  return true;
+}
+
namespace {
/// \brief Helper class to promote a scalar operation to a vector one.
/// This class is used to move downward extractelement transition.
@@ -4871,6 +4915,9 @@ bool CodeGenPrepare::optimizeInst(Instru
if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
return optimizeShuffleVectorInst(SVI);
+ if (auto *Switch = dyn_cast<SwitchInst>(I))
+ return optimizeSwitchInst(Switch);
+
if (isa<ExtractElementInst>(I))
return optimizeExtractElementInst(I);
Added: llvm/trunk/test/Transforms/CodeGenPrepare/widen_switch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/widen_switch.ll?rev=251849&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/widen_switch.ll (added)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/widen_switch.ll Mon Nov 2 16:46:24 2015
@@ -0,0 +1,107 @@
+;; PowerPC is arbitrarily chosen as a 32/64-bit RISC representative to show the transform in all tests.
+;; x86 is chosen to show that the transform may differ when 8-bit and 16-bit registers are available.
+
+; RUN: opt < %s -codegenprepare -S -mtriple=powerpc64-unknown-unknown | FileCheck %s --check-prefix=PPC --check-prefix=ALL
+; RUN: opt < %s -codegenprepare -S -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X86 --check-prefix=ALL
+
+; PPC widens to 32-bit; no change for x86 because 16-bit registers are part of the architecture.
+
+define i32 @widen_switch_i16(i32 %a) {
+entry:
+ %trunc = trunc i32 %a to i16
+ switch i16 %trunc, label %sw.default [
+ i16 1, label %sw.bb0
+ i16 -1, label %sw.bb1
+ ]
+
+sw.bb0:
+ br label %return
+
+sw.bb1:
+ br label %return
+
+sw.default:
+ br label %return
+
+return:
+ %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+ ret i32 %retval
+
+; %trunc is an instruction, not a signext argument, so PowerPC is expected to
+; widen with zext and the i16 -1 case constant becomes i32 65535. The x86
+; expectations keep the switch on i16 with the original case constants.
+; ALL-LABEL: @widen_switch_i16(
+; PPC: %0 = zext i16 %trunc to i32
+; PPC-NEXT: switch i32 %0, label %sw.default [
+; PPC-NEXT: i32 1, label %return
+; PPC-NEXT: i32 65535, label %sw.bb1
+;
+; X86: %trunc = trunc i32 %a to i16
+; X86-NEXT: switch i16 %trunc, label %sw.default [
+; X86-NEXT: i16 1, label %return
+; X86-NEXT: i16 -1, label %sw.bb1
+}
+
+; Both architectures widen to 32-bit from a smaller, non-native type.
+
+define i32 @widen_switch_i17(i32 %a) {
+entry:
+ %trunc = trunc i32 %a to i17
+ switch i17 %trunc, label %sw.default [
+ i17 10, label %sw.bb0
+ i17 -1, label %sw.bb1
+ ]
+
+sw.bb0:
+ br label %return
+
+sw.bb1:
+ br label %return
+
+sw.default:
+ br label %return
+
+return:
+ %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+ ret i32 %retval
+
+; The same expectations apply to both targets: the i17 condition is
+; zero-extended to i32, so the i17 -1 case constant becomes i32 131071
+; (2^17 - 1).
+; ALL-LABEL: @widen_switch_i17(
+; ALL: %0 = zext i17 %trunc to i32
+; ALL-NEXT: switch i32 %0, label %sw.default [
+; ALL-NEXT: i32 10, label %return
+; ALL-NEXT: i32 131071, label %sw.bb1
+}
+
+; If the switch condition is a sign-extended function argument, then the
+; condition and cases should be sign-extended rather than zero-extended
+; because the sign-extension can be optimized away.
+
+define i32 @widen_switch_i16_sext(i2 signext %a) {
+entry:
+ switch i2 %a, label %sw.default [
+ i2 1, label %sw.bb0
+ i2 -1, label %sw.bb1
+ ]
+
+sw.bb0:
+ br label %return
+
+sw.bb1:
+ br label %return
+
+sw.default:
+ br label %return
+
+return:
+ %retval = phi i32 [ -1, %sw.default ], [ 0, %sw.bb0 ], [ 1, %sw.bb1 ]
+ ret i32 %retval
+
+; Note: despite the "i16" in the function name, the switch here is on an i2
+; signext argument. PowerPC is expected to sign-extend it to i32 and x86 to i8;
+; because the case constants are sign-extended too, the -1 case stays -1.
+; ALL-LABEL: @widen_switch_i16_sext(
+; PPC: %0 = sext i2 %a to i32
+; PPC-NEXT: switch i32 %0, label %sw.default [
+; PPC-NEXT: i32 1, label %return
+; PPC-NEXT: i32 -1, label %sw.bb1
+;
+; X86: %0 = sext i2 %a to i8
+; X86-NEXT: switch i8 %0, label %sw.default [
+; X86-NEXT: i8 1, label %return
+; X86-NEXT: i8 -1, label %sw.bb1
+}
+
More information about the llvm-commits
mailing list