[llvm] cd19af7 - Avoid 8 and 16bit switch conditions on x86

Tue May 10 10:01:39 PDT 2022

Author: Matthias Braun
Date: 2022-05-10T10:00:10-07:00
New Revision: cd19af74c031f0f538050d00b26bab3fbca07414

URL: https://github.com/llvm/llvm-project/commit/cd19af74c031f0f538050d00b26bab3fbca07414
DIFF: https://github.com/llvm/llvm-project/commit/cd19af74c031f0f538050d00b26bab3fbca07414.diff

LOG: Avoid 8 and 16bit switch conditions on x86

This adds a `TargetLoweringBase::getPreferredSwitchConditionType` callback
to give targets a chance to control the type used in
`CodeGenPrepare::optimizeSwitchInst`.

Implement callback for X86 to avoid i8 and i16 types where possible as
they often incur extra zero-extensions.

This is NFC for non-X86 targets.

Differential Revision: https://reviews.llvm.org/D124894

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/TargetLowering.h
    llvm/lib/CodeGen/CodeGenPrepare.cpp
    llvm/lib/CodeGen/TargetLoweringBase.cpp
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/lib/Target/X86/X86ISelLowering.h
    llvm/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll
    llvm/test/CodeGen/X86/SwitchLowering.ll
    llvm/test/CodeGen/X86/conditional-tailcall.ll
    llvm/test/CodeGen/X86/switch-phi-const.ll
    llvm/test/CodeGen/X86/switch.ll
    llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll
    llvm/test/CodeGen/X86/tail-opts.ll
    llvm/test/Transforms/CodeGenPrepare/X86/widen_switch.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 63dc943254a59..06f3cee7a3fdf 100644

--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1222,6 +1222,10 @@ class TargetLoweringBase {
                                       uint64_t Range, ProfileSummaryInfo *PSI,
                                       BlockFrequencyInfo *BFI) const;
 
+  /// Returns preferred type for switch condition.
+  virtual MVT getPreferredSwitchConditionType(LLVMContext &Context,
+                                              EVT ConditionVT) const;
+
   /// Return true if lowering to a bit test is suitable for a set of case
   /// clusters which contains \p NumDests unique destinations, \p Low and
   /// \p High as its lowest and highest case values, and expects \p NumCmps

diff  --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 4eeaf3d808ac4..b4b78c4a195b8 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -6984,7 +6984,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
   Type *OldType = Cond->getType();
   LLVMContext &Context = Cond->getContext();
   EVT OldVT = TLI->getValueType(*DL, OldType);
-  MVT RegType = TLI->getRegisterType(Context, OldVT);
+  MVT RegType = TLI->getPreferredSwitchConditionType(Context, OldVT);
   unsigned RegWidth = RegType.getSizeInBits();
 
   if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())

diff  --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 323c41c7b23b2..63cac3619c35a 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1602,6 +1602,11 @@ bool TargetLoweringBase::isSuitableForJumpTable(const SwitchInst *SI,
          (NumCases * 100 >= Range * MinDensity);
 }
 
+MVT TargetLoweringBase::getPreferredSwitchConditionType(LLVMContext &Context,
+                                                        EVT ConditionVT) const {
+  return getRegisterType(Context, ConditionVT);
+}
+
 /// Get the EVTs and ArgFlags collections that represent the legalized return
 /// type of the given function.  This does not require a DAG or a return value,
 /// and is suitable for use before any DAGs for the function are constructed.

diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 14e8f2d3f934f..6c0ec07e9841e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -33719,6 +33719,16 @@ bool X86TargetLowering::areJTsAllowed(const Function *Fn) const {
   return TargetLowering::areJTsAllowed(Fn);
 }
 
+MVT X86TargetLowering::getPreferredSwitchConditionType(LLVMContext &Context,
+                                                       EVT ConditionVT) const {
+  // Avoid 8 and 16 bit types because they increase the chance for unnecessary
+  // zero-extensions.
+  if (ConditionVT.getSizeInBits() < 32)
+    return MVT::i32;
+  return TargetLoweringBase::getPreferredSwitchConditionType(Context,
+                                                             ConditionVT);
+}
+
 //===----------------------------------------------------------------------===//
 //                           X86 Scheduler Hooks
 //===----------------------------------------------------------------------===//

diff  --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 67d94edc01582..cd0e4a348f48b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1316,6 +1316,9 @@ namespace llvm {
     /// Returns true if lowering to a jump table is allowed.
     bool areJTsAllowed(const Function *Fn) const override;
 
+    MVT getPreferredSwitchConditionType(LLVMContext &Context,
+                                        EVT ConditionVT) const override;
+
     /// If true, then instruction selection should
     /// seek to shrink the FP constant of the specified type to a smaller type
     /// in order to save space and / or reduce runtime.

diff  --git a/llvm/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll b/llvm/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll
index 42c7a19b11536..7c67fcde6c0e9 100644
--- a/llvm/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll
+++ b/llvm/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll
@@ -18,11 +18,11 @@ define i8* @test(i8* %Q, i32* %L) nounwind {
 ; CHECK-NEXT:  LBB0_2: ## %bb1
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movzbl (%eax), %ecx
-; CHECK-NEXT:    cmpb $12, %cl
+; CHECK-NEXT:    cmpl $12, %ecx
 ; CHECK-NEXT:    je LBB0_1
 ; CHECK-NEXT:  ## %bb.3: ## %bb1
 ; CHECK-NEXT:    ## in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT:    cmpb $42, %cl
+; CHECK-NEXT:    cmpl $42, %ecx
 ; CHECK-NEXT:    je LBB0_1
 ; CHECK-NEXT:  ## %bb.4: ## %bb3
 ; CHECK-NEXT:    movb $4, 2(%eax)

diff  --git a/llvm/test/CodeGen/X86/SwitchLowering.ll b/llvm/test/CodeGen/X86/SwitchLowering.ll
index e97036b1dcf21..d5abb34187f99 100644
--- a/llvm/test/CodeGen/X86/SwitchLowering.ll
+++ b/llvm/test/CodeGen/X86/SwitchLowering.ll
@@ -18,11 +18,11 @@ define i8* @FindChar(i8* %CurPtr) {
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movzbl (%esi,%edi), %eax
 ; CHECK-NEXT:    incl %edi
-; CHECK-NEXT:    cmpb $120, %al
+; CHECK-NEXT:    cmpl $120, %eax
 ; CHECK-NEXT:    je .LBB0_3
 ; CHECK-NEXT:  # %bb.2: # %bb
 ; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    testl %eax, %eax
 ; CHECK-NEXT:    jne .LBB0_1
 ; CHECK-NEXT:  .LBB0_3: # %bb7
 ; CHECK-NEXT:    movzbl %al, %eax

diff  --git a/llvm/test/CodeGen/X86/conditional-tailcall.ll b/llvm/test/CodeGen/X86/conditional-tailcall.ll
index 3c2903c0071ac..847788b13efdd 100644
--- a/llvm/test/CodeGen/X86/conditional-tailcall.ll
+++ b/llvm/test/CodeGen/X86/conditional-tailcall.ll
@@ -248,7 +248,7 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe
 ; CHECK32-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x20]
 ; CHECK32-NEXT:    movl (%eax), %eax # encoding: [0x8b,0x00]
 ; CHECK32-NEXT:    movl -24(%eax), %edx # encoding: [0x8b,0x50,0xe8]
-; CHECK32-NEXT:    leal (%eax,%edx), %ebp # encoding: [0x8d,0x2c,0x10]
+; CHECK32-NEXT:    leal (%eax,%edx), %ecx # encoding: [0x8d,0x0c,0x10]
 ; CHECK32-NEXT:    xorl %ebx, %ebx # encoding: [0x31,0xdb]
 ; CHECK32-NEXT:    pushl $2 # encoding: [0x6a,0x02]
 ; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
@@ -259,76 +259,80 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe
 ; CHECK32-NEXT:  .LBB3_1: # %for.cond
 ; CHECK32-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK32-NEXT:    testl %edx, %edx # encoding: [0x85,0xd2]
-; CHECK32-NEXT:    je .LBB3_13 # encoding: [0x74,A]
-; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_13-1, kind: FK_PCRel_1
+; CHECK32-NEXT:    je .LBB3_14 # encoding: [0x74,A]
+; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_14-1, kind: FK_PCRel_1
 ; CHECK32-NEXT:  # %bb.2: # %for.body
 ; CHECK32-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; CHECK32-NEXT:    cmpl $2, %ebx # encoding: [0x83,0xfb,0x02]
-; CHECK32-NEXT:    je .LBB3_11 # encoding: [0x74,A]
-; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
+; CHECK32-NEXT:    je .LBB3_12 # encoding: [0x74,A]
+; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_12-1, kind: FK_PCRel_1
 ; CHECK32-NEXT:  # %bb.3: # %for.body
 ; CHECK32-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; CHECK32-NEXT:    cmpl $1, %ebx # encoding: [0x83,0xfb,0x01]
-; CHECK32-NEXT:    je .LBB3_9 # encoding: [0x74,A]
-; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
+; CHECK32-NEXT:    je .LBB3_10 # encoding: [0x74,A]
+; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
 ; CHECK32-NEXT:  # %bb.4: # %for.body
 ; CHECK32-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; CHECK32-NEXT:    testl %ebx, %ebx # encoding: [0x85,0xdb]
-; CHECK32-NEXT:    jne .LBB3_10 # encoding: [0x75,A]
-; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
+; CHECK32-NEXT:    jne .LBB3_11 # encoding: [0x75,A]
+; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
 ; CHECK32-NEXT:  # %bb.5: # %sw.bb
 ; CHECK32-NEXT:    # in Loop: Header=BB3_1 Depth=1
-; CHECK32-NEXT:    movzbl (%eax), %ecx # encoding: [0x0f,0xb6,0x08]
-; CHECK32-NEXT:    cmpl $43, %ecx # encoding: [0x83,0xf9,0x2b]
+; CHECK32-NEXT:    movzbl (%eax), %ebp # encoding: [0x0f,0xb6,0x28]
+; CHECK32-NEXT:    cmpl $43, %ebp # encoding: [0x83,0xfd,0x2b]
 ; CHECK32-NEXT:    movl %edi, %ebx # encoding: [0x89,0xfb]
-; CHECK32-NEXT:    je .LBB3_10 # encoding: [0x74,A]
-; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
+; CHECK32-NEXT:    je .LBB3_11 # encoding: [0x74,A]
+; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
 ; CHECK32-NEXT:  # %bb.6: # %sw.bb
 ; CHECK32-NEXT:    # in Loop: Header=BB3_1 Depth=1
-; CHECK32-NEXT:    cmpb $45, %cl # encoding: [0x80,0xf9,0x2d]
+; CHECK32-NEXT:    cmpl $45, %ebp # encoding: [0x83,0xfd,0x2d]
 ; CHECK32-NEXT:    movl %edi, %ebx # encoding: [0x89,0xfb]
-; CHECK32-NEXT:    je .LBB3_10 # encoding: [0x74,A]
-; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
-; CHECK32-NEXT:    jmp .LBB3_7 # encoding: [0xeb,A]
-; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_7-1, kind: FK_PCRel_1
-; CHECK32-NEXT:  .LBB3_11: # %sw.bb22
+; CHECK32-NEXT:    je .LBB3_11 # encoding: [0x74,A]
+; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
+; CHECK32-NEXT:  # %bb.7: # %if.else
+; CHECK32-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; CHECK32-NEXT:    addl $-48, %ebp # encoding: [0x83,0xc5,0xd0]
+; CHECK32-NEXT:    cmpl $10, %ebp # encoding: [0x83,0xfd,0x0a]
+; CHECK32-NEXT:    jmp .LBB3_8 # encoding: [0xeb,A]
+; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1
+; CHECK32-NEXT:  .LBB3_12: # %sw.bb22
 ; CHECK32-NEXT:    # in Loop: Header=BB3_1 Depth=1
-; CHECK32-NEXT:    movzbl (%eax), %ecx # encoding: [0x0f,0xb6,0x08]
-; CHECK32-NEXT:    addl $-48, %ecx # encoding: [0x83,0xc1,0xd0]
-; CHECK32-NEXT:    cmpl $10, %ecx # encoding: [0x83,0xf9,0x0a]
+; CHECK32-NEXT:    movzbl (%eax), %ebx # encoding: [0x0f,0xb6,0x18]
+; CHECK32-NEXT:    addl $-48, %ebx # encoding: [0x83,0xc3,0xd0]
+; CHECK32-NEXT:    cmpl $10, %ebx # encoding: [0x83,0xfb,0x0a]
 ; CHECK32-NEXT:    movl %esi, %ebx # encoding: [0x89,0xf3]
-; CHECK32-NEXT:    jb .LBB3_10 # encoding: [0x72,A]
-; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
-; CHECK32-NEXT:    jmp .LBB3_12 # encoding: [0xeb,A]
-; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_12-1, kind: FK_PCRel_1
-; CHECK32-NEXT:  .LBB3_9: # %sw.bb14
+; CHECK32-NEXT:    jb .LBB3_11 # encoding: [0x72,A]
+; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
+; CHECK32-NEXT:    jmp .LBB3_13 # encoding: [0xeb,A]
+; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_13-1, kind: FK_PCRel_1
+; CHECK32-NEXT:  .LBB3_10: # %sw.bb14
 ; CHECK32-NEXT:    # in Loop: Header=BB3_1 Depth=1
-; CHECK32-NEXT:    movzbl (%eax), %ecx # encoding: [0x0f,0xb6,0x08]
-; CHECK32-NEXT:  .LBB3_7: # %if.else
+; CHECK32-NEXT:    movzbl (%eax), %ebx # encoding: [0x0f,0xb6,0x18]
+; CHECK32-NEXT:    addl $-48, %ebx # encoding: [0x83,0xc3,0xd0]
+; CHECK32-NEXT:    cmpl $10, %ebx # encoding: [0x83,0xfb,0x0a]
+; CHECK32-NEXT:  .LBB3_8: # %if.else
 ; CHECK32-NEXT:    # in Loop: Header=BB3_1 Depth=1
-; CHECK32-NEXT:    addl $-48, %ecx # encoding: [0x83,0xc1,0xd0]
-; CHECK32-NEXT:    cmpl $10, %ecx # encoding: [0x83,0xf9,0x0a]
 ; CHECK32-NEXT:    movl %esi, %ebx # encoding: [0x89,0xf3]
-; CHECK32-NEXT:    jae .LBB3_8 # encoding: [0x73,A]
-; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1
-; CHECK32-NEXT:  .LBB3_10: # %for.inc
+; CHECK32-NEXT:    jae .LBB3_9 # encoding: [0x73,A]
+; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1
+; CHECK32-NEXT:  .LBB3_11: # %for.inc
 ; CHECK32-NEXT:    # in Loop: Header=BB3_1 Depth=1
 ; CHECK32-NEXT:    incl %eax # encoding: [0x40]
 ; CHECK32-NEXT:    decl %edx # encoding: [0x4a]
 ; CHECK32-NEXT:    jmp .LBB3_1 # encoding: [0xeb,A]
 ; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1
-; CHECK32-NEXT:  .LBB3_13:
+; CHECK32-NEXT:  .LBB3_14:
 ; CHECK32-NEXT:    cmpl $2, %ebx # encoding: [0x83,0xfb,0x02]
 ; CHECK32-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
-; CHECK32-NEXT:    jmp .LBB3_14 # encoding: [0xeb,A]
-; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_14-1, kind: FK_PCRel_1
-; CHECK32-NEXT:  .LBB3_8:
+; CHECK32-NEXT:    jmp .LBB3_15 # encoding: [0xeb,A]
+; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_15-1, kind: FK_PCRel_1
+; CHECK32-NEXT:  .LBB3_9:
 ; CHECK32-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
-; CHECK32-NEXT:  .LBB3_14: # %cleanup.thread
+; CHECK32-NEXT:  .LBB3_15: # %cleanup.thread
 ; CHECK32-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK32-NEXT:    addl $12, %esp # encoding: [0x83,0xc4,0x0c]
 ; CHECK32-NEXT:    .cfi_def_cfa_offset 20
-; CHECK32-NEXT:  .LBB3_15: # %cleanup.thread
+; CHECK32-NEXT:  .LBB3_16: # %cleanup.thread
 ; CHECK32-NEXT:    popl %esi # encoding: [0x5e]
 ; CHECK32-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK32-NEXT:    popl %edi # encoding: [0x5f]
@@ -338,11 +342,11 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe
 ; CHECK32-NEXT:    popl %ebp # encoding: [0x5d]
 ; CHECK32-NEXT:    .cfi_def_cfa_offset 4
 ; CHECK32-NEXT:    retl # encoding: [0xc3]
-; CHECK32-NEXT:  .LBB3_12: # %if.else28
+; CHECK32-NEXT:  .LBB3_13: # %if.else28
 ; CHECK32-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK32-NEXT:    subl $8, %esp # encoding: [0x83,0xec,0x08]
 ; CHECK32-NEXT:    .cfi_adjust_cfa_offset 8
-; CHECK32-NEXT:    pushl %ebp # encoding: [0x55]
+; CHECK32-NEXT:    pushl %ecx # encoding: [0x51]
 ; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
 ; CHECK32-NEXT:    pushl %eax # encoding: [0x50]
 ; CHECK32-NEXT:    .cfi_adjust_cfa_offset 4
@@ -350,8 +354,8 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe
 ; CHECK32-NEXT:    # fixup A - offset: 1, value: _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_-4, kind: FK_PCRel_4
 ; CHECK32-NEXT:    addl $28, %esp # encoding: [0x83,0xc4,0x1c]
 ; CHECK32-NEXT:    .cfi_adjust_cfa_offset -28
-; CHECK32-NEXT:    jmp .LBB3_15 # encoding: [0xeb,A]
-; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_15-1, kind: FK_PCRel_1
+; CHECK32-NEXT:    jmp .LBB3_16 # encoding: [0xeb,A]
+; CHECK32-NEXT:    # fixup A - offset: 1, value: .LBB3_16-1, kind: FK_PCRel_1
 ;
 ; CHECK64-LABEL: pr31257:
 ; CHECK64:       # %bb.0: # %entry
@@ -396,7 +400,7 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe
 ; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
 ; CHECK64-NEXT:  # %bb.6: # %sw.bb
 ; CHECK64-NEXT:    # in Loop: Header=BB3_1 Depth=1
-; CHECK64-NEXT:    cmpb $45, %dl # encoding: [0x80,0xfa,0x2d]
+; CHECK64-NEXT:    cmpl $45, %edx # encoding: [0x83,0xfa,0x2d]
 ; CHECK64-NEXT:    movl %r8d, %ecx # encoding: [0x44,0x89,0xc1]
 ; CHECK64-NEXT:    je .LBB3_11 # encoding: [0x74,A]
 ; CHECK64-NEXT:    # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1
@@ -478,7 +482,7 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe
 ; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
 ; WIN64-NEXT:  # %bb.6: # %sw.bb
 ; WIN64-NEXT:    # in Loop: Header=BB3_1 Depth=1
-; WIN64-NEXT:    cmpb $45, %r9b # encoding: [0x41,0x80,0xf9,0x2d]
+; WIN64-NEXT:    cmpl $45, %r9d # encoding: [0x41,0x83,0xf9,0x2d]
 ; WIN64-NEXT:    je .LBB3_10 # encoding: [0x74,A]
 ; WIN64-NEXT:    # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1
 ; WIN64-NEXT:    jmp .LBB3_8 # encoding: [0xeb,A]

diff  --git a/llvm/test/CodeGen/X86/switch-phi-const.ll b/llvm/test/CodeGen/X86/switch-phi-const.ll
index d0ea18f524cfc..ea84c5918aba1 100644
--- a/llvm/test/CodeGen/X86/switch-phi-const.ll
+++ b/llvm/test/CodeGen/X86/switch-phi-const.ll
@@ -97,27 +97,27 @@ default:
 define void @switch_trunc_phi_const(i32 %x) {
 ; CHECK-LABEL: switch_trunc_phi_const:
 ; CHECK:       # %bb.0: # %bb0
-; CHECK-NEXT:    movl $3895, %r8d # imm = 0xF37
+; CHECK-NEXT:    movzbl %dil, %r8d
+; CHECK-NEXT:    movl $3895, %ecx # imm = 0xF37
 ; CHECK-NEXT:    movl $42, %esi
 ; CHECK-NEXT:    movl $13, %edx
-; CHECK-NEXT:    movl $5, %eax
-; CHECK-NEXT:    movl $1, %ecx
-; CHECK-NEXT:    decb %dil
-; CHECK-NEXT:    movzbl %dil, %edi
-; CHECK-NEXT:    cmpb $54, %dil
+; CHECK-NEXT:    movl $5, %edi
+; CHECK-NEXT:    movl $1, %eax
+; CHECK-NEXT:    decl %r8d
+; CHECK-NEXT:    cmpl $54, %r8d
 ; CHECK-NEXT:    ja .LBB1_8
 ; CHECK-NEXT:  # %bb.1: # %bb0
-; CHECK-NEXT:    jmpq *.LJTI1_0(,%rdi,8)
+; CHECK-NEXT:    jmpq *.LJTI1_0(,%r8,8)
 ; CHECK-NEXT:  .LBB1_8: # %default
 ; CHECK-NEXT:    retq
 ; CHECK-NEXT:  .LBB1_2: # %case_1_loop
-; CHECK-NEXT:    movq effect64@GOTPCREL(%rip), %rax
-; CHECK-NEXT:    movq $1, (%rax)
-; CHECK-NEXT:    movq %rcx, %rax
-; CHECK-NEXT:  .LBB1_3: # %case_5
 ; CHECK-NEXT:    movq effect64@GOTPCREL(%rip), %rcx
-; CHECK-NEXT:    movq $5, (%rcx)
-; CHECK-NEXT:    movq %rax, %rdx
+; CHECK-NEXT:    movq $1, (%rcx)
+; CHECK-NEXT:    movq %rax, %rdi
+; CHECK-NEXT:  .LBB1_3: # %case_5
+; CHECK-NEXT:    movq effect64@GOTPCREL(%rip), %rax
+; CHECK-NEXT:    movq $5, (%rax)
+; CHECK-NEXT:    movq %rdi, %rdx
 ; CHECK-NEXT:  .LBB1_4: # %case_13
 ; CHECK-NEXT:    movq effect64@GOTPCREL(%rip), %rax
 ; CHECK-NEXT:    movq $13, (%rax)
@@ -125,15 +125,15 @@ define void @switch_trunc_phi_const(i32 %x) {
 ; CHECK-NEXT:  .LBB1_5: # %case_42
 ; CHECK-NEXT:    movq effect64@GOTPCREL(%rip), %rax
 ; CHECK-NEXT:    movq %rsi, (%rax)
-; CHECK-NEXT:    movl $55, %r8d
+; CHECK-NEXT:    movl $55, %ecx
 ; CHECK-NEXT:  .LBB1_6: # %case_55
 ; CHECK-NEXT:    movq effect64@GOTPCREL(%rip), %rax
-; CHECK-NEXT:    movq %r8, (%rax)
+; CHECK-NEXT:    movq %rcx, (%rax)
 ; CHECK-NEXT:  .LBB1_7: # %case_7
 ; CHECK-NEXT:    movq g64@GOTPCREL(%rip), %rax
-; CHECK-NEXT:    movq (%rax), %rcx
-; CHECK-NEXT:    movq effect64@GOTPCREL(%rip), %rax
-; CHECK-NEXT:    movq $7, (%rax)
+; CHECK-NEXT:    movq (%rax), %rax
+; CHECK-NEXT:    movq effect64@GOTPCREL(%rip), %rcx
+; CHECK-NEXT:    movq $7, (%rcx)
 ; CHECK-NEXT:    jmp .LBB1_2
 bb0:
   %x_trunc = trunc i32 %x to i8

diff  --git a/llvm/test/CodeGen/X86/switch.ll b/llvm/test/CodeGen/X86/switch.ll
index 5c8718050d77f..3601203cc4714 100644
--- a/llvm/test/CodeGen/X86/switch.ll
+++ b/llvm/test/CodeGen/X86/switch.ll
@@ -1417,8 +1417,8 @@ sw:
 define void @int_max_table_cluster(i8 %x) {
 ; CHECK-LABEL: int_max_table_cluster:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addb $64, %dil
-; CHECK-NEXT:    cmpb $-65, %dil
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    cmpb $-9, %dil
 ; CHECK-NEXT:    ja .LBB15_4
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    movzbl %dil, %eax
@@ -2445,7 +2445,6 @@ return: ret void
 define void @pr23738(i4 %x) {
 ; CHECK-LABEL: pr23738:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
 ; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    andb $15, %al
 ; CHECK-NEXT:    cmpb $11, %al
@@ -2453,7 +2452,7 @@ define void @pr23738(i4 %x) {
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    andl $15, %edi
 ; CHECK-NEXT:    movl $2051, %eax # imm = 0x803
-; CHECK-NEXT:    btq %rdi, %rax
+; CHECK-NEXT:    btl %edi, %eax
 ; CHECK-NEXT:    jae .LBB23_2
 ; CHECK-NEXT:  # %bb.3: # %bb1
 ; CHECK-NEXT:    movl $1, %edi
@@ -2651,21 +2650,20 @@ define void @switch_i8(i32 %a) {
 ; CHECK-LABEL: switch_i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT:    andb $127, %dil
+; CHECK-NEXT:    andl $127, %edi
 ; CHECK-NEXT:    leal -1(%rdi), %eax
-; CHECK-NEXT:    cmpb $8, %al
+; CHECK-NEXT:    cmpl $8, %eax
 ; CHECK-NEXT:    ja .LBB26_1
 ; CHECK-NEXT:  # %bb.10:
-; CHECK-NEXT:    movzbl %al, %eax
 ; CHECK-NEXT:    jmpq *.LJTI26_0(,%rax,8)
 ; CHECK-NEXT:  .LBB26_4: # %bb0
 ; CHECK-NEXT:    xorl %edi, %edi
 ; CHECK-NEXT:    jmp g@PLT # TAILCALL
 ; CHECK-NEXT:  .LBB26_1:
-; CHECK-NEXT:    cmpb $13, %dil
+; CHECK-NEXT:    cmpl $13, %edi
 ; CHECK-NEXT:    je .LBB26_8
 ; CHECK-NEXT:  # %bb.2:
-; CHECK-NEXT:    cmpb $42, %dil
+; CHECK-NEXT:    cmpl $42, %edi
 ; CHECK-NEXT:    jne .LBB26_9
 ; CHECK-NEXT:  # %bb.3: # %bb5
 ; CHECK-NEXT:    movl $5, %edi

diff  --git a/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll b/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll
index 2032c7244331c..a38bbd12588a7 100644
--- a/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll
+++ b/llvm/test/CodeGen/X86/tail-dup-multiple-latch-loop.ll
@@ -139,11 +139,10 @@ define i32 @interp_switch(i8* nocapture readonly %0, i32 %1) {
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movzbl (%rdi), %ecx
-; CHECK-NEXT:    decb %cl
-; CHECK-NEXT:    cmpb $5, %cl
+; CHECK-NEXT:    decl %ecx
+; CHECK-NEXT:    cmpl $5, %ecx
 ; CHECK-NEXT:    ja .LBB1_9
 ; CHECK-NEXT:  # %bb.2: # in Loop: Header=BB1_1 Depth=1
-; CHECK-NEXT:    movzbl %cl, %ecx
 ; CHECK-NEXT:    jmpq *.LJTI1_0(,%rcx,8)
 ; CHECK-NEXT:  .LBB1_3: # in Loop: Header=BB1_1 Depth=1
 ; CHECK-NEXT:    incl %eax

diff  --git a/llvm/test/CodeGen/X86/tail-opts.ll b/llvm/test/CodeGen/X86/tail-opts.ll
index 40207f814f3fb..fbc35b38f3df0 100644
--- a/llvm/test/CodeGen/X86/tail-opts.ll
+++ b/llvm/test/CodeGen/X86/tail-opts.ll
@@ -286,22 +286,22 @@ define fastcc void @c_expand_expr_stmt(%union.tree_node* %expr) nounwind {
 ; CHECK-NEXT:    testb %bl, %bl
 ; CHECK-NEXT:  .LBB3_10: # %bb2.i3
 ; CHECK-NEXT:    movq 8(%rax), %rax
-; CHECK-NEXT:    movb 16(%rax), %cl
+; CHECK-NEXT:    movzbl 16(%rax), %ecx
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    cmpb $23, %cl
+; CHECK-NEXT:    cmpl $23, %ecx
 ; CHECK-NEXT:    je .LBB3_16
 ; CHECK-NEXT:  # %bb.11: # %bb2.i3
-; CHECK-NEXT:    cmpb $16, %cl
+; CHECK-NEXT:    cmpl $16, %ecx
 ; CHECK-NEXT:    je .LBB3_16
 ; CHECK-NEXT:    jmp .LBB3_9
 ; CHECK-NEXT:  .LBB3_13: # %bb2.i.i2
 ; CHECK-NEXT:    movq 8(%rax), %rax
-; CHECK-NEXT:    movb 16(%rax), %cl
+; CHECK-NEXT:    movzbl 16(%rax), %ecx
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    cmpb $16, %cl
+; CHECK-NEXT:    cmpl $16, %ecx
 ; CHECK-NEXT:    je .LBB3_16
 ; CHECK-NEXT:  # %bb.14: # %bb2.i.i2
-; CHECK-NEXT:    cmpb $23, %cl
+; CHECK-NEXT:    cmpl $23, %ecx
 ; CHECK-NEXT:    je .LBB3_16
 ; CHECK-NEXT:    jmp .LBB3_9
 entry:

diff  --git a/llvm/test/Transforms/CodeGenPrepare/X86/widen_switch.ll b/llvm/test/Transforms/CodeGenPrepare/X86/widen_switch.ll
index 82c9938336eb1..9d04c4be49f4b 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/widen_switch.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/widen_switch.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -codegenprepare -S -mtriple=x86_64-unknown-unknown    | FileCheck %s --check-prefix=X86
 ; RUN: opt < %s -debugify -codegenprepare -S -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=DEBUG
 
-; No change for x86 because 16-bit registers are part of the architecture.
+; X86 prefers i32 over i16 for address calculation.
 
 define i32 @widen_switch_i16(i32 %a)  {
 entry:
@@ -28,9 +28,10 @@ return:
 
 ; X86-LABEL: @widen_switch_i16(
 ; X86:       %trunc = trunc i32 %a to i16
-; X86-NEXT:  switch i16 %trunc, label %sw.default [
-; X86-NEXT:    i16 1, label %sw.bb0
-; X86-NEXT:    i16 -1, label %sw.bb1
+; X86-NEXT:  %0 = zext i16 %trunc to i32
+; X86-NEXT:  switch i32 %0, label %sw.default [
+; X86-NEXT:    i32 1, label %sw.bb0
+; X86-NEXT:    i32 65535, label %sw.bb1
 }
 
 ; Widen to 32-bit from a smaller, non-native type.
@@ -95,9 +96,9 @@ return:
   ret i32 %retval
 
 ; X86-LABEL: @widen_switch_i16_sext(
-; X86:       %0 = sext i2 %a to i8
-; X86-NEXT:  switch i8 %0, label %sw.default [
-; X86-NEXT:    i8 1, label %sw.bb0
-; X86-NEXT:    i8 -1, label %sw.bb1
+; X86:       %0 = sext i2 %a to i32
+; X86-NEXT:  switch i32 %0, label %sw.default [
+; X86-NEXT:    i32 1, label %sw.bb0
+; X86-NEXT:    i32 -1, label %sw.bb1
 }