[llvm] [X86] EmitCmp - always use cmpw with foldable loads (PR #92251)

Wed May 15 06:18:00 PDT 2024

https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/92251

>From f5b39abdd76b877fad4e12bc74859301df4ee32f Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 15 May 2024 12:50:26 +0100
Subject: [PATCH] [X86] EmitCmp - always use cmpw with foldable loads

By default, EmitCmp avoids cmpw with i16 immediates due to 66/67h length-changing prefixes causing stalls, extending to cmpl with a i32 immediate, unless it has the TuningFastImm16 flag or we're building for optsize/minsize.

However, if we're loading the value for comparison, the latency costs of the folded load are likely to match/exceed the decode stalls, the shorter encoding and not needing an extra register to store the ext-load are the more performant factors.

This matches the behaviour of gcc and msvc.

Fixes #90355
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       |   9 +-
 llvm/test/CodeGen/X86/cmp16.ll                | 196 ++++--------------
 .../CodeGen/X86/memcmp-more-load-pairs-x32.ll |   3 +-
 .../CodeGen/X86/memcmp-more-load-pairs.ll     |   3 +-
 llvm/test/CodeGen/X86/memcmp-optsize-x32.ll   |   3 +-
 llvm/test/CodeGen/X86/memcmp-optsize.ll       |   3 +-
 llvm/test/CodeGen/X86/memcmp-pgso-x32.ll      |   3 +-
 llvm/test/CodeGen/X86/memcmp-pgso.ll          |   3 +-
 llvm/test/CodeGen/X86/memcmp-x32.ll           |   3 +-
 llvm/test/CodeGen/X86/memcmp.ll               |   3 +-
 10 files changed, 60 insertions(+), 169 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a57c10e784d9c..e8b7aa2d1ac06 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -22692,11 +22692,14 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
           CmpVT == MVT::i32 || CmpVT == MVT::i64) && "Unexpected VT!");
 
   // Only promote the compare up to I32 if it is a 16 bit operation
-  // with an immediate.  16 bit immediates are to be avoided.
+  // with an immediate.  16 bit immediates are to be avoided unless the target
+  // isn't slowed down by length chaning prefixes, we're optimizing for codesize
+  // or the comparison is with a folded load.
   if (CmpVT == MVT::i16 && !Subtarget.hasFastImm16() &&
+      !X86::mayFoldLoad(Op0, Subtarget) && !X86::mayFoldLoad(Op1, Subtarget) &&
       !DAG.getMachineFunction().getFunction().hasMinSize()) {
-    ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0);
-    ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
+    auto *COp0 = dyn_cast<ConstantSDNode>(Op0);
+    auto *COp1 = dyn_cast<ConstantSDNode>(Op1);
     // Don't do this if the immediate can fit in 8-bits.
     if ((COp0 && !COp0->getAPIntValue().isSignedIntN(8)) ||
         (COp1 && !COp1->getAPIntValue().isSignedIntN(8))) {
diff --git a/llvm/test/CodeGen/X86/cmp16.ll b/llvm/test/CodeGen/X86/cmp16.ll
index 699ea3e4dd473..fa9e75ff16a5c 100644
--- a/llvm/test/CodeGen/X86/cmp16.ll
+++ b/llvm/test/CodeGen/X86/cmp16.ll
@@ -113,8 +113,7 @@ define i1 @cmp16_reg_eq_imm8(i16 %a0) {
 define i1 @cmp16_reg_eq_imm16(i16 %a0) {
 ; X86-GENERIC-LABEL: cmp16_reg_eq_imm16:
 ; X86-GENERIC:       # %bb.0:
-; X86-GENERIC-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-GENERIC-NEXT:    cmpl $1024, %eax # imm = 0x400
+; X86-GENERIC-NEXT:    cmpw $1024, {{[0-9]+}}(%esp) # imm = 0x400
 ; X86-GENERIC-NEXT:    sete %al
 ; X86-GENERIC-NEXT:    retl
 ;
@@ -177,12 +176,11 @@ define i1 @cmp16_reg_eq_imm16_minsize(i16 %a0) minsize {
 }
 
 define i1 @cmp16_reg_eq_imm16_optsize(i16 %a0) optsize {
-; X86-GENERIC-LABEL: cmp16_reg_eq_imm16_optsize:
-; X86-GENERIC:       # %bb.0:
-; X86-GENERIC-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-GENERIC-NEXT:    cmpl $1024, %eax # imm = 0x400
-; X86-GENERIC-NEXT:    sete %al
-; X86-GENERIC-NEXT:    retl
+; X86-LABEL: cmp16_reg_eq_imm16_optsize:
+; X86:       # %bb.0:
+; X86-NEXT:    cmpw $1024, {{[0-9]+}}(%esp) # imm = 0x400
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
 ;
 ; X64-GENERIC-LABEL: cmp16_reg_eq_imm16_optsize:
 ; X64-GENERIC:       # %bb.0:
@@ -191,24 +189,12 @@ define i1 @cmp16_reg_eq_imm16_optsize(i16 %a0) optsize {
 ; X64-GENERIC-NEXT:    sete %al
 ; X64-GENERIC-NEXT:    retq
 ;
-; X86-FAST-LABEL: cmp16_reg_eq_imm16_optsize:
-; X86-FAST:       # %bb.0:
-; X86-FAST-NEXT:    cmpw $1024, {{[0-9]+}}(%esp) # imm = 0x400
-; X86-FAST-NEXT:    sete %al
-; X86-FAST-NEXT:    retl
-;
 ; X64-FAST-LABEL: cmp16_reg_eq_imm16_optsize:
 ; X64-FAST:       # %bb.0:
 ; X64-FAST-NEXT:    cmpw $1024, %di # imm = 0x400
 ; X64-FAST-NEXT:    sete %al
 ; X64-FAST-NEXT:    retq
 ;
-; X86-ATOM-LABEL: cmp16_reg_eq_imm16_optsize:
-; X86-ATOM:       # %bb.0:
-; X86-ATOM-NEXT:    cmpw $1024, {{[0-9]+}}(%esp) # imm = 0x400
-; X86-ATOM-NEXT:    sete %al
-; X86-ATOM-NEXT:    retl
-;
 ; X64-ATOM-LABEL: cmp16_reg_eq_imm16_optsize:
 ; X64-ATOM:       # %bb.0:
 ; X64-ATOM-NEXT:    cmpw $1024, %di # imm = 0x400
@@ -269,8 +255,7 @@ define i1 @cmp16_reg_sgt_imm8(i16 %a0) {
 define i1 @cmp16_reg_sgt_imm16(i16 %a0) {
 ; X86-GENERIC-LABEL: cmp16_reg_sgt_imm16:
 ; X86-GENERIC:       # %bb.0:
-; X86-GENERIC-NEXT:    movswl {{[0-9]+}}(%esp), %eax
-; X86-GENERIC-NEXT:    cmpl $-1023, %eax # imm = 0xFC01
+; X86-GENERIC-NEXT:    cmpw $-1023, {{[0-9]+}}(%esp) # imm = 0xFC01
 ; X86-GENERIC-NEXT:    setge %al
 ; X86-GENERIC-NEXT:    retl
 ;
@@ -333,12 +318,11 @@ define i1 @cmp16_reg_sgt_imm16_minsize(i16 %a0) minsize {
 }
 
 define i1 @cmp16_reg_sgt_imm16_optsize(i16 %a0) optsize {
-; X86-GENERIC-LABEL: cmp16_reg_sgt_imm16_optsize:
-; X86-GENERIC:       # %bb.0:
-; X86-GENERIC-NEXT:    movswl {{[0-9]+}}(%esp), %eax
-; X86-GENERIC-NEXT:    cmpl $-1023, %eax # imm = 0xFC01
-; X86-GENERIC-NEXT:    setge %al
-; X86-GENERIC-NEXT:    retl
+; X86-LABEL: cmp16_reg_sgt_imm16_optsize:
+; X86:       # %bb.0:
+; X86-NEXT:    cmpw $-1023, {{[0-9]+}}(%esp) # imm = 0xFC01
+; X86-NEXT:    setge %al
+; X86-NEXT:    retl
 ;
 ; X64-GENERIC-LABEL: cmp16_reg_sgt_imm16_optsize:
 ; X64-GENERIC:       # %bb.0:
@@ -347,24 +331,12 @@ define i1 @cmp16_reg_sgt_imm16_optsize(i16 %a0) optsize {
 ; X64-GENERIC-NEXT:    setge %al
 ; X64-GENERIC-NEXT:    retq
 ;
-; X86-FAST-LABEL: cmp16_reg_sgt_imm16_optsize:
-; X86-FAST:       # %bb.0:
-; X86-FAST-NEXT:    cmpw $-1023, {{[0-9]+}}(%esp) # imm = 0xFC01
-; X86-FAST-NEXT:    setge %al
-; X86-FAST-NEXT:    retl
-;
 ; X64-FAST-LABEL: cmp16_reg_sgt_imm16_optsize:
 ; X64-FAST:       # %bb.0:
 ; X64-FAST-NEXT:    cmpw $-1023, %di # imm = 0xFC01
 ; X64-FAST-NEXT:    setge %al
 ; X64-FAST-NEXT:    retq
 ;
-; X86-ATOM-LABEL: cmp16_reg_sgt_imm16_optsize:
-; X86-ATOM:       # %bb.0:
-; X86-ATOM-NEXT:    cmpw $-1023, {{[0-9]+}}(%esp) # imm = 0xFC01
-; X86-ATOM-NEXT:    setge %al
-; X86-ATOM-NEXT:    retl
-;
 ; X64-ATOM-LABEL: cmp16_reg_sgt_imm16_optsize:
 ; X64-ATOM:       # %bb.0:
 ; X64-ATOM-NEXT:    cmpw $-1023, %di # imm = 0xFC01
@@ -377,8 +349,7 @@ define i1 @cmp16_reg_sgt_imm16_optsize(i16 %a0) optsize {
 define i1 @cmp16_reg_uge_imm16(i16 %a0) {
 ; X86-GENERIC-LABEL: cmp16_reg_uge_imm16:
 ; X86-GENERIC:       # %bb.0:
-; X86-GENERIC-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-GENERIC-NEXT:    cmpl $64512, %eax # imm = 0xFC00
+; X86-GENERIC-NEXT:    cmpw $-1024, {{[0-9]+}}(%esp) # imm = 0xFC00
 ; X86-GENERIC-NEXT:    setae %al
 ; X86-GENERIC-NEXT:    retl
 ;
@@ -441,12 +412,11 @@ define i1 @cmp16_reg_uge_imm16_minsize(i16 %a0) minsize {
 }
 
 define i1 @cmp16_reg_uge_imm16_optsize(i16 %a0) optsize {
-; X86-GENERIC-LABEL: cmp16_reg_uge_imm16_optsize:
-; X86-GENERIC:       # %bb.0:
-; X86-GENERIC-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-GENERIC-NEXT:    cmpl $64512, %eax # imm = 0xFC00
-; X86-GENERIC-NEXT:    setae %al
-; X86-GENERIC-NEXT:    retl
+; X86-LABEL: cmp16_reg_uge_imm16_optsize:
+; X86:       # %bb.0:
+; X86-NEXT:    cmpw $-1024, {{[0-9]+}}(%esp) # imm = 0xFC00
+; X86-NEXT:    setae %al
+; X86-NEXT:    retl
 ;
 ; X64-GENERIC-LABEL: cmp16_reg_uge_imm16_optsize:
 ; X64-GENERIC:       # %bb.0:
@@ -455,24 +425,12 @@ define i1 @cmp16_reg_uge_imm16_optsize(i16 %a0) optsize {
 ; X64-GENERIC-NEXT:    setae %al
 ; X64-GENERIC-NEXT:    retq
 ;
-; X86-FAST-LABEL: cmp16_reg_uge_imm16_optsize:
-; X86-FAST:       # %bb.0:
-; X86-FAST-NEXT:    cmpw $-1024, {{[0-9]+}}(%esp) # imm = 0xFC00
-; X86-FAST-NEXT:    setae %al
-; X86-FAST-NEXT:    retl
-;
 ; X64-FAST-LABEL: cmp16_reg_uge_imm16_optsize:
 ; X64-FAST:       # %bb.0:
 ; X64-FAST-NEXT:    cmpw $-1024, %di # imm = 0xFC00
 ; X64-FAST-NEXT:    setae %al
 ; X64-FAST-NEXT:    retq
 ;
-; X86-ATOM-LABEL: cmp16_reg_uge_imm16_optsize:
-; X86-ATOM:       # %bb.0:
-; X86-ATOM-NEXT:    cmpw $-1024, {{[0-9]+}}(%esp) # imm = 0xFC00
-; X86-ATOM-NEXT:    setae %al
-; X86-ATOM-NEXT:    retl
-;
 ; X64-ATOM-LABEL: cmp16_reg_uge_imm16_optsize:
 ; X64-ATOM:       # %bb.0:
 ; X64-ATOM-NEXT:    cmpw $-1024, %di # imm = 0xFC00
@@ -592,15 +550,13 @@ define i1 @cmp16_load_ne_imm16(ptr %p0) {
 ; X86-GENERIC-LABEL: cmp16_load_ne_imm16:
 ; X86-GENERIC:       # %bb.0:
 ; X86-GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-GENERIC-NEXT:    movzwl (%eax), %eax
-; X86-GENERIC-NEXT:    cmpl $512, %eax # imm = 0x200
+; X86-GENERIC-NEXT:    cmpw $512, (%eax) # imm = 0x200
 ; X86-GENERIC-NEXT:    setne %al
 ; X86-GENERIC-NEXT:    retl
 ;
 ; X64-GENERIC-LABEL: cmp16_load_ne_imm16:
 ; X64-GENERIC:       # %bb.0:
-; X64-GENERIC-NEXT:    movzwl (%rdi), %eax
-; X64-GENERIC-NEXT:    cmpl $512, %eax # imm = 0x200
+; X64-GENERIC-NEXT:    cmpw $512, (%rdi) # imm = 0x200
 ; X64-GENERIC-NEXT:    setne %al
 ; X64-GENERIC-NEXT:    retq
 ;
@@ -694,15 +650,13 @@ define i1 @cmp16_load_slt_imm16(ptr %p0) {
 ; X86-GENERIC-LABEL: cmp16_load_slt_imm16:
 ; X86-GENERIC:       # %bb.0:
 ; X86-GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-GENERIC-NEXT:    movswl (%eax), %eax
-; X86-GENERIC-NEXT:    cmpl $512, %eax # imm = 0x200
+; X86-GENERIC-NEXT:    cmpw $512, (%eax) # imm = 0x200
 ; X86-GENERIC-NEXT:    setl %al
 ; X86-GENERIC-NEXT:    retl
 ;
 ; X64-GENERIC-LABEL: cmp16_load_slt_imm16:
 ; X64-GENERIC:       # %bb.0:
-; X64-GENERIC-NEXT:    movswl (%rdi), %eax
-; X64-GENERIC-NEXT:    cmpl $512, %eax # imm = 0x200
+; X64-GENERIC-NEXT:    cmpw $512, (%rdi) # imm = 0x200
 ; X64-GENERIC-NEXT:    setl %al
 ; X64-GENERIC-NEXT:    retq
 ;
@@ -761,46 +715,18 @@ define i1 @cmp16_load_slt_imm16_minsize(ptr %p0) minsize {
 }
 
 define i1 @cmp16_load_slt_imm16_optsize(ptr %p0) optsize {
-; X86-GENERIC-LABEL: cmp16_load_slt_imm16_optsize:
-; X86-GENERIC:       # %bb.0:
-; X86-GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-GENERIC-NEXT:    movswl (%eax), %eax
-; X86-GENERIC-NEXT:    cmpl $512, %eax # imm = 0x200
-; X86-GENERIC-NEXT:    setl %al
-; X86-GENERIC-NEXT:    retl
-;
-; X64-GENERIC-LABEL: cmp16_load_slt_imm16_optsize:
-; X64-GENERIC:       # %bb.0:
-; X64-GENERIC-NEXT:    movswl (%rdi), %eax
-; X64-GENERIC-NEXT:    cmpl $512, %eax # imm = 0x200
-; X64-GENERIC-NEXT:    setl %al
-; X64-GENERIC-NEXT:    retq
-;
-; X86-FAST-LABEL: cmp16_load_slt_imm16_optsize:
-; X86-FAST:       # %bb.0:
-; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT:    cmpw $512, (%eax) # imm = 0x200
-; X86-FAST-NEXT:    setl %al
-; X86-FAST-NEXT:    retl
-;
-; X64-FAST-LABEL: cmp16_load_slt_imm16_optsize:
-; X64-FAST:       # %bb.0:
-; X64-FAST-NEXT:    cmpw $512, (%rdi) # imm = 0x200
-; X64-FAST-NEXT:    setl %al
-; X64-FAST-NEXT:    retq
-;
-; X86-ATOM-LABEL: cmp16_load_slt_imm16_optsize:
-; X86-ATOM:       # %bb.0:
-; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-ATOM-NEXT:    cmpw $512, (%eax) # imm = 0x200
-; X86-ATOM-NEXT:    setl %al
-; X86-ATOM-NEXT:    retl
+; X86-LABEL: cmp16_load_slt_imm16_optsize:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    cmpw $512, (%eax) # imm = 0x200
+; X86-NEXT:    setl %al
+; X86-NEXT:    retl
 ;
-; X64-ATOM-LABEL: cmp16_load_slt_imm16_optsize:
-; X64-ATOM:       # %bb.0:
-; X64-ATOM-NEXT:    cmpw $512, (%rdi) # imm = 0x200
-; X64-ATOM-NEXT:    setl %al
-; X64-ATOM-NEXT:    retq
+; X64-LABEL: cmp16_load_slt_imm16_optsize:
+; X64:       # %bb.0:
+; X64-NEXT:    cmpw $512, (%rdi) # imm = 0x200
+; X64-NEXT:    setl %al
+; X64-NEXT:    retq
   %ld = load i16, ptr %p0
   %cmp = icmp slt i16 %ld, 512
   ret i1 %cmp
@@ -860,15 +786,13 @@ define i1 @cmp16_load_ule_imm16(ptr %p0) {
 ; X86-GENERIC-LABEL: cmp16_load_ule_imm16:
 ; X86-GENERIC:       # %bb.0:
 ; X86-GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-GENERIC-NEXT:    movzwl (%eax), %eax
-; X86-GENERIC-NEXT:    cmpl $513, %eax # imm = 0x201
+; X86-GENERIC-NEXT:    cmpw $513, (%eax) # imm = 0x201
 ; X86-GENERIC-NEXT:    setb %al
 ; X86-GENERIC-NEXT:    retl
 ;
 ; X64-GENERIC-LABEL: cmp16_load_ule_imm16:
 ; X64-GENERIC:       # %bb.0:
-; X64-GENERIC-NEXT:    movzwl (%rdi), %eax
-; X64-GENERIC-NEXT:    cmpl $513, %eax # imm = 0x201
+; X64-GENERIC-NEXT:    cmpw $513, (%rdi) # imm = 0x201
 ; X64-GENERIC-NEXT:    setb %al
 ; X64-GENERIC-NEXT:    retq
 ;
@@ -927,46 +851,18 @@ define i1 @cmp16_load_ule_imm16_minsize(ptr %p0) minsize {
 }
 
 define i1 @cmp16_load_ule_imm16_optsize(ptr %p0) optsize {
-; X86-GENERIC-LABEL: cmp16_load_ule_imm16_optsize:
-; X86-GENERIC:       # %bb.0:
-; X86-GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-GENERIC-NEXT:    movzwl (%eax), %eax
-; X86-GENERIC-NEXT:    cmpl $513, %eax # imm = 0x201
-; X86-GENERIC-NEXT:    setb %al
-; X86-GENERIC-NEXT:    retl
-;
-; X64-GENERIC-LABEL: cmp16_load_ule_imm16_optsize:
-; X64-GENERIC:       # %bb.0:
-; X64-GENERIC-NEXT:    movzwl (%rdi), %eax
-; X64-GENERIC-NEXT:    cmpl $513, %eax # imm = 0x201
-; X64-GENERIC-NEXT:    setb %al
-; X64-GENERIC-NEXT:    retq
-;
-; X86-FAST-LABEL: cmp16_load_ule_imm16_optsize:
-; X86-FAST:       # %bb.0:
-; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT:    cmpw $513, (%eax) # imm = 0x201
-; X86-FAST-NEXT:    setb %al
-; X86-FAST-NEXT:    retl
-;
-; X64-FAST-LABEL: cmp16_load_ule_imm16_optsize:
-; X64-FAST:       # %bb.0:
-; X64-FAST-NEXT:    cmpw $513, (%rdi) # imm = 0x201
-; X64-FAST-NEXT:    setb %al
-; X64-FAST-NEXT:    retq
-;
-; X86-ATOM-LABEL: cmp16_load_ule_imm16_optsize:
-; X86-ATOM:       # %bb.0:
-; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-ATOM-NEXT:    cmpw $513, (%eax) # imm = 0x201
-; X86-ATOM-NEXT:    setb %al
-; X86-ATOM-NEXT:    retl
+; X86-LABEL: cmp16_load_ule_imm16_optsize:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    cmpw $513, (%eax) # imm = 0x201
+; X86-NEXT:    setb %al
+; X86-NEXT:    retl
 ;
-; X64-ATOM-LABEL: cmp16_load_ule_imm16_optsize:
-; X64-ATOM:       # %bb.0:
-; X64-ATOM-NEXT:    cmpw $513, (%rdi) # imm = 0x201
-; X64-ATOM-NEXT:    setb %al
-; X64-ATOM-NEXT:    retq
+; X64-LABEL: cmp16_load_ule_imm16_optsize:
+; X64:       # %bb.0:
+; X64-NEXT:    cmpw $513, (%rdi) # imm = 0x201
+; X64-NEXT:    setb %al
+; X64-NEXT:    retq
   %ld = load i16, ptr %p0
   %cmp = icmp ule i16 %ld, 512
   ret i1 %cmp
diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
index 0253d13122608..ee5fd78c64379 100644
--- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
@@ -116,8 +116,7 @@ define i1 @length2_eq_const(ptr %X) nounwind {
 ; X86-LABEL: length2_eq_const:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzwl (%eax), %eax
-; X86-NEXT:    cmpl $12849, %eax # imm = 0x3231
+; X86-NEXT:    cmpw $12849, (%eax) # imm = 0x3231
 ; X86-NEXT:    setne %al
 ; X86-NEXT:    retl
   %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind
diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
index da46ea4065579..a46f9ed3d3798 100644
--- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
+++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
@@ -113,8 +113,7 @@ define i1 @length2_gt(ptr %X, ptr %Y) nounwind {
 define i1 @length2_eq_const(ptr %X) nounwind {
 ; X64-LABEL: length2_eq_const:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzwl (%rdi), %eax
-; X64-NEXT:    cmpl $12849, %eax # imm = 0x3231
+; X64-NEXT:    cmpw $12849, (%rdi) # imm = 0x3231
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
diff --git a/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll b/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
index 3db6ae8b76b29..4a9643c0f4fc8 100644
--- a/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
@@ -45,8 +45,7 @@ define i1 @length2_eq_const(ptr %X) nounwind optsize {
 ; X86-LABEL: length2_eq_const:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzwl (%eax), %eax
-; X86-NEXT:    cmpl $12849, %eax # imm = 0x3231
+; X86-NEXT:    cmpw $12849, (%eax) # imm = 0x3231
 ; X86-NEXT:    setne %al
 ; X86-NEXT:    retl
   %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind
diff --git a/llvm/test/CodeGen/X86/memcmp-optsize.ll b/llvm/test/CodeGen/X86/memcmp-optsize.ll
index edd61641ad2aa..4e27301436c34 100644
--- a/llvm/test/CodeGen/X86/memcmp-optsize.ll
+++ b/llvm/test/CodeGen/X86/memcmp-optsize.ll
@@ -41,8 +41,7 @@ define i1 @length2_eq(ptr %X, ptr %Y) nounwind optsize {
 define i1 @length2_eq_const(ptr %X) nounwind optsize {
 ; X64-LABEL: length2_eq_const:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzwl (%rdi), %eax
-; X64-NEXT:    cmpl $12849, %eax # imm = 0x3231
+; X64-NEXT:    cmpw $12849, (%rdi) # imm = 0x3231
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
diff --git a/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll b/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll
index 1c301da26beaa..bdb50f5b60c49 100644
--- a/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll
@@ -45,8 +45,7 @@ define i1 @length2_eq_const(ptr %X) nounwind !prof !14 {
 ; X86-LABEL: length2_eq_const:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzwl (%eax), %eax
-; X86-NEXT:    cmpl $12849, %eax # imm = 0x3231
+; X86-NEXT:    cmpw $12849, (%eax) # imm = 0x3231
 ; X86-NEXT:    setne %al
 ; X86-NEXT:    retl
   %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind
diff --git a/llvm/test/CodeGen/X86/memcmp-pgso.ll b/llvm/test/CodeGen/X86/memcmp-pgso.ll
index 1ee3317b9c969..9347e54220220 100644
--- a/llvm/test/CodeGen/X86/memcmp-pgso.ll
+++ b/llvm/test/CodeGen/X86/memcmp-pgso.ll
@@ -41,8 +41,7 @@ define i1 @length2_eq(ptr %X, ptr %Y) nounwind !prof !14 {
 define i1 @length2_eq_const(ptr %X) nounwind !prof !14 {
 ; X64-LABEL: length2_eq_const:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzwl (%rdi), %eax
-; X64-NEXT:    cmpl $12849, %eax # imm = 0x3231
+; X64-NEXT:    cmpw $12849, (%rdi) # imm = 0x3231
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
diff --git a/llvm/test/CodeGen/X86/memcmp-x32.ll b/llvm/test/CodeGen/X86/memcmp-x32.ll
index a63402cea2096..ad9f2a30d75bb 100644
--- a/llvm/test/CodeGen/X86/memcmp-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-x32.ll
@@ -144,8 +144,7 @@ define i1 @length2_eq_const(ptr %X) nounwind {
 ; X86-LABEL: length2_eq_const:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzwl (%eax), %eax
-; X86-NEXT:    cmpl $12849, %eax # imm = 0x3231
+; X86-NEXT:    cmpw $12849, (%eax) # imm = 0x3231
 ; X86-NEXT:    setne %al
 ; X86-NEXT:    retl
   %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind
diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll
index 83cb0d6f973be..8fe1a581cd9c2 100644
--- a/llvm/test/CodeGen/X86/memcmp.ll
+++ b/llvm/test/CodeGen/X86/memcmp.ll
@@ -139,8 +139,7 @@ define i1 @length2_gt(ptr %X, ptr %Y) nounwind {
 define i1 @length2_eq_const(ptr %X) nounwind {
 ; X64-LABEL: length2_eq_const:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzwl (%rdi), %eax
-; X64-NEXT:    cmpl $12849, %eax # imm = 0x3231
+; X64-NEXT:    cmpw $12849, (%rdi) # imm = 0x3231
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind