[llvm] [X86] EmitCmp - always use cmpw with foldable loads (PR #92251)

Wed May 15 04:55:41 PDT 2024

https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/92251

By default, EmitCmp avoids cmpw with i16 immediates due to 66/67h length-changing prefixes causing stalls, instead extending the value to i32 and using a cmpl with a i32 immediate, unless it has the TuningFastImm16 flag or we're building for optsize/minsize.

However, if we're loading the value for comparison, the performance costs of the decode stalls are likely to be exceeded by the impact of the load latency of the folded load, the shorter encoding and not needing an extra register to store the ext-load.

This matches the behaviour of gcc and msvc.

Fixes #90355

>From 50dcd466284f7f37fd0224e744b85189e129fe01 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 15 May 2024 12:50:26 +0100
Subject: [PATCH] [X86] EmitCmp - always use cmpw with foldable loads

By default, EmitCmp avoids cmpw with i16 immediates due to 66/67h length-changing prefixes causing stalls, extending to cmpl with a i32 immediate, unless it has the TuningFastImm16 flag or we're building for optsize/minsize.

However, if we're loading the value for comparison, the latency costs of the folded load are likely to match/exceed the decode stalls, the shorter encoding and not needing an extra register to store the ext-load are the more performant factors.

This matches the behaviour of gcc and msvc.

Fixes #90355
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       |   5 +-
 llvm/test/CodeGen/X86/cmp16.ll                | 196 ++++--------------
 .../CodeGen/X86/memcmp-more-load-pairs-x32.ll |   3 +-
 .../CodeGen/X86/memcmp-more-load-pairs.ll     |   3 +-
 llvm/test/CodeGen/X86/memcmp-optsize-x32.ll   |   3 +-
 llvm/test/CodeGen/X86/memcmp-optsize.ll       |   3 +-
 llvm/test/CodeGen/X86/memcmp-pgso-x32.ll      |   3 +-
 llvm/test/CodeGen/X86/memcmp-pgso.ll          |   3 +-
 llvm/test/CodeGen/X86/memcmp-x32.ll           |   3 +-
 llvm/test/CodeGen/X86/memcmp.ll               |   3 +-
 10 files changed, 57 insertions(+), 168 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a57c10e784d9c..c31be8684b920 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -22694,9 +22694,10 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
   // Only promote the compare up to I32 if it is a 16 bit operation
   // with an immediate.  16 bit immediates are to be avoided.
   if (CmpVT == MVT::i16 && !Subtarget.hasFastImm16() &&
+      !X86::mayFoldLoad(Op0, Subtarget) && !X86::mayFoldLoad(Op1, Subtarget) &&
       !DAG.getMachineFunction().getFunction().hasMinSize()) {
-    ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0);
-    ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
+    auto *COp0 = dyn_cast<ConstantSDNode>(Op0);
+    auto *COp1 = dyn_cast<ConstantSDNode>(Op1);
     // Don't do this if the immediate can fit in 8-bits.
     if ((COp0 && !COp0->getAPIntValue().isSignedIntN(8)) ||
         (COp1 && !COp1->getAPIntValue().isSignedIntN(8))) {
diff --git a/llvm/test/CodeGen/X86/cmp16.ll b/llvm/test/CodeGen/X86/cmp16.ll
index 699ea3e4dd473..fa9e75ff16a5c 100644
--- a/llvm/test/CodeGen/X86/cmp16.ll
+++ b/llvm/test/CodeGen/X86/cmp16.ll
@@ -113,8 +113,7 @@ define i1 @cmp16_reg_eq_imm8(i16 %a0) {
 define i1 @cmp16_reg_eq_imm16(i16 %a0) {
 ; X86-GENERIC-LABEL: cmp16_reg_eq_imm16:
 ; X86-GENERIC:       # %bb.0:
-; X86-GENERIC-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-GENERIC-NEXT:    cmpl $1024, %eax # imm = 0x400
+; X86-GENERIC-NEXT:    cmpw $1024, {{[0-9]+}}(%esp) # imm = 0x400
 ; X86-GENERIC-NEXT:    sete %al
 ; X86-GENERIC-NEXT:    retl
 ;
@@ -177,12 +176,11 @@ define i1 @cmp16_reg_eq_imm16_minsize(i16 %a0) minsize {
 }
 
 define i1 @cmp16_reg_eq_imm16_optsize(i16 %a0) optsize {
-; X86-GENERIC-LABEL: cmp16_reg_eq_imm16_optsize:
-; X86-GENERIC:       # %bb.0:
-; X86-GENERIC-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-GENERIC-NEXT:    cmpl $1024, %eax # imm = 0x400
-; X86-GENERIC-NEXT:    sete %al
-; X86-GENERIC-NEXT:    retl
+; X86-LABEL: cmp16_reg_eq_imm16_optsize:
+; X86:       # %bb.0:
+; X86-NEXT:    cmpw $1024, {{[0-9]+}}(%esp) # imm = 0x400
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
 ;
 ; X64-GENERIC-LABEL: cmp16_reg_eq_imm16_optsize:
 ; X64-GENERIC:       # %bb.0:
@@ -191,24 +189,12 @@ define i1 @cmp16_reg_eq_imm16_optsize(i16 %a0) optsize {
 ; X64-GENERIC-NEXT:    sete %al
 ; X64-GENERIC-NEXT:    retq
 ;
-; X86-FAST-LABEL: cmp16_reg_eq_imm16_optsize:
-; X86-FAST:       # %bb.0:
-; X86-FAST-NEXT:    cmpw $1024, {{[0-9]+}}(%esp) # imm = 0x400
-; X86-FAST-NEXT:    sete %al
-; X86-FAST-NEXT:    retl
-;
 ; X64-FAST-LABEL: cmp16_reg_eq_imm16_optsize:
 ; X64-FAST:       # %bb.0:
 ; X64-FAST-NEXT:    cmpw $1024, %di # imm = 0x400
 ; X64-FAST-NEXT:    sete %al
 ; X64-FAST-NEXT:    retq
 ;
-; X86-ATOM-LABEL: cmp16_reg_eq_imm16_optsize:
-; X86-ATOM:       # %bb.0:
-; X86-ATOM-NEXT:    cmpw $1024, {{[0-9]+}}(%esp) # imm = 0x400
-; X86-ATOM-NEXT:    sete %al
-; X86-ATOM-NEXT:    retl
-;
 ; X64-ATOM-LABEL: cmp16_reg_eq_imm16_optsize:
 ; X64-ATOM:       # %bb.0:
 ; X64-ATOM-NEXT:    cmpw $1024, %di # imm = 0x400
@@ -269,8 +255,7 @@ define i1 @cmp16_reg_sgt_imm8(i16 %a0) {
 define i1 @cmp16_reg_sgt_imm16(i16 %a0) {
 ; X86-GENERIC-LABEL: cmp16_reg_sgt_imm16:
 ; X86-GENERIC:       # %bb.0:
-; X86-GENERIC-NEXT:    movswl {{[0-9]+}}(%esp), %eax
-; X86-GENERIC-NEXT:    cmpl $-1023, %eax # imm = 0xFC01
+; X86-GENERIC-NEXT:    cmpw $-1023, {{[0-9]+}}(%esp) # imm = 0xFC01
 ; X86-GENERIC-NEXT:    setge %al
 ; X86-GENERIC-NEXT:    retl
 ;
@@ -333,12 +318,11 @@ define i1 @cmp16_reg_sgt_imm16_minsize(i16 %a0) minsize {
 }
 
 define i1 @cmp16_reg_sgt_imm16_optsize(i16 %a0) optsize {
-; X86-GENERIC-LABEL: cmp16_reg_sgt_imm16_optsize:
-; X86-GENERIC:       # %bb.0:
-; X86-GENERIC-NEXT:    movswl {{[0-9]+}}(%esp), %eax
-; X86-GENERIC-NEXT:    cmpl $-1023, %eax # imm = 0xFC01
-; X86-GENERIC-NEXT:    setge %al
-; X86-GENERIC-NEXT:    retl
+; X86-LABEL: cmp16_reg_sgt_imm16_optsize:
+; X86:       # %bb.0:
+; X86-NEXT:    cmpw $-1023, {{[0-9]+}}(%esp) # imm = 0xFC01
+; X86-NEXT:    setge %al
+; X86-NEXT:    retl
 ;
 ; X64-GENERIC-LABEL: cmp16_reg_sgt_imm16_optsize:
 ; X64-GENERIC:       # %bb.0:
@@ -347,24 +331,12 @@ define i1 @cmp16_reg_sgt_imm16_optsize(i16 %a0) optsize {
 ; X64-GENERIC-NEXT:    setge %al
 ; X64-GENERIC-NEXT:    retq
 ;
-; X86-FAST-LABEL: cmp16_reg_sgt_imm16_optsize:
-; X86-FAST:       # %bb.0:
-; X86-FAST-NEXT:    cmpw $-1023, {{[0-9]+}}(%esp) # imm = 0xFC01
-; X86-FAST-NEXT:    setge %al
-; X86-FAST-NEXT:    retl
-;
 ; X64-FAST-LABEL: cmp16_reg_sgt_imm16_optsize:
 ; X64-FAST:       # %bb.0:
 ; X64-FAST-NEXT:    cmpw $-1023, %di # imm = 0xFC01
 ; X64-FAST-NEXT:    setge %al
 ; X64-FAST-NEXT:    retq
 ;
-; X86-ATOM-LABEL: cmp16_reg_sgt_imm16_optsize:
-; X86-ATOM:       # %bb.0:
-; X86-ATOM-NEXT:    cmpw $-1023, {{[0-9]+}}(%esp) # imm = 0xFC01
-; X86-ATOM-NEXT:    setge %al
-; X86-ATOM-NEXT:    retl
-;
 ; X64-ATOM-LABEL: cmp16_reg_sgt_imm16_optsize:
 ; X64-ATOM:       # %bb.0:
 ; X64-ATOM-NEXT:    cmpw $-1023, %di # imm = 0xFC01
@@ -377,8 +349,7 @@ define i1 @cmp16_reg_sgt_imm16_optsize(i16 %a0) optsize {
 define i1 @cmp16_reg_uge_imm16(i16 %a0) {
 ; X86-GENERIC-LABEL: cmp16_reg_uge_imm16:
 ; X86-GENERIC:       # %bb.0:
-; X86-GENERIC-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-GENERIC-NEXT:    cmpl $64512, %eax # imm = 0xFC00
+; X86-GENERIC-NEXT:    cmpw $-1024, {{[0-9]+}}(%esp) # imm = 0xFC00
 ; X86-GENERIC-NEXT:    setae %al
 ; X86-GENERIC-NEXT:    retl
 ;
@@ -441,12 +412,11 @@ define i1 @cmp16_reg_uge_imm16_minsize(i16 %a0) minsize {
 }
 
 define i1 @cmp16_reg_uge_imm16_optsize(i16 %a0) optsize {
-; X86-GENERIC-LABEL: cmp16_reg_uge_imm16_optsize:
-; X86-GENERIC:       # %bb.0:
-; X86-GENERIC-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-GENERIC-NEXT:    cmpl $64512, %eax # imm = 0xFC00
-; X86-GENERIC-NEXT:    setae %al
-; X86-GENERIC-NEXT:    retl
+; X86-LABEL: cmp16_reg_uge_imm16_optsize:
+; X86:       # %bb.0:
+; X86-NEXT:    cmpw $-1024, {{[0-9]+}}(%esp) # imm = 0xFC00
+; X86-NEXT:    setae %al
+; X86-NEXT:    retl
 ;
 ; X64-GENERIC-LABEL: cmp16_reg_uge_imm16_optsize:
 ; X64-GENERIC:       # %bb.0:
@@ -455,24 +425,12 @@ define i1 @cmp16_reg_uge_imm16_optsize(i16 %a0) optsize {
 ; X64-GENERIC-NEXT:    setae %al
 ; X64-GENERIC-NEXT:    retq
 ;
-; X86-FAST-LABEL: cmp16_reg_uge_imm16_optsize:
-; X86-FAST:       # %bb.0:
-; X86-FAST-NEXT:    cmpw $-1024, {{[0-9]+}}(%esp) # imm = 0xFC00
-; X86-FAST-NEXT:    setae %al
-; X86-FAST-NEXT:    retl
-;
 ; X64-FAST-LABEL: cmp16_reg_uge_imm16_optsize:
 ; X64-FAST:       # %bb.0:
 ; X64-FAST-NEXT:    cmpw $-1024, %di # imm = 0xFC00
 ; X64-FAST-NEXT:    setae %al
 ; X64-FAST-NEXT:    retq
 ;
-; X86-ATOM-LABEL: cmp16_reg_uge_imm16_optsize:
-; X86-ATOM:       # %bb.0:
-; X86-ATOM-NEXT:    cmpw $-1024, {{[0-9]+}}(%esp) # imm = 0xFC00
-; X86-ATOM-NEXT:    setae %al
-; X86-ATOM-NEXT:    retl
-;
 ; X64-ATOM-LABEL: cmp16_reg_uge_imm16_optsize:
 ; X64-ATOM:       # %bb.0:
 ; X64-ATOM-NEXT:    cmpw $-1024, %di # imm = 0xFC00
@@ -592,15 +550,13 @@ define i1 @cmp16_load_ne_imm16(ptr %p0) {
 ; X86-GENERIC-LABEL: cmp16_load_ne_imm16:
 ; X86-GENERIC:       # %bb.0:
 ; X86-GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-GENERIC-NEXT:    movzwl (%eax), %eax
-; X86-GENERIC-NEXT:    cmpl $512, %eax # imm = 0x200
+; X86-GENERIC-NEXT:    cmpw $512, (%eax) # imm = 0x200
 ; X86-GENERIC-NEXT:    setne %al
 ; X86-GENERIC-NEXT:    retl
 ;
 ; X64-GENERIC-LABEL: cmp16_load_ne_imm16:
 ; X64-GENERIC:       # %bb.0:
-; X64-GENERIC-NEXT:    movzwl (%rdi), %eax
-; X64-GENERIC-NEXT:    cmpl $512, %eax # imm = 0x200
+; X64-GENERIC-NEXT:    cmpw $512, (%rdi) # imm = 0x200
 ; X64-GENERIC-NEXT:    setne %al
 ; X64-GENERIC-NEXT:    retq
 ;
@@ -694,15 +650,13 @@ define i1 @cmp16_load_slt_imm16(ptr %p0) {
 ; X86-GENERIC-LABEL: cmp16_load_slt_imm16:
 ; X86-GENERIC:       # %bb.0:
 ; X86-GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-GENERIC-NEXT:    movswl (%eax), %eax
-; X86-GENERIC-NEXT:    cmpl $512, %eax # imm = 0x200
+; X86-GENERIC-NEXT:    cmpw $512, (%eax) # imm = 0x200
 ; X86-GENERIC-NEXT:    setl %al
 ; X86-GENERIC-NEXT:    retl
 ;
 ; X64-GENERIC-LABEL: cmp16_load_slt_imm16:
 ; X64-GENERIC:       # %bb.0:
-; X64-GENERIC-NEXT:    movswl (%rdi), %eax
-; X64-GENERIC-NEXT:    cmpl $512, %eax # imm = 0x200
+; X64-GENERIC-NEXT:    cmpw $512, (%rdi) # imm = 0x200
 ; X64-GENERIC-NEXT:    setl %al
 ; X64-GENERIC-NEXT:    retq
 ;
@@ -761,46 +715,18 @@ define i1 @cmp16_load_slt_imm16_minsize(ptr %p0) minsize {
 }
 
 define i1 @cmp16_load_slt_imm16_optsize(ptr %p0) optsize {
-; X86-GENERIC-LABEL: cmp16_load_slt_imm16_optsize:
-; X86-GENERIC:       # %bb.0:
-; X86-GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-GENERIC-NEXT:    movswl (%eax), %eax
-; X86-GENERIC-NEXT:    cmpl $512, %eax # imm = 0x200
-; X86-GENERIC-NEXT:    setl %al
-; X86-GENERIC-NEXT:    retl
-;
-; X64-GENERIC-LABEL: cmp16_load_slt_imm16_optsize:
-; X64-GENERIC:       # %bb.0:
-; X64-GENERIC-NEXT:    movswl (%rdi), %eax
-; X64-GENERIC-NEXT:    cmpl $512, %eax # imm = 0x200
-; X64-GENERIC-NEXT:    setl %al
-; X64-GENERIC-NEXT:    retq
-;
-; X86-FAST-LABEL: cmp16_load_slt_imm16_optsize:
-; X86-FAST:       # %bb.0:
-; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT:    cmpw $512, (%eax) # imm = 0x200
-; X86-FAST-NEXT:    setl %al
-; X86-FAST-NEXT:    retl
-;
-; X64-FAST-LABEL: cmp16_load_slt_imm16_optsize:
-; X64-FAST:       # %bb.0:
-; X64-FAST-NEXT:    cmpw $512, (%rdi) # imm = 0x200
-; X64-FAST-NEXT:    setl %al
-; X64-FAST-NEXT:    retq
-;
-; X86-ATOM-LABEL: cmp16_load_slt_imm16_optsize:
-; X86-ATOM:       # %bb.0:
-; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-ATOM-NEXT:    cmpw $512, (%eax) # imm = 0x200
-; X86-ATOM-NEXT:    setl %al
-; X86-ATOM-NEXT:    retl
+; X86-LABEL: cmp16_load_slt_imm16_optsize:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    cmpw $512, (%eax) # imm = 0x200
+; X86-NEXT:    setl %al
+; X86-NEXT:    retl
 ;
-; X64-ATOM-LABEL: cmp16_load_slt_imm16_optsize:
-; X64-ATOM:       # %bb.0:
-; X64-ATOM-NEXT:    cmpw $512, (%rdi) # imm = 0x200
-; X64-ATOM-NEXT:    setl %al
-; X64-ATOM-NEXT:    retq
+; X64-LABEL: cmp16_load_slt_imm16_optsize:
+; X64:       # %bb.0:
+; X64-NEXT:    cmpw $512, (%rdi) # imm = 0x200
+; X64-NEXT:    setl %al
+; X64-NEXT:    retq
   %ld = load i16, ptr %p0
   %cmp = icmp slt i16 %ld, 512
   ret i1 %cmp
@@ -860,15 +786,13 @@ define i1 @cmp16_load_ule_imm16(ptr %p0) {
 ; X86-GENERIC-LABEL: cmp16_load_ule_imm16:
 ; X86-GENERIC:       # %bb.0:
 ; X86-GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-GENERIC-NEXT:    movzwl (%eax), %eax
-; X86-GENERIC-NEXT:    cmpl $513, %eax # imm = 0x201
+; X86-GENERIC-NEXT:    cmpw $513, (%eax) # imm = 0x201
 ; X86-GENERIC-NEXT:    setb %al
 ; X86-GENERIC-NEXT:    retl
 ;
 ; X64-GENERIC-LABEL: cmp16_load_ule_imm16:
 ; X64-GENERIC:       # %bb.0:
-; X64-GENERIC-NEXT:    movzwl (%rdi), %eax
-; X64-GENERIC-NEXT:    cmpl $513, %eax # imm = 0x201
+; X64-GENERIC-NEXT:    cmpw $513, (%rdi) # imm = 0x201
 ; X64-GENERIC-NEXT:    setb %al
 ; X64-GENERIC-NEXT:    retq
 ;
@@ -927,46 +851,18 @@ define i1 @cmp16_load_ule_imm16_minsize(ptr %p0) minsize {
 }
 
 define i1 @cmp16_load_ule_imm16_optsize(ptr %p0) optsize {
-; X86-GENERIC-LABEL: cmp16_load_ule_imm16_optsize:
-; X86-GENERIC:       # %bb.0:
-; X86-GENERIC-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-GENERIC-NEXT:    movzwl (%eax), %eax
-; X86-GENERIC-NEXT:    cmpl $513, %eax # imm = 0x201
-; X86-GENERIC-NEXT:    setb %al
-; X86-GENERIC-NEXT:    retl
-;
-; X64-GENERIC-LABEL: cmp16_load_ule_imm16_optsize:
-; X64-GENERIC:       # %bb.0:
-; X64-GENERIC-NEXT:    movzwl (%rdi), %eax
-; X64-GENERIC-NEXT:    cmpl $513, %eax # imm = 0x201
-; X64-GENERIC-NEXT:    setb %al
-; X64-GENERIC-NEXT:    retq
-;
-; X86-FAST-LABEL: cmp16_load_ule_imm16_optsize:
-; X86-FAST:       # %bb.0:
-; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-FAST-NEXT:    cmpw $513, (%eax) # imm = 0x201
-; X86-FAST-NEXT:    setb %al
-; X86-FAST-NEXT:    retl
-;
-; X64-FAST-LABEL: cmp16_load_ule_imm16_optsize:
-; X64-FAST:       # %bb.0:
-; X64-FAST-NEXT:    cmpw $513, (%rdi) # imm = 0x201
-; X64-FAST-NEXT:    setb %al
-; X64-FAST-NEXT:    retq
-;
-; X86-ATOM-LABEL: cmp16_load_ule_imm16_optsize:
-; X86-ATOM:       # %bb.0:
-; X86-ATOM-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-ATOM-NEXT:    cmpw $513, (%eax) # imm = 0x201
-; X86-ATOM-NEXT:    setb %al
-; X86-ATOM-NEXT:    retl
+; X86-LABEL: cmp16_load_ule_imm16_optsize:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    cmpw $513, (%eax) # imm = 0x201
+; X86-NEXT:    setb %al
+; X86-NEXT:    retl
 ;
-; X64-ATOM-LABEL: cmp16_load_ule_imm16_optsize:
-; X64-ATOM:       # %bb.0:
-; X64-ATOM-NEXT:    cmpw $513, (%rdi) # imm = 0x201
-; X64-ATOM-NEXT:    setb %al
-; X64-ATOM-NEXT:    retq
+; X64-LABEL: cmp16_load_ule_imm16_optsize:
+; X64:       # %bb.0:
+; X64-NEXT:    cmpw $513, (%rdi) # imm = 0x201
+; X64-NEXT:    setb %al
+; X64-NEXT:    retq
   %ld = load i16, ptr %p0
   %cmp = icmp ule i16 %ld, 512
   ret i1 %cmp
diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
index 0253d13122608..ee5fd78c64379 100644
--- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
@@ -116,8 +116,7 @@ define i1 @length2_eq_const(ptr %X) nounwind {
 ; X86-LABEL: length2_eq_const:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzwl (%eax), %eax
-; X86-NEXT:    cmpl $12849, %eax # imm = 0x3231
+; X86-NEXT:    cmpw $12849, (%eax) # imm = 0x3231
 ; X86-NEXT:    setne %al
 ; X86-NEXT:    retl
   %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind
diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
index da46ea4065579..a46f9ed3d3798 100644
--- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
+++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
@@ -113,8 +113,7 @@ define i1 @length2_gt(ptr %X, ptr %Y) nounwind {
 define i1 @length2_eq_const(ptr %X) nounwind {
 ; X64-LABEL: length2_eq_const:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzwl (%rdi), %eax
-; X64-NEXT:    cmpl $12849, %eax # imm = 0x3231
+; X64-NEXT:    cmpw $12849, (%rdi) # imm = 0x3231
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
diff --git a/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll b/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
index 3db6ae8b76b29..4a9643c0f4fc8 100644
--- a/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
@@ -45,8 +45,7 @@ define i1 @length2_eq_const(ptr %X) nounwind optsize {
 ; X86-LABEL: length2_eq_const:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzwl (%eax), %eax
-; X86-NEXT:    cmpl $12849, %eax # imm = 0x3231
+; X86-NEXT:    cmpw $12849, (%eax) # imm = 0x3231
 ; X86-NEXT:    setne %al
 ; X86-NEXT:    retl
   %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind
diff --git a/llvm/test/CodeGen/X86/memcmp-optsize.ll b/llvm/test/CodeGen/X86/memcmp-optsize.ll
index edd61641ad2aa..4e27301436c34 100644
--- a/llvm/test/CodeGen/X86/memcmp-optsize.ll
+++ b/llvm/test/CodeGen/X86/memcmp-optsize.ll
@@ -41,8 +41,7 @@ define i1 @length2_eq(ptr %X, ptr %Y) nounwind optsize {
 define i1 @length2_eq_const(ptr %X) nounwind optsize {
 ; X64-LABEL: length2_eq_const:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzwl (%rdi), %eax
-; X64-NEXT:    cmpl $12849, %eax # imm = 0x3231
+; X64-NEXT:    cmpw $12849, (%rdi) # imm = 0x3231
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
diff --git a/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll b/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll
index 1c301da26beaa..bdb50f5b60c49 100644
--- a/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll
@@ -45,8 +45,7 @@ define i1 @length2_eq_const(ptr %X) nounwind !prof !14 {
 ; X86-LABEL: length2_eq_const:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzwl (%eax), %eax
-; X86-NEXT:    cmpl $12849, %eax # imm = 0x3231
+; X86-NEXT:    cmpw $12849, (%eax) # imm = 0x3231
 ; X86-NEXT:    setne %al
 ; X86-NEXT:    retl
   %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind
diff --git a/llvm/test/CodeGen/X86/memcmp-pgso.ll b/llvm/test/CodeGen/X86/memcmp-pgso.ll
index 1ee3317b9c969..9347e54220220 100644
--- a/llvm/test/CodeGen/X86/memcmp-pgso.ll
+++ b/llvm/test/CodeGen/X86/memcmp-pgso.ll
@@ -41,8 +41,7 @@ define i1 @length2_eq(ptr %X, ptr %Y) nounwind !prof !14 {
 define i1 @length2_eq_const(ptr %X) nounwind !prof !14 {
 ; X64-LABEL: length2_eq_const:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzwl (%rdi), %eax
-; X64-NEXT:    cmpl $12849, %eax # imm = 0x3231
+; X64-NEXT:    cmpw $12849, (%rdi) # imm = 0x3231
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([65 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind
diff --git a/llvm/test/CodeGen/X86/memcmp-x32.ll b/llvm/test/CodeGen/X86/memcmp-x32.ll
index a63402cea2096..ad9f2a30d75bb 100644
--- a/llvm/test/CodeGen/X86/memcmp-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-x32.ll
@@ -144,8 +144,7 @@ define i1 @length2_eq_const(ptr %X) nounwind {
 ; X86-LABEL: length2_eq_const:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movzwl (%eax), %eax
-; X86-NEXT:    cmpl $12849, %eax # imm = 0x3231
+; X86-NEXT:    cmpw $12849, (%eax) # imm = 0x3231
 ; X86-NEXT:    setne %al
 ; X86-NEXT:    retl
   %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i32 2) nounwind
diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll
index 83cb0d6f973be..8fe1a581cd9c2 100644
--- a/llvm/test/CodeGen/X86/memcmp.ll
+++ b/llvm/test/CodeGen/X86/memcmp.ll
@@ -139,8 +139,7 @@ define i1 @length2_gt(ptr %X, ptr %Y) nounwind {
 define i1 @length2_eq_const(ptr %X) nounwind {
 ; X64-LABEL: length2_eq_const:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzwl (%rdi), %eax
-; X64-NEXT:    cmpl $12849, %eax # imm = 0x3231
+; X64-NEXT:    cmpw $12849, (%rdi) # imm = 0x3231
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr getelementptr inbounds ([513 x i8], ptr @.str, i32 0, i32 1), i64 2) nounwind