[llvm] d9245e8 - [X86][ISEL] Add NDD entries in X86ISelDAGToDAG.cpp

Fri Jan 26 07:03:19 PST 2024

Author: Shengchen Kan
Date: 2024-01-26T23:02:53+08:00
New Revision: d9245e8b471c6b3f61e3810faa9788b4994e295a

URL: https://github.com/llvm/llvm-project/commit/d9245e8b471c6b3f61e3810faa9788b4994e295a
DIFF: https://github.com/llvm/llvm-project/commit/d9245e8b471c6b3f61e3810faa9788b4994e295a.diff

LOG: [X86][ISEL] Add NDD entries in X86ISelDAGToDAG.cpp

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
    llvm/test/CodeGen/X86/cmp.ll
    llvm/test/CodeGen/X86/popcnt.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index c0b7a5523b5d8f3..c8f80ced354538f 100644

--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -278,9 +278,11 @@ namespace {
 
       Scale = getI8Imm(AM.Scale, DL);
 
+#define GET_ND_IF_ENABLED(OPC) (Subtarget->hasNDD() ? OPC##_ND : OPC)
       // Negate the index if needed.
       if (AM.NegateIndex) {
-        unsigned NegOpc = VT == MVT::i64 ? X86::NEG64r : X86::NEG32r;
+        unsigned NegOpc = VT == MVT::i64 ? GET_ND_IF_ENABLED(X86::NEG64r)
+                                         : GET_ND_IF_ENABLED(X86::NEG32r);
         SDValue Neg = SDValue(CurDAG->getMachineNode(NegOpc, DL, VT, MVT::i32,
                                                      AM.IndexReg), 0);
         AM.IndexReg = Neg;
@@ -4143,7 +4145,8 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
   if (!PreferBEXTR) {
     // We still need to apply the shift.
     SDValue ShAmt = CurDAG->getTargetConstant(Shift, dl, NVT);
-    unsigned NewOpc = NVT == MVT::i64 ? X86::SHR64ri : X86::SHR32ri;
+    unsigned NewOpc = NVT == MVT::i64 ? GET_ND_IF_ENABLED(X86::SHR64ri)
+                                      : GET_ND_IF_ENABLED(X86::SHR32ri);
     NewNode =
         CurDAG->getMachineNode(NewOpc, dl, NVT, SDValue(NewNode, 0), ShAmt);
   }
@@ -5338,41 +5341,101 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
     case MVT::i8:
       switch (Opcode) {
       default: llvm_unreachable("Unexpected opcode!");
-      case ISD::ADD: ROpc = X86::ADD8rr; MOpc = X86::ADD8rm; break;
-      case ISD::SUB: ROpc = X86::SUB8rr; MOpc = X86::SUB8rm; break;
-      case ISD::AND: ROpc = X86::AND8rr; MOpc = X86::AND8rm; break;
-      case ISD::OR:  ROpc = X86::OR8rr;  MOpc = X86::OR8rm;  break;
-      case ISD::XOR: ROpc = X86::XOR8rr; MOpc = X86::XOR8rm; break;
+      case ISD::ADD:
+        ROpc = GET_ND_IF_ENABLED(X86::ADD8rr);
+        MOpc = GET_ND_IF_ENABLED(X86::ADD8rm);
+        break;
+      case ISD::SUB:
+        ROpc = GET_ND_IF_ENABLED(X86::SUB8rr);
+        MOpc = GET_ND_IF_ENABLED(X86::SUB8rm);
+        break;
+      case ISD::AND:
+        ROpc = GET_ND_IF_ENABLED(X86::AND8rr);
+        MOpc = GET_ND_IF_ENABLED(X86::AND8rm);
+        break;
+      case ISD::OR:
+        ROpc = GET_ND_IF_ENABLED(X86::OR8rr);
+        MOpc = GET_ND_IF_ENABLED(X86::OR8rm);
+        break;
+      case ISD::XOR:
+        ROpc = GET_ND_IF_ENABLED(X86::XOR8rr);
+        MOpc = GET_ND_IF_ENABLED(X86::XOR8rm);
+        break;
       }
       break;
     case MVT::i16:
       switch (Opcode) {
       default: llvm_unreachable("Unexpected opcode!");
-      case ISD::ADD: ROpc = X86::ADD16rr; MOpc = X86::ADD16rm; break;
-      case ISD::SUB: ROpc = X86::SUB16rr; MOpc = X86::SUB16rm; break;
-      case ISD::AND: ROpc = X86::AND16rr; MOpc = X86::AND16rm; break;
-      case ISD::OR:  ROpc = X86::OR16rr;  MOpc = X86::OR16rm;  break;
-      case ISD::XOR: ROpc = X86::XOR16rr; MOpc = X86::XOR16rm; break;
+      case ISD::ADD:
+        ROpc = GET_ND_IF_ENABLED(X86::ADD16rr);
+        MOpc = GET_ND_IF_ENABLED(X86::ADD16rm);
+        break;
+      case ISD::SUB:
+        ROpc = GET_ND_IF_ENABLED(X86::SUB16rr);
+        MOpc = GET_ND_IF_ENABLED(X86::SUB16rm);
+        break;
+      case ISD::AND:
+        ROpc = GET_ND_IF_ENABLED(X86::AND16rr);
+        MOpc = GET_ND_IF_ENABLED(X86::AND16rm);
+        break;
+      case ISD::OR:
+        ROpc = GET_ND_IF_ENABLED(X86::OR16rr);
+        MOpc = GET_ND_IF_ENABLED(X86::OR16rm);
+        break;
+      case ISD::XOR:
+        ROpc = GET_ND_IF_ENABLED(X86::XOR16rr);
+        MOpc = GET_ND_IF_ENABLED(X86::XOR16rm);
+        break;
       }
       break;
     case MVT::i32:
       switch (Opcode) {
       default: llvm_unreachable("Unexpected opcode!");
-      case ISD::ADD: ROpc = X86::ADD32rr; MOpc = X86::ADD32rm; break;
-      case ISD::SUB: ROpc = X86::SUB32rr; MOpc = X86::SUB32rm; break;
-      case ISD::AND: ROpc = X86::AND32rr; MOpc = X86::AND32rm; break;
-      case ISD::OR:  ROpc = X86::OR32rr;  MOpc = X86::OR32rm;  break;
-      case ISD::XOR: ROpc = X86::XOR32rr; MOpc = X86::XOR32rm; break;
+      case ISD::ADD:
+        ROpc = GET_ND_IF_ENABLED(X86::ADD32rr);
+        MOpc = GET_ND_IF_ENABLED(X86::ADD32rm);
+        break;
+      case ISD::SUB:
+        ROpc = GET_ND_IF_ENABLED(X86::SUB32rr);
+        MOpc = GET_ND_IF_ENABLED(X86::SUB32rm);
+        break;
+      case ISD::AND:
+        ROpc = GET_ND_IF_ENABLED(X86::AND32rr);
+        MOpc = GET_ND_IF_ENABLED(X86::AND32rm);
+        break;
+      case ISD::OR:
+        ROpc = GET_ND_IF_ENABLED(X86::OR32rr);
+        MOpc = GET_ND_IF_ENABLED(X86::OR32rm);
+        break;
+      case ISD::XOR:
+        ROpc = GET_ND_IF_ENABLED(X86::XOR32rr);
+        MOpc = GET_ND_IF_ENABLED(X86::XOR32rm);
+        break;
       }
       break;
     case MVT::i64:
       switch (Opcode) {
       default: llvm_unreachable("Unexpected opcode!");
-      case ISD::ADD: ROpc = X86::ADD64rr; MOpc = X86::ADD64rm; break;
-      case ISD::SUB: ROpc = X86::SUB64rr; MOpc = X86::SUB64rm; break;
-      case ISD::AND: ROpc = X86::AND64rr; MOpc = X86::AND64rm; break;
-      case ISD::OR:  ROpc = X86::OR64rr;  MOpc = X86::OR64rm;  break;
-      case ISD::XOR: ROpc = X86::XOR64rr; MOpc = X86::XOR64rm; break;
+      case ISD::ADD:
+        ROpc = GET_ND_IF_ENABLED(X86::ADD64rr);
+        MOpc = GET_ND_IF_ENABLED(X86::ADD64rm);
+        break;
+      case ISD::SUB:
+        ROpc = GET_ND_IF_ENABLED(X86::SUB64rr);
+        MOpc = GET_ND_IF_ENABLED(X86::SUB64rm);
+        break;
+      case ISD::AND:
+        ROpc = GET_ND_IF_ENABLED(X86::AND64rr);
+        MOpc = GET_ND_IF_ENABLED(X86::AND64rm);
+        break;
+      case ISD::OR:
+        ROpc = GET_ND_IF_ENABLED(X86::OR64rr);
+        MOpc = GET_ND_IF_ENABLED(X86::OR64rm);
+        break;
+      case ISD::XOR:
+        ROpc = GET_ND_IF_ENABLED(X86::XOR64rr);
+        MOpc = GET_ND_IF_ENABLED(X86::XOR64rm);
+        break;
       }
       break;
     }
@@ -5918,7 +5981,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
           // If the mask covers the most significant bit, then we can replace
           // TEST+AND with a SHR and check eflags.
           // This emits a redundant TEST which is subsequently eliminated.
-          ShiftOpcode = X86::SHR64ri;
+          ShiftOpcode = GET_ND_IF_ENABLED(X86::SHR64ri);
           ShiftAmt = TrailingZeros;
           SubRegIdx = 0;
           TestOpcode = X86::TEST64rr;
@@ -5926,7 +5989,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
           // If the mask covers the least significant bit, then we can replace
           // TEST+AND with a SHL and check eflags.
           // This emits a redundant TEST which is subsequently eliminated.
-          ShiftOpcode = X86::SHL64ri;
+          ShiftOpcode = GET_ND_IF_ENABLED(X86::SHL64ri);
           ShiftAmt = LeadingZeros;
           SubRegIdx = 0;
           TestOpcode = X86::TEST64rr;
@@ -5935,19 +5998,19 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
           // wide, then replace it with a SHR and a TEST8rr/TEST16rr/TEST32rr.
           unsigned PopCount = 64 - LeadingZeros - TrailingZeros;
           if (PopCount == 8) {
-            ShiftOpcode = X86::SHR64ri;
+            ShiftOpcode = GET_ND_IF_ENABLED(X86::SHR64ri);
             ShiftAmt = TrailingZeros;
             SubRegIdx = X86::sub_8bit;
             SubRegVT = MVT::i8;
             TestOpcode = X86::TEST8rr;
           } else if (PopCount == 16) {
-            ShiftOpcode = X86::SHR64ri;
+            ShiftOpcode = GET_ND_IF_ENABLED(X86::SHR64ri);
             ShiftAmt = TrailingZeros;
             SubRegIdx = X86::sub_16bit;
             SubRegVT = MVT::i16;
             TestOpcode = X86::TEST16rr;
           } else if (PopCount == 32) {
-            ShiftOpcode = X86::SHR64ri;
+            ShiftOpcode = GET_ND_IF_ENABLED(X86::SHR64ri);
             ShiftAmt = TrailingZeros;
             SubRegIdx = X86::sub_32bit;
             SubRegVT = MVT::i32;

diff  --git a/llvm/test/CodeGen/X86/cmp.ll b/llvm/test/CodeGen/X86/cmp.ll
index e6ab3ec55ad92ed..89879c7f433644a 100644
--- a/llvm/test/CodeGen/X86/cmp.ll
+++ b/llvm/test/CodeGen/X86/cmp.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -show-mc-encoding | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ndd -show-mc-encoding | FileCheck --check-prefix=NDD %s
 
 @d = dso_local global i8 0, align 1
 @d64 = dso_local global i64 0
@@ -16,6 +17,18 @@ define i32 @test1(i32 %X, ptr %y) nounwind {
 ; CHECK-NEXT:  .LBB0_2: # %ReturnBlock
 ; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: test1:
+; NDD:       # %bb.0: # %entry
+; NDD-NEXT:    cmpl $0, (%rsi) # encoding: [0x83,0x3e,0x00]
+; NDD-NEXT:    je .LBB0_2 # encoding: [0x74,A]
+; NDD-NEXT:    # fixup A - offset: 1, value: .LBB0_2-1, kind: FK_PCRel_1
+; NDD-NEXT:  # %bb.1: # %cond_true
+; NDD-NEXT:    movl $1, %eax # encoding: [0xb8,0x01,0x00,0x00,0x00]
+; NDD-NEXT:    retq # encoding: [0xc3]
+; NDD-NEXT:  .LBB0_2: # %ReturnBlock
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
 entry:
   %tmp = load i32, ptr %y
   %tmp.upgrd.1 = icmp eq i32 %tmp, 0
@@ -41,6 +54,19 @@ define i32 @test2(i32 %X, ptr %y) nounwind {
 ; CHECK-NEXT:  .LBB1_2: # %ReturnBlock
 ; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: test2:
+; NDD:       # %bb.0: # %entry
+; NDD-NEXT:    testl $536870911, (%rsi) # encoding: [0xf7,0x06,0xff,0xff,0xff,0x1f]
+; NDD-NEXT:    # imm = 0x1FFFFFFF
+; NDD-NEXT:    je .LBB1_2 # encoding: [0x74,A]
+; NDD-NEXT:    # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1
+; NDD-NEXT:  # %bb.1: # %cond_true
+; NDD-NEXT:    movl $1, %eax # encoding: [0xb8,0x01,0x00,0x00,0x00]
+; NDD-NEXT:    retq # encoding: [0xc3]
+; NDD-NEXT:  .LBB1_2: # %ReturnBlock
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
 entry:
   %tmp = load i32, ptr %y
   %tmp1 = shl i32 %tmp, 3
@@ -66,6 +92,18 @@ define i8 @test2b(i8 %X, ptr %y) nounwind {
 ; CHECK-NEXT:  .LBB2_2: # %ReturnBlock
 ; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: test2b:
+; NDD:       # %bb.0: # %entry
+; NDD-NEXT:    testb $31, (%rsi) # encoding: [0xf6,0x06,0x1f]
+; NDD-NEXT:    je .LBB2_2 # encoding: [0x74,A]
+; NDD-NEXT:    # fixup A - offset: 1, value: .LBB2_2-1, kind: FK_PCRel_1
+; NDD-NEXT:  # %bb.1: # %cond_true
+; NDD-NEXT:    movb $1, %al # encoding: [0xb0,0x01]
+; NDD-NEXT:    retq # encoding: [0xc3]
+; NDD-NEXT:  .LBB2_2: # %ReturnBlock
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
 entry:
   %tmp = load i8, ptr %y
   %tmp1 = shl i8 %tmp, 3
@@ -86,6 +124,13 @@ define i64 @test3(i64 %x) nounwind {
 ; CHECK-NEXT:    testq %rdi, %rdi # encoding: [0x48,0x85,0xff]
 ; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: test3:
+; NDD:       # %bb.0: # %entry
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    testq %rdi, %rdi # encoding: [0x48,0x85,0xff]
+; NDD-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
 entry:
   %t = icmp eq i64 %x, 0
   %r = zext i1 %t to i64
@@ -99,6 +144,13 @@ define i64 @test4(i64 %x) nounwind {
 ; CHECK-NEXT:    testq %rdi, %rdi # encoding: [0x48,0x85,0xff]
 ; CHECK-NEXT:    setle %al # encoding: [0x0f,0x9e,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: test4:
+; NDD:       # %bb.0:
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    testq %rdi, %rdi # encoding: [0x48,0x85,0xff]
+; NDD-NEXT:    setle %al # encoding: [0x0f,0x9e,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %t = icmp slt i64 %x, 1
   %r = zext i1 %t to i64
   ret i64 %r
@@ -124,6 +176,26 @@ define i32 @test5(double %A) nounwind {
 ; CHECK-NEXT:    jmp foo@PLT # TAILCALL
 ; CHECK-NEXT:    # encoding: [0xeb,A]
 ; CHECK-NEXT:    # fixup A - offset: 1, value: foo@PLT-1, kind: FK_PCRel_1
+;
+; NDD-LABEL: test5:
+; NDD:       # %bb.0: # %entry
+; NDD-NEXT:    ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # encoding: [0x66,0x0f,0x2e,0x05,A,A,A,A]
+; NDD-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
+; NDD-NEXT:    ja .LBB5_3 # encoding: [0x77,A]
+; NDD-NEXT:    # fixup A - offset: 1, value: .LBB5_3-1, kind: FK_PCRel_1
+; NDD-NEXT:  # %bb.1: # %entry
+; NDD-NEXT:    ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # encoding: [0x66,0x0f,0x2e,0x05,A,A,A,A]
+; NDD-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
+; NDD-NEXT:    jb .LBB5_3 # encoding: [0x72,A]
+; NDD-NEXT:    # fixup A - offset: 1, value: .LBB5_3-1, kind: FK_PCRel_1
+; NDD-NEXT:  # %bb.2: # %bb12
+; NDD-NEXT:    movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00]
+; NDD-NEXT:    retq # encoding: [0xc3]
+; NDD-NEXT:  .LBB5_3: # %bb8
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    jmp foo@PLT # TAILCALL
+; NDD-NEXT:    # encoding: [0xeb,A]
+; NDD-NEXT:    # fixup A - offset: 1, value: foo@PLT-1, kind: FK_PCRel_1
 entry:
   %tmp2 = fcmp ogt double %A, 1.500000e+02
   %tmp5 = fcmp ult double %A, 7.500000e+01
@@ -152,6 +224,18 @@ define i32 @test6() nounwind align 2 {
 ; CHECK-NEXT:  .LBB6_1: # %T
 ; CHECK-NEXT:    movl $1, %eax # encoding: [0xb8,0x01,0x00,0x00,0x00]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: test6:
+; NDD:       # %bb.0: # %entry
+; NDD-NEXT:    cmpq $0, -{{[0-9]+}}(%rsp) # encoding: [0x48,0x83,0x7c,0x24,0xf8,0x00]
+; NDD-NEXT:    je .LBB6_1 # encoding: [0x74,A]
+; NDD-NEXT:    # fixup A - offset: 1, value: .LBB6_1-1, kind: FK_PCRel_1
+; NDD-NEXT:  # %bb.2: # %F
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
+; NDD-NEXT:  .LBB6_1: # %T
+; NDD-NEXT:    movl $1, %eax # encoding: [0xb8,0x01,0x00,0x00,0x00]
+; NDD-NEXT:    retq # encoding: [0xc3]
 entry:
   %A = alloca { i64, i64 }, align 8
   %B = getelementptr inbounds { i64, i64 }, ptr %A, i64 0, i32 1
@@ -173,6 +257,13 @@ define i32 @test7(i64 %res) nounwind {
 ; CHECK-NEXT:    shrq $32, %rdi # encoding: [0x48,0xc1,0xef,0x20]
 ; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: test7:
+; NDD:       # %bb.0: # %entry
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    shrq $32, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x20]
+; NDD-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
 entry:
   %lnot = icmp ult i64 %res, 4294967296
   %lnot.ext = zext i1 %lnot to i32
@@ -187,6 +278,14 @@ define i32 @test8(i64 %res) nounwind {
 ; CHECK-NEXT:    cmpl $3, %edi # encoding: [0x83,0xff,0x03]
 ; CHECK-NEXT:    setb %al # encoding: [0x0f,0x92,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: test8:
+; NDD:       # %bb.0:
+; NDD-NEXT:    shrq $32, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x20]
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    cmpl $3, %ecx # encoding: [0x83,0xf9,0x03]
+; NDD-NEXT:    setb %al # encoding: [0x0f,0x92,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %lnot = icmp ult i64 %res, 12884901888
   %lnot.ext = zext i1 %lnot to i32
   ret i32 %lnot.ext
@@ -199,6 +298,13 @@ define i32 @test9(i64 %res) nounwind {
 ; CHECK-NEXT:    shrq $33, %rdi # encoding: [0x48,0xc1,0xef,0x21]
 ; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: test9:
+; NDD:       # %bb.0:
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    shrq $33, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x21]
+; NDD-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %lnot = icmp ult i64 %res, 8589934592
   %lnot.ext = zext i1 %lnot to i32
   ret i32 %lnot.ext
@@ -211,6 +317,13 @@ define i32 @test10(i64 %res) nounwind {
 ; CHECK-NEXT:    shrq $32, %rdi # encoding: [0x48,0xc1,0xef,0x20]
 ; CHECK-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: test10:
+; NDD:       # %bb.0:
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    shrq $32, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x20]
+; NDD-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %lnot = icmp uge i64 %res, 4294967296
   %lnot.ext = zext i1 %lnot to i32
   ret i32 %lnot.ext
@@ -224,6 +337,14 @@ define i32 @test11(i64 %l) nounwind {
 ; CHECK-NEXT:    cmpl $1, %edi # encoding: [0x83,0xff,0x01]
 ; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: test11:
+; NDD:       # %bb.0:
+; NDD-NEXT:    shrq $47, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x2f]
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    cmpl $1, %ecx # encoding: [0x83,0xf9,0x01]
+; NDD-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %shr.mask = and i64 %l, -140737488355328
   %cmp = icmp eq i64 %shr.mask, 140737488355328
   %conv = zext i1 %cmp to i32
@@ -251,6 +372,27 @@ define i32 @test12() ssp uwtable {
 ; CHECK-NEXT:    popq %rcx # encoding: [0x59]
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: test12:
+; NDD:       # %bb.0: # %entry
+; NDD-NEXT:    pushq %rax # encoding: [0x50]
+; NDD-NEXT:    .cfi_def_cfa_offset 16
+; NDD-NEXT:    callq test12b@PLT # encoding: [0xe8,A,A,A,A]
+; NDD-NEXT:    # fixup A - offset: 1, value: test12b@PLT-4, kind: FK_PCRel_4
+; NDD-NEXT:    testb %al, %al # encoding: [0x84,0xc0]
+; NDD-NEXT:    je .LBB12_2 # encoding: [0x74,A]
+; NDD-NEXT:    # fixup A - offset: 1, value: .LBB12_2-1, kind: FK_PCRel_1
+; NDD-NEXT:  # %bb.1: # %T
+; NDD-NEXT:    movl $1, %eax # encoding: [0xb8,0x01,0x00,0x00,0x00]
+; NDD-NEXT:    popq %rcx # encoding: [0x59]
+; NDD-NEXT:    .cfi_def_cfa_offset 8
+; NDD-NEXT:    retq # encoding: [0xc3]
+; NDD-NEXT:  .LBB12_2: # %F
+; NDD-NEXT:    .cfi_def_cfa_offset 16
+; NDD-NEXT:    movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00]
+; NDD-NEXT:    popq %rcx # encoding: [0x59]
+; NDD-NEXT:    .cfi_def_cfa_offset 8
+; NDD-NEXT:    retq # encoding: [0xc3]
 entry:
   %tmp1 = call zeroext i1 @test12b()
   br i1 %tmp1, label %T, label %F
@@ -271,6 +413,13 @@ define i32 @test13(i32 %mask, i32 %base, i32 %intra) {
 ; CHECK-NEXT:    testb $8, %dil # encoding: [0x40,0xf6,0xc7,0x08]
 ; CHECK-NEXT:    cmovnel %edx, %eax # encoding: [0x0f,0x45,0xc2]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: test13:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
+; NDD-NEXT:    testb $8, %dil # encoding: [0x40,0xf6,0xc7,0x08]
+; NDD-NEXT:    cmovnel %edx, %eax # encoding: [0x0f,0x45,0xc2]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %and = and i32 %mask, 8
   %tobool = icmp ne i32 %and, 0
   %cond = select i1 %tobool, i32 %intra, i32 %base
@@ -284,6 +433,13 @@ define i32 @test14(i32 %mask, i32 %base, i32 %intra) {
 ; CHECK-NEXT:    shrl $7, %edi # encoding: [0xc1,0xef,0x07]
 ; CHECK-NEXT:    cmovnsl %edx, %eax # encoding: [0x0f,0x49,0xc2]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: test14:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
+; NDD-NEXT:    shrl $7, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0xc1,0xef,0x07]
+; NDD-NEXT:    cmovnsl %edx, %eax # encoding: [0x0f,0x49,0xc2]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %s = lshr i32 %mask, 7
   %tobool = icmp sgt i32 %s, -1
   %cond = select i1 %tobool, i32 %intra, i32 %base
@@ -300,6 +456,15 @@ define zeroext i1 @test15(i32 %bf.load, i32 %n) {
 ; CHECK-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
 ; CHECK-NEXT:    orb %cl, %al # encoding: [0x08,0xc8]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: test15:
+; NDD:       # %bb.0:
+; NDD-NEXT:    shrl $16, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0xef,0x10]
+; NDD-NEXT:    sete %cl # encoding: [0x0f,0x94,0xc1]
+; NDD-NEXT:    cmpl %esi, %eax # encoding: [0x39,0xf0]
+; NDD-NEXT:    setae %al # encoding: [0x0f,0x93,0xc0]
+; NDD-NEXT:    orb %cl, %al # EVEX TO LEGACY Compression encoding: [0x08,0xc8]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %bf.lshr = lshr i32 %bf.load, 16
   %cmp2 = icmp eq i32 %bf.lshr, 0
   %cmp5 = icmp uge i32 %bf.lshr, %n
@@ -313,6 +478,12 @@ define i8 @signbit_i16(i16 signext %L) {
 ; CHECK-NEXT:    testw %di, %di # encoding: [0x66,0x85,0xff]
 ; CHECK-NEXT:    setns %al # encoding: [0x0f,0x99,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: signbit_i16:
+; NDD:       # %bb.0:
+; NDD-NEXT:    testw %di, %di # encoding: [0x66,0x85,0xff]
+; NDD-NEXT:    setns %al # encoding: [0x0f,0x99,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %lshr = lshr i16 %L, 15
   %trunc = trunc i16 %lshr to i8
   %not = xor i8 %trunc, 1
@@ -325,6 +496,12 @@ define i8 @signbit_i32(i32 %L) {
 ; CHECK-NEXT:    testl %edi, %edi # encoding: [0x85,0xff]
 ; CHECK-NEXT:    setns %al # encoding: [0x0f,0x99,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: signbit_i32:
+; NDD:       # %bb.0:
+; NDD-NEXT:    testl %edi, %edi # encoding: [0x85,0xff]
+; NDD-NEXT:    setns %al # encoding: [0x0f,0x99,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %lshr = lshr i32 %L, 31
   %trunc = trunc i32 %lshr to i8
   %not = xor i8 %trunc, 1
@@ -337,6 +514,12 @@ define i8 @signbit_i64(i64 %L) {
 ; CHECK-NEXT:    testq %rdi, %rdi # encoding: [0x48,0x85,0xff]
 ; CHECK-NEXT:    setns %al # encoding: [0x0f,0x99,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: signbit_i64:
+; NDD:       # %bb.0:
+; NDD-NEXT:    testq %rdi, %rdi # encoding: [0x48,0x85,0xff]
+; NDD-NEXT:    setns %al # encoding: [0x0f,0x99,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %lshr = lshr i64 %L, 63
   %trunc = trunc i64 %lshr to i8
   %not = xor i8 %trunc, 1
@@ -349,6 +532,12 @@ define zeroext i1 @signbit_i32_i1(i32 %L) {
 ; CHECK-NEXT:    testl %edi, %edi # encoding: [0x85,0xff]
 ; CHECK-NEXT:    setns %al # encoding: [0x0f,0x99,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: signbit_i32_i1:
+; NDD:       # %bb.0:
+; NDD-NEXT:    testl %edi, %edi # encoding: [0x85,0xff]
+; NDD-NEXT:    setns %al # encoding: [0x0f,0x99,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %lshr = lshr i32 %L, 31
   %trunc = trunc i32 %lshr to i1
   %not = xor i1 %trunc, true
@@ -371,6 +560,21 @@ define void @test20(i32 %bf.load, i8 %x1, ptr %b_addr) {
 ; CHECK-NEXT:    setne d(%rip) # encoding: [0x0f,0x95,0x05,A,A,A,A]
 ; CHECK-NEXT:    # fixup A - offset: 3, value: d-4, kind: reloc_riprel_4byte
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: test20:
+; NDD:       # %bb.0:
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    testl $16777215, %edi # encoding: [0xf7,0xc7,0xff,0xff,0xff,0x00]
+; NDD-NEXT:    # imm = 0xFFFFFF
+; NDD-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT:    movzbl %sil, %ecx # encoding: [0x40,0x0f,0xb6,0xce]
+; NDD-NEXT:    addl %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x01,0xc8]
+; NDD-NEXT:    setne (%rdx) # encoding: [0x0f,0x95,0x02]
+; NDD-NEXT:    testl $16777215, %edi # encoding: [0xf7,0xc7,0xff,0xff,0xff,0x00]
+; NDD-NEXT:    # imm = 0xFFFFFF
+; NDD-NEXT:    setne d(%rip) # encoding: [0x0f,0x95,0x05,A,A,A,A]
+; NDD-NEXT:    # fixup A - offset: 3, value: d-4, kind: reloc_riprel_4byte
+; NDD-NEXT:    retq # encoding: [0xc3]
   %bf.shl = shl i32 %bf.load, 8
   %bf.ashr = ashr exact i32 %bf.shl, 8
   %tobool4 = icmp ne i32 %bf.ashr, 0
@@ -391,6 +595,11 @@ define i32 @highmask_i64_simplify(i64 %val) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: highmask_i64_simplify:
+; NDD:       # %bb.0:
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %and = and i64 %val, -2199023255552
   %cmp = icmp ult i64 %and, 0
   %ret = zext i1 %cmp to i32
@@ -404,6 +613,13 @@ define i32 @highmask_i64_mask64(i64 %val) {
 ; CHECK-NEXT:    shrq $41, %rdi # encoding: [0x48,0xc1,0xef,0x29]
 ; CHECK-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: highmask_i64_mask64:
+; NDD:       # %bb.0:
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    shrq $41, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x29]
+; NDD-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %and = and i64 %val, -2199023255552
   %cmp = icmp ne i64 %and, 0
   %ret = zext i1 %cmp to i32
@@ -419,6 +635,14 @@ define i64 @highmask_i64_mask64_extra_use(i64 %val) nounwind {
 ; CHECK-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
 ; CHECK-NEXT:    imulq %rdi, %rax # encoding: [0x48,0x0f,0xaf,0xc7]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: highmask_i64_mask64_extra_use:
+; NDD:       # %bb.0:
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    shrq $41, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x29]
+; NDD-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT:    imulq %rdi, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0xaf,0xc7]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %and = and i64 %val, -2199023255552
   %cmp = icmp ne i64 %and, 0
   %z = zext i1 %cmp to i64
@@ -433,6 +657,13 @@ define i32 @highmask_i64_mask32(i64 %val) {
 ; CHECK-NEXT:    shrq $20, %rdi # encoding: [0x48,0xc1,0xef,0x14]
 ; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: highmask_i64_mask32:
+; NDD:       # %bb.0:
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    shrq $20, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x14]
+; NDD-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %and = and i64 %val, -1048576
   %cmp = icmp eq i64 %and, 0
   %ret = zext i1 %cmp to i32
@@ -448,6 +679,15 @@ define i64 @highmask_i64_mask32_extra_use(i64 %val) nounwind {
 ; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
 ; CHECK-NEXT:    imulq %rdi, %rax # encoding: [0x48,0x0f,0xaf,0xc7]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: highmask_i64_mask32_extra_use:
+; NDD:       # %bb.0:
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    testq $-1048576, %rdi # encoding: [0x48,0xf7,0xc7,0x00,0x00,0xf0,0xff]
+; NDD-NEXT:    # imm = 0xFFF00000
+; NDD-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT:    imulq %rdi, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0xaf,0xc7]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %and = and i64 %val, -1048576
   %cmp = icmp eq i64 %and, 0
   %z = zext i1 %cmp to i64
@@ -462,6 +702,13 @@ define i32 @highmask_i64_mask8(i64 %val) {
 ; CHECK-NEXT:    testq $-16, %rdi # encoding: [0x48,0xf7,0xc7,0xf0,0xff,0xff,0xff]
 ; CHECK-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: highmask_i64_mask8:
+; NDD:       # %bb.0:
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    testq $-16, %rdi # encoding: [0x48,0xf7,0xc7,0xf0,0xff,0xff,0xff]
+; NDD-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %and = and i64 %val, -16
   %cmp = icmp ne i64 %and, 0
   %ret = zext i1 %cmp to i32
@@ -475,6 +722,13 @@ define i32 @lowmask_i64_mask64(i64 %val) {
 ; CHECK-NEXT:    shlq $16, %rdi # encoding: [0x48,0xc1,0xe7,0x10]
 ; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: lowmask_i64_mask64:
+; NDD:       # %bb.0:
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    shlq $16, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xe7,0x10]
+; NDD-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %and = and i64 %val, 281474976710655
   %cmp = icmp eq i64 %and, 0
   %ret = zext i1 %cmp to i32
@@ -490,6 +744,14 @@ define i64 @lowmask_i64_mask64_extra_use(i64 %val) nounwind {
 ; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
 ; CHECK-NEXT:    imulq %rdi, %rax # encoding: [0x48,0x0f,0xaf,0xc7]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: lowmask_i64_mask64_extra_use:
+; NDD:       # %bb.0:
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    shlq $16, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xe7,0x10]
+; NDD-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT:    imulq %rdi, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0xaf,0xc7]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %and = and i64 %val, 281474976710655
   %cmp = icmp eq i64 %and, 0
   %z = zext i1 %cmp to i64
@@ -504,6 +766,13 @@ define i32 @lowmask_i64_mask32(i64 %val) {
 ; CHECK-NEXT:    shlq $44, %rdi # encoding: [0x48,0xc1,0xe7,0x2c]
 ; CHECK-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: lowmask_i64_mask32:
+; NDD:       # %bb.0:
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    shlq $44, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xe7,0x2c]
+; NDD-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %and = and i64 %val, 1048575
   %cmp = icmp ne i64 %and, 0
   %ret = zext i1 %cmp to i32
@@ -519,6 +788,15 @@ define i64 @lowmask_i64_mask32_extra_use(i64 %val) nounwind {
 ; CHECK-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
 ; CHECK-NEXT:    imulq %rdi, %rax # encoding: [0x48,0x0f,0xaf,0xc7]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: lowmask_i64_mask32_extra_use:
+; NDD:       # %bb.0:
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    testl $1048575, %edi # encoding: [0xf7,0xc7,0xff,0xff,0x0f,0x00]
+; NDD-NEXT:    # imm = 0xFFFFF
+; NDD-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT:    imulq %rdi, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0xaf,0xc7]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %and = and i64 %val, 1048575
   %cmp = icmp ne i64 %and, 0
   %z = zext i1 %cmp to i64
@@ -533,6 +811,13 @@ define i32 @lowmask_i64_mask8(i64 %val) {
 ; CHECK-NEXT:    testb $31, %dil # encoding: [0x40,0xf6,0xc7,0x1f]
 ; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: lowmask_i64_mask8:
+; NDD:       # %bb.0:
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    testb $31, %dil # encoding: [0x40,0xf6,0xc7,0x1f]
+; NDD-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %and = and i64 %val, 31
   %cmp = icmp eq i64 %and, 0
   %ret = zext i1 %cmp to i32
@@ -547,6 +832,14 @@ define i32 @highmask_i32_mask32(i32 %val) {
 ; CHECK-NEXT:    # imm = 0xFFF00000
 ; CHECK-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: highmask_i32_mask32:
+; NDD:       # %bb.0:
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    testl $-1048576, %edi # encoding: [0xf7,0xc7,0x00,0x00,0xf0,0xff]
+; NDD-NEXT:    # imm = 0xFFF00000
+; NDD-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %and = and i32 %val, -1048576
   %cmp = icmp ne i32 %and, 0
   %ret = zext i1 %cmp to i32
@@ -560,6 +853,13 @@ define i32 @highmask_i32_mask8(i32 %val) {
 ; CHECK-NEXT:    testl $-16, %edi # encoding: [0xf7,0xc7,0xf0,0xff,0xff,0xff]
 ; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: highmask_i32_mask8:
+; NDD:       # %bb.0:
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    testl $-16, %edi # encoding: [0xf7,0xc7,0xf0,0xff,0xff,0xff]
+; NDD-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %and = and i32 %val, -16
   %cmp = icmp eq i32 %and, 0
   %ret = zext i1 %cmp to i32
@@ -574,6 +874,14 @@ define i32 @lowmask_i32_mask32(i32 %val) {
 ; CHECK-NEXT:    # imm = 0xFFFFF
 ; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: lowmask_i32_mask32:
+; NDD:       # %bb.0:
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    testl $1048575, %edi # encoding: [0xf7,0xc7,0xff,0xff,0x0f,0x00]
+; NDD-NEXT:    # imm = 0xFFFFF
+; NDD-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %and = and i32 %val, 1048575
   %cmp = icmp eq i32 %and, 0
   %ret = zext i1 %cmp to i32
@@ -587,6 +895,13 @@ define i32 @lowmask_i32_mask8(i32 %val) {
 ; CHECK-NEXT:    testb $31, %dil # encoding: [0x40,0xf6,0xc7,0x1f]
 ; CHECK-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: lowmask_i32_mask8:
+; NDD:       # %bb.0:
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    testb $31, %dil # encoding: [0x40,0xf6,0xc7,0x1f]
+; NDD-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %and = and i32 %val, 31
   %cmp = icmp ne i32 %and, 0
   %ret = zext i1 %cmp to i32
@@ -600,6 +915,13 @@ define i1 @shifted_mask64_testb(i64 %a) {
 ; CHECK-NEXT:    testb %dil, %dil # encoding: [0x40,0x84,0xff]
 ; CHECK-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: shifted_mask64_testb:
+; NDD:       # %bb.0:
+; NDD-NEXT:    shrq $50, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0xef,0x32]
+; NDD-NEXT:    testb %al, %al # encoding: [0x84,0xc0]
+; NDD-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %v0 = and i64 %a, 287104476244869120  ; 0xff << 50
   %v1 = icmp ne i64 %v0, 0
   ret i1 %v1
@@ -612,6 +934,13 @@ define i1 @shifted_mask64_testw(i64 %a) {
 ; CHECK-NEXT:    testw %di, %di # encoding: [0x66,0x85,0xff]
 ; CHECK-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: shifted_mask64_testw:
+; NDD:       # %bb.0:
+; NDD-NEXT:    shrq $33, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0xef,0x21]
+; NDD-NEXT:    testw %ax, %ax # encoding: [0x66,0x85,0xc0]
+; NDD-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %v0 = and i64 %a, 562941363486720  ; 0xffff << 33
   %v1 = icmp ne i64 %v0, 0
   ret i1 %v1
@@ -624,6 +953,13 @@ define i1 @shifted_mask64_testl(i64 %a) {
 ; CHECK-NEXT:    testl %edi, %edi # encoding: [0x85,0xff]
 ; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: shifted_mask64_testl:
+; NDD:       # %bb.0:
+; NDD-NEXT:    shrq $7, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0xef,0x07]
+; NDD-NEXT:    testl %eax, %eax # encoding: [0x85,0xc0]
+; NDD-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %v0 = and i64 %a, 549755813760  ; 0xffffffff << 7
   %v1 = icmp eq i64 %v0, 0
   ret i1 %v1
@@ -639,6 +975,16 @@ define i1 @shifted_mask64_extra_use_const(i64 %a) {
 ; CHECK-NEXT:    movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
 ; CHECK-NEXT:    # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: shifted_mask64_extra_use_const:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movabsq $287104476244869120, %rcx # encoding: [0x48,0xb9,0x00,0x00,0x00,0x00,0x00,0x00,0xfc,0x03]
+; NDD-NEXT:    # imm = 0x3FC000000000000
+; NDD-NEXT:    andq %rcx, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x21,0xcf]
+; NDD-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT:    movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NDD-NEXT:    # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
+; NDD-NEXT:    retq # encoding: [0xc3]
   %v0 = and i64 %a, 287104476244869120  ; 0xff << 50
   %v1 = icmp ne i64 %v0, 0
   store i64 287104476244869120, ptr @d64
@@ -655,6 +1001,16 @@ define i1 @shifted_mask64_extra_use_and(i64 %a) {
 ; CHECK-NEXT:    movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
 ; CHECK-NEXT:    # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: shifted_mask64_extra_use_and:
+; NDD:       # %bb.0:
+; NDD-NEXT:    movabsq $287104476244869120, %rax # encoding: [0x48,0xb8,0x00,0x00,0x00,0x00,0x00,0x00,0xfc,0x03]
+; NDD-NEXT:    # imm = 0x3FC000000000000
+; NDD-NEXT:    andq %rax, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x21,0xc7]
+; NDD-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT:    movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NDD-NEXT:    # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
+; NDD-NEXT:    retq # encoding: [0xc3]
   %v0 = and i64 %a, 287104476244869120  ; 0xff << 50
   %v1 = icmp ne i64 %v0, 0
   store i64 %v0, ptr @d64
@@ -668,6 +1024,13 @@ define i1 @shifted_mask32_testl_immediate(i64 %a) {
 ; CHECK-NEXT:    # imm = 0x3FC0000
 ; CHECK-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: shifted_mask32_testl_immediate:
+; NDD:       # %bb.0:
+; NDD-NEXT:    testl $66846720, %edi # encoding: [0xf7,0xc7,0x00,0x00,0xfc,0x03]
+; NDD-NEXT:    # imm = 0x3FC0000
+; NDD-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %v0 = and i64 %a, 66846720  ; 0xff << 18
   %v1 = icmp ne i64 %v0, 0
   ret i1 %v1
@@ -683,6 +1046,16 @@ define i1 @shifted_mask32_extra_use_const(i64 %a) {
 ; CHECK-NEXT:    # fixup A - offset: 3, value: d64-8, kind: reloc_riprel_4byte
 ; CHECK-NEXT:    # imm = 0x3FC0000
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: shifted_mask32_extra_use_const:
+; NDD:       # %bb.0:
+; NDD-NEXT:    testl $66846720, %edi # encoding: [0xf7,0xc7,0x00,0x00,0xfc,0x03]
+; NDD-NEXT:    # imm = 0x3FC0000
+; NDD-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT:    movq $66846720, d64(%rip) # encoding: [0x48,0xc7,0x05,A,A,A,A,0x00,0x00,0xfc,0x03]
+; NDD-NEXT:    # fixup A - offset: 3, value: d64-8, kind: reloc_riprel_4byte
+; NDD-NEXT:    # imm = 0x3FC0000
+; NDD-NEXT:    retq # encoding: [0xc3]
   %v0 = and i64 %a, 66846720  ; 0xff << 18
   %v1 = icmp ne i64 %v0, 0
   store i64 66846720, ptr @d64
@@ -698,6 +1071,15 @@ define i1 @shifted_mask32_extra_use_and(i64 %a) {
 ; CHECK-NEXT:    movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
 ; CHECK-NEXT:    # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: shifted_mask32_extra_use_and:
+; NDD:       # %bb.0:
+; NDD-NEXT:    andq $66846720, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x81,0xe7,0x00,0x00,0xfc,0x03]
+; NDD-NEXT:    # imm = 0x3FC0000
+; NDD-NEXT:    setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT:    movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NDD-NEXT:    # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
+; NDD-NEXT:    retq # encoding: [0xc3]
   %v0 = and i64 %a, 66846720  ; 0xff << 18
   %v1 = icmp ne i64 %v0, 0
   store i64 %v0, ptr @d64
@@ -713,6 +1095,15 @@ define { i64, i64 } @pr39968(i64, i64, i32) {
 ; CHECK-NEXT:    cmovneq %rdi, %rax # encoding: [0x48,0x0f,0x45,0xc7]
 ; CHECK-NEXT:    movq %rsi, %rdx # encoding: [0x48,0x89,0xf2]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: pr39968:
+; NDD:       # %bb.0:
+; NDD-NEXT:    xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT:    testb $64, %dl # encoding: [0xf6,0xc2,0x40]
+; NDD-NEXT:    cmovneq %rdi, %rsi # encoding: [0x48,0x0f,0x45,0xf7]
+; NDD-NEXT:    cmovneq %rdi, %rax # encoding: [0x48,0x0f,0x45,0xc7]
+; NDD-NEXT:    movq %rsi, %rdx # encoding: [0x48,0x89,0xf2]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %4 = and i32 %2, 64
   %5 = icmp ne i32 %4, 0
   %6 = select i1 %5, i64 %0, i64 %1
@@ -736,6 +1127,18 @@ define i32 @pr42189(i16 signext %c) {
 ; CHECK-NEXT:    jmp g@PLT # TAILCALL
 ; CHECK-NEXT:    # encoding: [0xeb,A]
 ; CHECK-NEXT:    # fixup A - offset: 1, value: g@PLT-1, kind: FK_PCRel_1
+;
+; NDD-LABEL: pr42189:
+; NDD:       # %bb.0: # %entry
+; NDD-NEXT:    cmpl $32767, %edi # encoding: [0x81,0xff,0xff,0x7f,0x00,0x00]
+; NDD-NEXT:    # imm = 0x7FFF
+; NDD-NEXT:    jne f@PLT # TAILCALL
+; NDD-NEXT:    # encoding: [0x75,A]
+; NDD-NEXT:    # fixup A - offset: 1, value: f@PLT-1, kind: FK_PCRel_1
+; NDD-NEXT:  # %bb.1: # %if.then
+; NDD-NEXT:    jmp g@PLT # TAILCALL
+; NDD-NEXT:    # encoding: [0xeb,A]
+; NDD-NEXT:    # fixup A - offset: 1, value: g@PLT-1, kind: FK_PCRel_1
 entry:
   %cmp = icmp eq i16 %c, 32767
   br i1 %cmp, label %if.then, label %if.end
@@ -766,6 +1169,13 @@ define i1 @fold_test_and_with_chain(i32* %x, i32* %y, i32 %z) {
 ; CHECK-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
 ; CHECK-NEXT:    movl %edx, (%rsi) # encoding: [0x89,0x16]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
+;
+; NDD-LABEL: fold_test_and_with_chain:
+; NDD:       # %bb.0:
+; NDD-NEXT:    andl (%rdi), %edx, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x23,0x17]
+; NDD-NEXT:    sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT:    movl %edx, (%rsi) # encoding: [0x89,0x16]
+; NDD-NEXT:    retq # encoding: [0xc3]
   %a = load i32, i32* %x
   %b = and i32 %z, %a
   %c = icmp eq i32 %b, 0

diff  --git a/llvm/test/CodeGen/X86/popcnt.ll b/llvm/test/CodeGen/X86/popcnt.ll
index a0879ad930a302d..2d780370a110bce 100644
--- a/llvm/test/CodeGen/X86/popcnt.ll
+++ b/llvm/test/CodeGen/X86/popcnt.ll
@@ -3,6 +3,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+popcnt | FileCheck %s --check-prefix=X86-POPCNT
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+popcnt | FileCheck %s --check-prefix=X64-POPCNT
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd | FileCheck %s --check-prefix=X64-NDD
 ; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2
 ; RUN: llc < %s -mtriple=i686-unknown -mattr=ssse3 | FileCheck %s --check-prefixes=X86,X86-SSSE3
 
@@ -55,6 +56,20 @@ define i8 @cnt8(i8 %x) nounwind readnone {
 ; X64-POPCNT-NEXT:    popcntl %eax, %eax
 ; X64-POPCNT-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-POPCNT-NEXT:    retq
+;
+; X64-NDD-LABEL: cnt8:
+; X64-NDD:       # %bb.0:
+; X64-NDD-NEXT:    shrb $1, %dil, %al
+; X64-NDD-NEXT:    andb $85, %al
+; X64-NDD-NEXT:    subb %al, %dil, %al
+; X64-NDD-NEXT:    andb $51, %al, %cl
+; X64-NDD-NEXT:    shrb $2, %al
+; X64-NDD-NEXT:    andb $51, %al
+; X64-NDD-NEXT:    addb %cl, %al
+; X64-NDD-NEXT:    shrb $4, %al, %cl
+; X64-NDD-NEXT:    addb %cl, %al
+; X64-NDD-NEXT:    andb $15, %al
+; X64-NDD-NEXT:    retq
   %cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
   ret i8 %cnt
 }
@@ -118,6 +133,24 @@ define i16 @cnt16(i16 %x) nounwind readnone {
 ; X64-POPCNT-NEXT:    popcntl %eax, %eax
 ; X64-POPCNT-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-POPCNT-NEXT:    retq
+;
+; X64-NDD-LABEL: cnt16:
+; X64-NDD:       # %bb.0:
+; X64-NDD-NEXT:    shrl $1, %edi, %eax
+; X64-NDD-NEXT:    andl $21845, %eax # imm = 0x5555
+; X64-NDD-NEXT:    subl %eax, %edi, %eax
+; X64-NDD-NEXT:    andl $13107, %eax, %ecx # imm = 0x3333
+; X64-NDD-NEXT:    shrl $2, %eax
+; X64-NDD-NEXT:    andl $13107, %eax # imm = 0x3333
+; X64-NDD-NEXT:    addl %ecx, %eax
+; X64-NDD-NEXT:    shrl $4, %eax, %ecx
+; X64-NDD-NEXT:    addl %ecx, %eax
+; X64-NDD-NEXT:    andl $3855, %eax # imm = 0xF0F
+; X64-NDD-NEXT:    shrl $8, %eax, %ecx
+; X64-NDD-NEXT:    addl %ecx, %eax
+; X64-NDD-NEXT:    movzbl %al, %eax
+; X64-NDD-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NDD-NEXT:    retq
   %cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
   ret i16 %cnt
 }
@@ -171,6 +204,22 @@ define i32 @cnt32(i32 %x) nounwind readnone {
 ; X64-POPCNT:       # %bb.0:
 ; X64-POPCNT-NEXT:    popcntl %edi, %eax
 ; X64-POPCNT-NEXT:    retq
+;
+; X64-NDD-LABEL: cnt32:
+; X64-NDD:       # %bb.0:
+; X64-NDD-NEXT:    shrl $1, %edi, %eax
+; X64-NDD-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X64-NDD-NEXT:    subl %eax, %edi, %eax
+; X64-NDD-NEXT:    andl $858993459, %eax, %ecx # imm = 0x33333333
+; X64-NDD-NEXT:    shrl $2, %eax
+; X64-NDD-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X64-NDD-NEXT:    addl %ecx, %eax
+; X64-NDD-NEXT:    shrl $4, %eax, %ecx
+; X64-NDD-NEXT:    addl %ecx, %eax
+; X64-NDD-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X64-NDD-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
+; X64-NDD-NEXT:    shrl $24, %eax
+; X64-NDD-NEXT:    retq
   %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
   ret i32 %cnt
 }
@@ -250,6 +299,26 @@ define i64 @cnt64(i64 %x) nounwind readnone {
 ; X64-POPCNT-NEXT:    popcntq %rdi, %rax
 ; X64-POPCNT-NEXT:    retq
 ;
+; X64-NDD-LABEL: cnt64:
+; X64-NDD:       # %bb.0:
+; X64-NDD-NEXT:    shrq $1, %rdi, %rax
+; X64-NDD-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-NDD-NEXT:    andq %rcx, %rax
+; X64-NDD-NEXT:    subq %rax, %rdi, %rax
+; X64-NDD-NEXT:    movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
+; X64-NDD-NEXT:    andq %rcx, %rax, %rdx
+; X64-NDD-NEXT:    shrq $2, %rax
+; X64-NDD-NEXT:    andq %rcx, %rax
+; X64-NDD-NEXT:    addq %rdx, %rax
+; X64-NDD-NEXT:    shrq $4, %rax, %rcx
+; X64-NDD-NEXT:    addq %rcx, %rax
+; X64-NDD-NEXT:    movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
+; X64-NDD-NEXT:    andq %rcx, %rax
+; X64-NDD-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
+; X64-NDD-NEXT:    imulq %rcx, %rax
+; X64-NDD-NEXT:    shrq $56, %rax
+; X64-NDD-NEXT:    retq
+;
 ; X86-SSE2-LABEL: cnt64:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
@@ -444,6 +513,40 @@ define i128 @cnt128(i128 %x) nounwind readnone {
 ; X64-POPCNT-NEXT:    xorl %edx, %edx
 ; X64-POPCNT-NEXT:    retq
 ;
+; X64-NDD-LABEL: cnt128:
+; X64-NDD:       # %bb.0:
+; X64-NDD-NEXT:    shrq $1, %rsi, %rax
+; X64-NDD-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-NDD-NEXT:    andq %rcx, %rax
+; X64-NDD-NEXT:    subq %rax, %rsi, %rax
+; X64-NDD-NEXT:    movabsq $3689348814741910323, %rdx # imm = 0x3333333333333333
+; X64-NDD-NEXT:    andq %rdx, %rax, %rsi
+; X64-NDD-NEXT:    shrq $2, %rax
+; X64-NDD-NEXT:    andq %rdx, %rax
+; X64-NDD-NEXT:    addq %rsi, %rax
+; X64-NDD-NEXT:    shrq $4, %rax, %rsi
+; X64-NDD-NEXT:    addq %rsi, %rax
+; X64-NDD-NEXT:    movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
+; X64-NDD-NEXT:    andq %rsi, %rax
+; X64-NDD-NEXT:    movabsq $72340172838076673, %r8 # imm = 0x101010101010101
+; X64-NDD-NEXT:    imulq %r8, %rax
+; X64-NDD-NEXT:    shrq $56, %rax
+; X64-NDD-NEXT:    shrq $1, %rdi, %r9
+; X64-NDD-NEXT:    andq %r9, %rcx
+; X64-NDD-NEXT:    subq %rcx, %rdi, %rcx
+; X64-NDD-NEXT:    andq %rdx, %rcx, %rdi
+; X64-NDD-NEXT:    shrq $2, %rcx
+; X64-NDD-NEXT:    andq %rdx, %rcx
+; X64-NDD-NEXT:    addq %rdi, %rcx
+; X64-NDD-NEXT:    shrq $4, %rcx, %rdx
+; X64-NDD-NEXT:    addq %rdx, %rcx
+; X64-NDD-NEXT:    andq %rsi, %rcx
+; X64-NDD-NEXT:    imulq %r8, %rcx
+; X64-NDD-NEXT:    shrq $56, %rcx
+; X64-NDD-NEXT:    addq %rcx, %rax
+; X64-NDD-NEXT:    xorl %edx, %edx
+; X64-NDD-NEXT:    retq
+;
 ; X86-SSE2-LABEL: cnt128:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -603,6 +706,26 @@ define i64 @cnt64_noimplicitfloat(i64 %x) nounwind readnone noimplicitfloat  {
 ; X64-POPCNT:       # %bb.0:
 ; X64-POPCNT-NEXT:    popcntq %rdi, %rax
 ; X64-POPCNT-NEXT:    retq
+;
+; X64-NDD-LABEL: cnt64_noimplicitfloat:
+; X64-NDD:       # %bb.0:
+; X64-NDD-NEXT:    shrq $1, %rdi, %rax
+; X64-NDD-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-NDD-NEXT:    andq %rcx, %rax
+; X64-NDD-NEXT:    subq %rax, %rdi, %rax
+; X64-NDD-NEXT:    movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
+; X64-NDD-NEXT:    andq %rcx, %rax, %rdx
+; X64-NDD-NEXT:    shrq $2, %rax
+; X64-NDD-NEXT:    andq %rcx, %rax
+; X64-NDD-NEXT:    addq %rdx, %rax
+; X64-NDD-NEXT:    shrq $4, %rax, %rcx
+; X64-NDD-NEXT:    addq %rcx, %rax
+; X64-NDD-NEXT:    movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
+; X64-NDD-NEXT:    andq %rcx, %rax
+; X64-NDD-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
+; X64-NDD-NEXT:    imulq %rcx, %rax
+; X64-NDD-NEXT:    shrq $56, %rax
+; X64-NDD-NEXT:    retq
   %cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
   ret i64 %cnt
 }
@@ -658,6 +781,23 @@ define i32 @cnt32_optsize(i32 %x) nounwind readnone optsize {
 ; X64-POPCNT:       # %bb.0:
 ; X64-POPCNT-NEXT:    popcntl %edi, %eax
 ; X64-POPCNT-NEXT:    retq
+;
+; X64-NDD-LABEL: cnt32_optsize:
+; X64-NDD:       # %bb.0:
+; X64-NDD-NEXT:    shrl $1, %edi, %eax
+; X64-NDD-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X64-NDD-NEXT:    subl %eax, %edi, %eax
+; X64-NDD-NEXT:    movl $858993459, %ecx # imm = 0x33333333
+; X64-NDD-NEXT:    andl %ecx, %eax, %edx
+; X64-NDD-NEXT:    shrl $2, %eax
+; X64-NDD-NEXT:    andl %ecx, %eax
+; X64-NDD-NEXT:    addl %edx, %eax
+; X64-NDD-NEXT:    shrl $4, %eax, %ecx
+; X64-NDD-NEXT:    addl %ecx, %eax
+; X64-NDD-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X64-NDD-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
+; X64-NDD-NEXT:    shrl $24, %eax
+; X64-NDD-NEXT:    retq
   %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
   ret i32 %cnt
 }
@@ -746,6 +886,26 @@ define i64 @cnt64_optsize(i64 %x) nounwind readnone optsize {
 ; X64-POPCNT-NEXT:    popcntq %rdi, %rax
 ; X64-POPCNT-NEXT:    retq
 ;
+; X64-NDD-LABEL: cnt64_optsize:
+; X64-NDD:       # %bb.0:
+; X64-NDD-NEXT:    shrq $1, %rdi, %rax
+; X64-NDD-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-NDD-NEXT:    andq %rcx, %rax
+; X64-NDD-NEXT:    subq %rax, %rdi, %rax
+; X64-NDD-NEXT:    movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
+; X64-NDD-NEXT:    andq %rcx, %rax, %rdx
+; X64-NDD-NEXT:    shrq $2, %rax
+; X64-NDD-NEXT:    andq %rcx, %rax
+; X64-NDD-NEXT:    addq %rdx, %rax
+; X64-NDD-NEXT:    shrq $4, %rax, %rcx
+; X64-NDD-NEXT:    addq %rcx, %rax
+; X64-NDD-NEXT:    movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
+; X64-NDD-NEXT:    andq %rcx, %rax
+; X64-NDD-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
+; X64-NDD-NEXT:    imulq %rcx, %rax
+; X64-NDD-NEXT:    shrq $56, %rax
+; X64-NDD-NEXT:    retq
+;
 ; X86-SSE2-LABEL: cnt64_optsize:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
@@ -949,6 +1109,40 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
 ; X64-POPCNT-NEXT:    xorl %edx, %edx
 ; X64-POPCNT-NEXT:    retq
 ;
+; X64-NDD-LABEL: cnt128_optsize:
+; X64-NDD:       # %bb.0:
+; X64-NDD-NEXT:    shrq $1, %rsi, %rax
+; X64-NDD-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-NDD-NEXT:    andq %rcx, %rax
+; X64-NDD-NEXT:    subq %rax, %rsi, %rax
+; X64-NDD-NEXT:    movabsq $3689348814741910323, %rdx # imm = 0x3333333333333333
+; X64-NDD-NEXT:    andq %rdx, %rax, %rsi
+; X64-NDD-NEXT:    shrq $2, %rax
+; X64-NDD-NEXT:    andq %rdx, %rax
+; X64-NDD-NEXT:    addq %rsi, %rax
+; X64-NDD-NEXT:    shrq $4, %rax, %rsi
+; X64-NDD-NEXT:    addq %rsi, %rax
+; X64-NDD-NEXT:    movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
+; X64-NDD-NEXT:    andq %rsi, %rax
+; X64-NDD-NEXT:    movabsq $72340172838076673, %r8 # imm = 0x101010101010101
+; X64-NDD-NEXT:    imulq %r8, %rax
+; X64-NDD-NEXT:    shrq $56, %rax
+; X64-NDD-NEXT:    shrq $1, %rdi, %r9
+; X64-NDD-NEXT:    andq %r9, %rcx
+; X64-NDD-NEXT:    subq %rcx, %rdi, %rcx
+; X64-NDD-NEXT:    andq %rdx, %rcx, %rdi
+; X64-NDD-NEXT:    shrq $2, %rcx
+; X64-NDD-NEXT:    andq %rdx, %rcx
+; X64-NDD-NEXT:    addq %rdi, %rcx
+; X64-NDD-NEXT:    shrq $4, %rcx, %rdx
+; X64-NDD-NEXT:    addq %rdx, %rcx
+; X64-NDD-NEXT:    andq %rsi, %rcx
+; X64-NDD-NEXT:    imulq %r8, %rcx
+; X64-NDD-NEXT:    shrq $56, %rcx
+; X64-NDD-NEXT:    addq %rcx, %rax
+; X64-NDD-NEXT:    xorl %edx, %edx
+; X64-NDD-NEXT:    retq
+;
 ; X86-SSE2-LABEL: cnt128_optsize:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -1085,6 +1279,22 @@ define i32 @cnt32_pgso(i32 %x) nounwind readnone !prof !14 {
 ; X64-POPCNT:       # %bb.0:
 ; X64-POPCNT-NEXT:    popcntl %edi, %eax
 ; X64-POPCNT-NEXT:    retq
+;
+; X64-NDD-LABEL: cnt32_pgso:
+; X64-NDD:       # %bb.0:
+; X64-NDD-NEXT:    shrl $1, %edi, %eax
+; X64-NDD-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X64-NDD-NEXT:    subl %eax, %edi, %eax
+; X64-NDD-NEXT:    andl $858993459, %eax, %ecx # imm = 0x33333333
+; X64-NDD-NEXT:    shrl $2, %eax
+; X64-NDD-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X64-NDD-NEXT:    addl %ecx, %eax
+; X64-NDD-NEXT:    shrl $4, %eax, %ecx
+; X64-NDD-NEXT:    addl %ecx, %eax
+; X64-NDD-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X64-NDD-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
+; X64-NDD-NEXT:    shrl $24, %eax
+; X64-NDD-NEXT:    retq
   %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
   ret i32 %cnt
 }
@@ -1164,6 +1374,26 @@ define i64 @cnt64_pgso(i64 %x) nounwind readnone !prof !14 {
 ; X64-POPCNT-NEXT:    popcntq %rdi, %rax
 ; X64-POPCNT-NEXT:    retq
 ;
+; X64-NDD-LABEL: cnt64_pgso:
+; X64-NDD:       # %bb.0:
+; X64-NDD-NEXT:    shrq $1, %rdi, %rax
+; X64-NDD-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-NDD-NEXT:    andq %rcx, %rax
+; X64-NDD-NEXT:    subq %rax, %rdi, %rax
+; X64-NDD-NEXT:    movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
+; X64-NDD-NEXT:    andq %rcx, %rax, %rdx
+; X64-NDD-NEXT:    shrq $2, %rax
+; X64-NDD-NEXT:    andq %rcx, %rax
+; X64-NDD-NEXT:    addq %rdx, %rax
+; X64-NDD-NEXT:    shrq $4, %rax, %rcx
+; X64-NDD-NEXT:    addq %rcx, %rax
+; X64-NDD-NEXT:    movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
+; X64-NDD-NEXT:    andq %rcx, %rax
+; X64-NDD-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
+; X64-NDD-NEXT:    imulq %rcx, %rax
+; X64-NDD-NEXT:    shrq $56, %rax
+; X64-NDD-NEXT:    retq
+;
 ; X86-SSE2-LABEL: cnt64_pgso:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
@@ -1360,6 +1590,40 @@ define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 {
 ; X64-POPCNT-NEXT:    xorl %edx, %edx
 ; X64-POPCNT-NEXT:    retq
 ;
+; X64-NDD-LABEL: cnt128_pgso:
+; X64-NDD:       # %bb.0:
+; X64-NDD-NEXT:    shrq $1, %rsi, %rax
+; X64-NDD-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-NDD-NEXT:    andq %rcx, %rax
+; X64-NDD-NEXT:    subq %rax, %rsi, %rax
+; X64-NDD-NEXT:    movabsq $3689348814741910323, %rdx # imm = 0x3333333333333333
+; X64-NDD-NEXT:    andq %rdx, %rax, %rsi
+; X64-NDD-NEXT:    shrq $2, %rax
+; X64-NDD-NEXT:    andq %rdx, %rax
+; X64-NDD-NEXT:    addq %rsi, %rax
+; X64-NDD-NEXT:    shrq $4, %rax, %rsi
+; X64-NDD-NEXT:    addq %rsi, %rax
+; X64-NDD-NEXT:    movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
+; X64-NDD-NEXT:    andq %rsi, %rax
+; X64-NDD-NEXT:    movabsq $72340172838076673, %r8 # imm = 0x101010101010101
+; X64-NDD-NEXT:    imulq %r8, %rax
+; X64-NDD-NEXT:    shrq $56, %rax
+; X64-NDD-NEXT:    shrq $1, %rdi, %r9
+; X64-NDD-NEXT:    andq %r9, %rcx
+; X64-NDD-NEXT:    subq %rcx, %rdi, %rcx
+; X64-NDD-NEXT:    andq %rdx, %rcx, %rdi
+; X64-NDD-NEXT:    shrq $2, %rcx
+; X64-NDD-NEXT:    andq %rdx, %rcx
+; X64-NDD-NEXT:    addq %rdi, %rcx
+; X64-NDD-NEXT:    shrq $4, %rcx, %rdx
+; X64-NDD-NEXT:    addq %rdx, %rcx
+; X64-NDD-NEXT:    andq %rsi, %rcx
+; X64-NDD-NEXT:    imulq %r8, %rcx
+; X64-NDD-NEXT:    shrq $56, %rcx
+; X64-NDD-NEXT:    addq %rcx, %rax
+; X64-NDD-NEXT:    xorl %edx, %edx
+; X64-NDD-NEXT:    retq
+;
 ; X86-SSE2-LABEL: cnt128_pgso:
 ; X86-SSE2:       # %bb.0:
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -1497,6 +1761,22 @@ define i32 @popcount_zext_i32(i16 zeroext %x) {
 ; X64-POPCNT:       # %bb.0:
 ; X64-POPCNT-NEXT:    popcntl %edi, %eax
 ; X64-POPCNT-NEXT:    retq
+;
+; X64-NDD-LABEL: popcount_zext_i32:
+; X64-NDD:       # %bb.0:
+; X64-NDD-NEXT:    shrl $1, %edi, %eax
+; X64-NDD-NEXT:    andl $21845, %eax # imm = 0x5555
+; X64-NDD-NEXT:    subl %eax, %edi, %eax
+; X64-NDD-NEXT:    andl $858993459, %eax, %ecx # imm = 0x33333333
+; X64-NDD-NEXT:    shrl $2, %eax
+; X64-NDD-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X64-NDD-NEXT:    addl %ecx, %eax
+; X64-NDD-NEXT:    shrl $4, %eax, %ecx
+; X64-NDD-NEXT:    addl %ecx, %eax
+; X64-NDD-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X64-NDD-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
+; X64-NDD-NEXT:    shrl $24, %eax
+; X64-NDD-NEXT:    retq
   %z = zext i16 %x to i32
   %cnt = tail call i32 @llvm.ctpop.i32(i32 %z)
   ret i32 %cnt
@@ -1556,6 +1836,23 @@ define i32 @popcount_i16_zext(i16 zeroext %x) {
 ; X64-POPCNT:       # %bb.0:
 ; X64-POPCNT-NEXT:    popcntl %edi, %eax
 ; X64-POPCNT-NEXT:    retq
+;
+; X64-NDD-LABEL: popcount_i16_zext:
+; X64-NDD:       # %bb.0:
+; X64-NDD-NEXT:    shrl $1, %edi, %eax
+; X64-NDD-NEXT:    andl $21845, %eax # imm = 0x5555
+; X64-NDD-NEXT:    subl %eax, %edi, %eax
+; X64-NDD-NEXT:    andl $13107, %eax, %ecx # imm = 0x3333
+; X64-NDD-NEXT:    shrl $2, %eax
+; X64-NDD-NEXT:    andl $13107, %eax # imm = 0x3333
+; X64-NDD-NEXT:    addl %ecx, %eax
+; X64-NDD-NEXT:    shrl $4, %eax, %ecx
+; X64-NDD-NEXT:    addl %ecx, %eax
+; X64-NDD-NEXT:    andl $3855, %eax # imm = 0xF0F
+; X64-NDD-NEXT:    shrl $8, %eax, %ecx
+; X64-NDD-NEXT:    addl %ecx, %eax
+; X64-NDD-NEXT:    movzbl %al, %eax
+; X64-NDD-NEXT:    retq
   %cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
   %z = zext i16 %cnt to i32
   ret i32 %z