[llvm] d9245e8 - [X86][ISEL] Add NDD entries in X86ISelDAGToDAG.cpp
Shengchen Kan via llvm-commits
llvm-commits@lists.llvm.org
Fri Jan 26 07:03:19 PST 2024
Author: Shengchen Kan
Date: 2024-01-26T23:02:53+08:00
New Revision: d9245e8b471c6b3f61e3810faa9788b4994e295a
URL: https://github.com/llvm/llvm-project/commit/d9245e8b471c6b3f61e3810faa9788b4994e295a
DIFF: https://github.com/llvm/llvm-project/commit/d9245e8b471c6b3f61e3810faa9788b4994e295a.diff
LOG: [X86][ISEL] Add NDD entries in X86ISelDAGToDAG.cpp
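For context beyond the one-line log message: NDD refers to the "new data destination" forms that Intel APX adds, where an EVEX-encoded variant of a legacy instruction writes its result to a separate destination register instead of overwriting a source. The patch introduces a small GET_ND_IF_ENABLED macro and routes the existing opcode choices in X86ISelDAGToDAG.cpp through it, so the *_ND opcode is picked whenever the subtarget reports hasNDD(). Below is a minimal, self-contained sketch of that selection pattern; MockSubtarget, pickNeg, and the enum values are hypothetical stand-ins for LLVM's real X86Subtarget and tablegen-generated opcodes, while the macro body matches the one in the patch.

// Sketch of the opcode-selection pattern added by this patch.
// MockSubtarget and the enum values are placeholders, not LLVM's real
// tablegen-generated definitions.
#include <cstdio>

namespace X86 {
enum Opcode { NEG32r, NEG32r_ND, NEG64r, NEG64r_ND };
} // namespace X86

struct MockSubtarget {
  bool NDD = false; // set when the target has -mattr=+ndd (Intel APX)
  bool hasNDD() const { return NDD; }
};

// Same shape as the macro in the patch: token-paste "_ND" onto the opcode
// name when the subtarget supports new-data-destination forms.
#define GET_ND_IF_ENABLED(OPC) (Subtarget->hasNDD() ? OPC##_ND : OPC)

unsigned pickNeg(const MockSubtarget *Subtarget, bool Is64Bit) {
  return Is64Bit ? GET_ND_IF_ENABLED(X86::NEG64r)
                 : GET_ND_IF_ENABLED(X86::NEG32r);
}

int main() {
  MockSubtarget Legacy;              // selects NEG32r / NEG64r
  MockSubtarget APX;
  APX.NDD = true;                    // selects NEG32r_ND / NEG64r_ND
  std::printf("%u %u\n", pickNeg(&Legacy, false), pickNeg(&APX, false));
  return 0;
}

In the real code the same macro is applied to the NEG, SHR/SHL, and ADD/SUB/AND/OR/XOR opcode tables shown in the hunks below.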
Added:
Modified:
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
llvm/test/CodeGen/X86/cmp.ll
llvm/test/CodeGen/X86/popcnt.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index c0b7a5523b5d8f3..c8f80ced354538f 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -278,9 +278,11 @@ namespace {
Scale = getI8Imm(AM.Scale, DL);
+#define GET_ND_IF_ENABLED(OPC) (Subtarget->hasNDD() ? OPC##_ND : OPC)
// Negate the index if needed.
if (AM.NegateIndex) {
- unsigned NegOpc = VT == MVT::i64 ? X86::NEG64r : X86::NEG32r;
+ unsigned NegOpc = VT == MVT::i64 ? GET_ND_IF_ENABLED(X86::NEG64r)
+ : GET_ND_IF_ENABLED(X86::NEG32r);
SDValue Neg = SDValue(CurDAG->getMachineNode(NegOpc, DL, VT, MVT::i32,
AM.IndexReg), 0);
AM.IndexReg = Neg;
@@ -4143,7 +4145,8 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
if (!PreferBEXTR) {
// We still need to apply the shift.
SDValue ShAmt = CurDAG->getTargetConstant(Shift, dl, NVT);
- unsigned NewOpc = NVT == MVT::i64 ? X86::SHR64ri : X86::SHR32ri;
+ unsigned NewOpc = NVT == MVT::i64 ? GET_ND_IF_ENABLED(X86::SHR64ri)
+ : GET_ND_IF_ENABLED(X86::SHR32ri);
NewNode =
CurDAG->getMachineNode(NewOpc, dl, NVT, SDValue(NewNode, 0), ShAmt);
}
@@ -5338,41 +5341,101 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
case MVT::i8:
switch (Opcode) {
default: llvm_unreachable("Unexpected opcode!");
- case ISD::ADD: ROpc = X86::ADD8rr; MOpc = X86::ADD8rm; break;
- case ISD::SUB: ROpc = X86::SUB8rr; MOpc = X86::SUB8rm; break;
- case ISD::AND: ROpc = X86::AND8rr; MOpc = X86::AND8rm; break;
- case ISD::OR: ROpc = X86::OR8rr; MOpc = X86::OR8rm; break;
- case ISD::XOR: ROpc = X86::XOR8rr; MOpc = X86::XOR8rm; break;
+ case ISD::ADD:
+ ROpc = GET_ND_IF_ENABLED(X86::ADD8rr);
+ MOpc = GET_ND_IF_ENABLED(X86::ADD8rm);
+ break;
+ case ISD::SUB:
+ ROpc = GET_ND_IF_ENABLED(X86::SUB8rr);
+ MOpc = GET_ND_IF_ENABLED(X86::SUB8rm);
+ break;
+ case ISD::AND:
+ ROpc = GET_ND_IF_ENABLED(X86::AND8rr);
+ MOpc = GET_ND_IF_ENABLED(X86::AND8rm);
+ break;
+ case ISD::OR:
+ ROpc = GET_ND_IF_ENABLED(X86::OR8rr);
+ MOpc = GET_ND_IF_ENABLED(X86::OR8rm);
+ break;
+ case ISD::XOR:
+ ROpc = GET_ND_IF_ENABLED(X86::XOR8rr);
+ MOpc = GET_ND_IF_ENABLED(X86::XOR8rm);
+ break;
}
break;
case MVT::i16:
switch (Opcode) {
default: llvm_unreachable("Unexpected opcode!");
- case ISD::ADD: ROpc = X86::ADD16rr; MOpc = X86::ADD16rm; break;
- case ISD::SUB: ROpc = X86::SUB16rr; MOpc = X86::SUB16rm; break;
- case ISD::AND: ROpc = X86::AND16rr; MOpc = X86::AND16rm; break;
- case ISD::OR: ROpc = X86::OR16rr; MOpc = X86::OR16rm; break;
- case ISD::XOR: ROpc = X86::XOR16rr; MOpc = X86::XOR16rm; break;
+ case ISD::ADD:
+ ROpc = GET_ND_IF_ENABLED(X86::ADD16rr);
+ MOpc = GET_ND_IF_ENABLED(X86::ADD16rm);
+ break;
+ case ISD::SUB:
+ ROpc = GET_ND_IF_ENABLED(X86::SUB16rr);
+ MOpc = GET_ND_IF_ENABLED(X86::SUB16rm);
+ break;
+ case ISD::AND:
+ ROpc = GET_ND_IF_ENABLED(X86::AND16rr);
+ MOpc = GET_ND_IF_ENABLED(X86::AND16rm);
+ break;
+ case ISD::OR:
+ ROpc = GET_ND_IF_ENABLED(X86::OR16rr);
+ MOpc = GET_ND_IF_ENABLED(X86::OR16rm);
+ break;
+ case ISD::XOR:
+ ROpc = GET_ND_IF_ENABLED(X86::XOR16rr);
+ MOpc = GET_ND_IF_ENABLED(X86::XOR16rm);
+ break;
}
break;
case MVT::i32:
switch (Opcode) {
default: llvm_unreachable("Unexpected opcode!");
- case ISD::ADD: ROpc = X86::ADD32rr; MOpc = X86::ADD32rm; break;
- case ISD::SUB: ROpc = X86::SUB32rr; MOpc = X86::SUB32rm; break;
- case ISD::AND: ROpc = X86::AND32rr; MOpc = X86::AND32rm; break;
- case ISD::OR: ROpc = X86::OR32rr; MOpc = X86::OR32rm; break;
- case ISD::XOR: ROpc = X86::XOR32rr; MOpc = X86::XOR32rm; break;
+ case ISD::ADD:
+ ROpc = GET_ND_IF_ENABLED(X86::ADD32rr);
+ MOpc = GET_ND_IF_ENABLED(X86::ADD32rm);
+ break;
+ case ISD::SUB:
+ ROpc = GET_ND_IF_ENABLED(X86::SUB32rr);
+ MOpc = GET_ND_IF_ENABLED(X86::SUB32rm);
+ break;
+ case ISD::AND:
+ ROpc = GET_ND_IF_ENABLED(X86::AND32rr);
+ MOpc = GET_ND_IF_ENABLED(X86::AND32rm);
+ break;
+ case ISD::OR:
+ ROpc = GET_ND_IF_ENABLED(X86::OR32rr);
+ MOpc = GET_ND_IF_ENABLED(X86::OR32rm);
+ break;
+ case ISD::XOR:
+ ROpc = GET_ND_IF_ENABLED(X86::XOR32rr);
+ MOpc = GET_ND_IF_ENABLED(X86::XOR32rm);
+ break;
}
break;
case MVT::i64:
switch (Opcode) {
default: llvm_unreachable("Unexpected opcode!");
- case ISD::ADD: ROpc = X86::ADD64rr; MOpc = X86::ADD64rm; break;
- case ISD::SUB: ROpc = X86::SUB64rr; MOpc = X86::SUB64rm; break;
- case ISD::AND: ROpc = X86::AND64rr; MOpc = X86::AND64rm; break;
- case ISD::OR: ROpc = X86::OR64rr; MOpc = X86::OR64rm; break;
- case ISD::XOR: ROpc = X86::XOR64rr; MOpc = X86::XOR64rm; break;
+ case ISD::ADD:
+ ROpc = GET_ND_IF_ENABLED(X86::ADD64rr);
+ MOpc = GET_ND_IF_ENABLED(X86::ADD64rm);
+ break;
+ case ISD::SUB:
+ ROpc = GET_ND_IF_ENABLED(X86::SUB64rr);
+ MOpc = GET_ND_IF_ENABLED(X86::SUB64rm);
+ break;
+ case ISD::AND:
+ ROpc = GET_ND_IF_ENABLED(X86::AND64rr);
+ MOpc = GET_ND_IF_ENABLED(X86::AND64rm);
+ break;
+ case ISD::OR:
+ ROpc = GET_ND_IF_ENABLED(X86::OR64rr);
+ MOpc = GET_ND_IF_ENABLED(X86::OR64rm);
+ break;
+ case ISD::XOR:
+ ROpc = GET_ND_IF_ENABLED(X86::XOR64rr);
+ MOpc = GET_ND_IF_ENABLED(X86::XOR64rm);
+ break;
}
break;
}
@@ -5918,7 +5981,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
// If the mask covers the most significant bit, then we can replace
// TEST+AND with a SHR and check eflags.
// This emits a redundant TEST which is subsequently eliminated.
- ShiftOpcode = X86::SHR64ri;
+ ShiftOpcode = GET_ND_IF_ENABLED(X86::SHR64ri);
ShiftAmt = TrailingZeros;
SubRegIdx = 0;
TestOpcode = X86::TEST64rr;
@@ -5926,7 +5989,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
// If the mask covers the least significant bit, then we can replace
// TEST+AND with a SHL and check eflags.
// This emits a redundant TEST which is subsequently eliminated.
- ShiftOpcode = X86::SHL64ri;
+ ShiftOpcode = GET_ND_IF_ENABLED(X86::SHL64ri);
ShiftAmt = LeadingZeros;
SubRegIdx = 0;
TestOpcode = X86::TEST64rr;
@@ -5935,19 +5998,19 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
// wide, then replace it with a SHR and a TEST8rr/TEST16rr/TEST32rr.
unsigned PopCount = 64 - LeadingZeros - TrailingZeros;
if (PopCount == 8) {
- ShiftOpcode = X86::SHR64ri;
+ ShiftOpcode = GET_ND_IF_ENABLED(X86::SHR64ri);
ShiftAmt = TrailingZeros;
SubRegIdx = X86::sub_8bit;
SubRegVT = MVT::i8;
TestOpcode = X86::TEST8rr;
} else if (PopCount == 16) {
- ShiftOpcode = X86::SHR64ri;
+ ShiftOpcode = GET_ND_IF_ENABLED(X86::SHR64ri);
ShiftAmt = TrailingZeros;
SubRegIdx = X86::sub_16bit;
SubRegVT = MVT::i16;
TestOpcode = X86::TEST16rr;
} else if (PopCount == 32) {
- ShiftOpcode = X86::SHR64ri;
+ ShiftOpcode = GET_ND_IF_ENABLED(X86::SHR64ri);
ShiftAmt = TrailingZeros;
SubRegIdx = X86::sub_32bit;
SubRegVT = MVT::i32;
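The cmp.ll changes that follow add a second RUN line with -mattr=+ndd and matching NDD check lines. Where the new selection fires, the expected assembly uses the EVEX-encoded three-operand spelling, e.g. shrq $32, %rdi, %rcx, which puts the shifted value in %rcx and leaves %rdi intact; the "EVEX TO LEGACY Compression" comments mark ND instructions that were re-encoded in the shorter legacy form because their destination coincides with a source operand.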
diff --git a/llvm/test/CodeGen/X86/cmp.ll b/llvm/test/CodeGen/X86/cmp.ll
index e6ab3ec55ad92ed..89879c7f433644a 100644
--- a/llvm/test/CodeGen/X86/cmp.ll
+++ b/llvm/test/CodeGen/X86/cmp.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -show-mc-encoding | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ndd -show-mc-encoding | FileCheck --check-prefix=NDD %s
@d = dso_local global i8 0, align 1
@d64 = dso_local global i64 0
@@ -16,6 +17,18 @@ define i32 @test1(i32 %X, ptr %y) nounwind {
; CHECK-NEXT: .LBB0_2: # %ReturnBlock
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: test1:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: cmpl $0, (%rsi) # encoding: [0x83,0x3e,0x00]
+; NDD-NEXT: je .LBB0_2 # encoding: [0x74,A]
+; NDD-NEXT: # fixup A - offset: 1, value: .LBB0_2-1, kind: FK_PCRel_1
+; NDD-NEXT: # %bb.1: # %cond_true
+; NDD-NEXT: movl $1, %eax # encoding: [0xb8,0x01,0x00,0x00,0x00]
+; NDD-NEXT: retq # encoding: [0xc3]
+; NDD-NEXT: .LBB0_2: # %ReturnBlock
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
entry:
%tmp = load i32, ptr %y
%tmp.upgrd.1 = icmp eq i32 %tmp, 0
@@ -41,6 +54,19 @@ define i32 @test2(i32 %X, ptr %y) nounwind {
; CHECK-NEXT: .LBB1_2: # %ReturnBlock
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: test2:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: testl $536870911, (%rsi) # encoding: [0xf7,0x06,0xff,0xff,0xff,0x1f]
+; NDD-NEXT: # imm = 0x1FFFFFFF
+; NDD-NEXT: je .LBB1_2 # encoding: [0x74,A]
+; NDD-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1
+; NDD-NEXT: # %bb.1: # %cond_true
+; NDD-NEXT: movl $1, %eax # encoding: [0xb8,0x01,0x00,0x00,0x00]
+; NDD-NEXT: retq # encoding: [0xc3]
+; NDD-NEXT: .LBB1_2: # %ReturnBlock
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
entry:
%tmp = load i32, ptr %y
%tmp1 = shl i32 %tmp, 3
@@ -66,6 +92,18 @@ define i8 @test2b(i8 %X, ptr %y) nounwind {
; CHECK-NEXT: .LBB2_2: # %ReturnBlock
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: test2b:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: testb $31, (%rsi) # encoding: [0xf6,0x06,0x1f]
+; NDD-NEXT: je .LBB2_2 # encoding: [0x74,A]
+; NDD-NEXT: # fixup A - offset: 1, value: .LBB2_2-1, kind: FK_PCRel_1
+; NDD-NEXT: # %bb.1: # %cond_true
+; NDD-NEXT: movb $1, %al # encoding: [0xb0,0x01]
+; NDD-NEXT: retq # encoding: [0xc3]
+; NDD-NEXT: .LBB2_2: # %ReturnBlock
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
entry:
%tmp = load i8, ptr %y
%tmp1 = shl i8 %tmp, 3
@@ -86,6 +124,13 @@ define i64 @test3(i64 %x) nounwind {
; CHECK-NEXT: testq %rdi, %rdi # encoding: [0x48,0x85,0xff]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: test3:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: testq %rdi, %rdi # encoding: [0x48,0x85,0xff]
+; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
entry:
%t = icmp eq i64 %x, 0
%r = zext i1 %t to i64
@@ -99,6 +144,13 @@ define i64 @test4(i64 %x) nounwind {
; CHECK-NEXT: testq %rdi, %rdi # encoding: [0x48,0x85,0xff]
; CHECK-NEXT: setle %al # encoding: [0x0f,0x9e,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: test4:
+; NDD: # %bb.0:
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: testq %rdi, %rdi # encoding: [0x48,0x85,0xff]
+; NDD-NEXT: setle %al # encoding: [0x0f,0x9e,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
%t = icmp slt i64 %x, 1
%r = zext i1 %t to i64
ret i64 %r
@@ -124,6 +176,26 @@ define i32 @test5(double %A) nounwind {
; CHECK-NEXT: jmp foo@PLT # TAILCALL
; CHECK-NEXT: # encoding: [0xeb,A]
; CHECK-NEXT: # fixup A - offset: 1, value: foo@PLT-1, kind: FK_PCRel_1
+;
+; NDD-LABEL: test5:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # encoding: [0x66,0x0f,0x2e,0x05,A,A,A,A]
+; NDD-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
+; NDD-NEXT: ja .LBB5_3 # encoding: [0x77,A]
+; NDD-NEXT: # fixup A - offset: 1, value: .LBB5_3-1, kind: FK_PCRel_1
+; NDD-NEXT: # %bb.1: # %entry
+; NDD-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # encoding: [0x66,0x0f,0x2e,0x05,A,A,A,A]
+; NDD-NEXT: # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
+; NDD-NEXT: jb .LBB5_3 # encoding: [0x72,A]
+; NDD-NEXT: # fixup A - offset: 1, value: .LBB5_3-1, kind: FK_PCRel_1
+; NDD-NEXT: # %bb.2: # %bb12
+; NDD-NEXT: movl $32, %eax # encoding: [0xb8,0x20,0x00,0x00,0x00]
+; NDD-NEXT: retq # encoding: [0xc3]
+; NDD-NEXT: .LBB5_3: # %bb8
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: jmp foo@PLT # TAILCALL
+; NDD-NEXT: # encoding: [0xeb,A]
+; NDD-NEXT: # fixup A - offset: 1, value: foo@PLT-1, kind: FK_PCRel_1
entry:
%tmp2 = fcmp ogt double %A, 1.500000e+02
%tmp5 = fcmp ult double %A, 7.500000e+01
@@ -152,6 +224,18 @@ define i32 @test6() nounwind align 2 {
; CHECK-NEXT: .LBB6_1: # %T
; CHECK-NEXT: movl $1, %eax # encoding: [0xb8,0x01,0x00,0x00,0x00]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: test6:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: cmpq $0, -{{[0-9]+}}(%rsp) # encoding: [0x48,0x83,0x7c,0x24,0xf8,0x00]
+; NDD-NEXT: je .LBB6_1 # encoding: [0x74,A]
+; NDD-NEXT: # fixup A - offset: 1, value: .LBB6_1-1, kind: FK_PCRel_1
+; NDD-NEXT: # %bb.2: # %F
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
+; NDD-NEXT: .LBB6_1: # %T
+; NDD-NEXT: movl $1, %eax # encoding: [0xb8,0x01,0x00,0x00,0x00]
+; NDD-NEXT: retq # encoding: [0xc3]
entry:
%A = alloca { i64, i64 }, align 8
%B = getelementptr inbounds { i64, i64 }, ptr %A, i64 0, i32 1
@@ -173,6 +257,13 @@ define i32 @test7(i64 %res) nounwind {
; CHECK-NEXT: shrq $32, %rdi # encoding: [0x48,0xc1,0xef,0x20]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: test7:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: shrq $32, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x20]
+; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
entry:
%lnot = icmp ult i64 %res, 4294967296
%lnot.ext = zext i1 %lnot to i32
@@ -187,6 +278,14 @@ define i32 @test8(i64 %res) nounwind {
; CHECK-NEXT: cmpl $3, %edi # encoding: [0x83,0xff,0x03]
; CHECK-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: test8:
+; NDD: # %bb.0:
+; NDD-NEXT: shrq $32, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x20]
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: cmpl $3, %ecx # encoding: [0x83,0xf9,0x03]
+; NDD-NEXT: setb %al # encoding: [0x0f,0x92,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
%lnot = icmp ult i64 %res, 12884901888
%lnot.ext = zext i1 %lnot to i32
ret i32 %lnot.ext
@@ -199,6 +298,13 @@ define i32 @test9(i64 %res) nounwind {
; CHECK-NEXT: shrq $33, %rdi # encoding: [0x48,0xc1,0xef,0x21]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: test9:
+; NDD: # %bb.0:
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: shrq $33, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x21]
+; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
%lnot = icmp ult i64 %res, 8589934592
%lnot.ext = zext i1 %lnot to i32
ret i32 %lnot.ext
@@ -211,6 +317,13 @@ define i32 @test10(i64 %res) nounwind {
; CHECK-NEXT: shrq $32, %rdi # encoding: [0x48,0xc1,0xef,0x20]
; CHECK-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: test10:
+; NDD: # %bb.0:
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: shrq $32, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x20]
+; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
%lnot = icmp uge i64 %res, 4294967296
%lnot.ext = zext i1 %lnot to i32
ret i32 %lnot.ext
@@ -224,6 +337,14 @@ define i32 @test11(i64 %l) nounwind {
; CHECK-NEXT: cmpl $1, %edi # encoding: [0x83,0xff,0x01]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: test11:
+; NDD: # %bb.0:
+; NDD-NEXT: shrq $47, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x2f]
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: cmpl $1, %ecx # encoding: [0x83,0xf9,0x01]
+; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
%shr.mask = and i64 %l, -140737488355328
%cmp = icmp eq i64 %shr.mask, 140737488355328
%conv = zext i1 %cmp to i32
@@ -251,6 +372,27 @@ define i32 @test12() ssp uwtable {
; CHECK-NEXT: popq %rcx # encoding: [0x59]
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: test12:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: pushq %rax # encoding: [0x50]
+; NDD-NEXT: .cfi_def_cfa_offset 16
+; NDD-NEXT: callq test12b@PLT # encoding: [0xe8,A,A,A,A]
+; NDD-NEXT: # fixup A - offset: 1, value: test12b@PLT-4, kind: FK_PCRel_4
+; NDD-NEXT: testb %al, %al # encoding: [0x84,0xc0]
+; NDD-NEXT: je .LBB12_2 # encoding: [0x74,A]
+; NDD-NEXT: # fixup A - offset: 1, value: .LBB12_2-1, kind: FK_PCRel_1
+; NDD-NEXT: # %bb.1: # %T
+; NDD-NEXT: movl $1, %eax # encoding: [0xb8,0x01,0x00,0x00,0x00]
+; NDD-NEXT: popq %rcx # encoding: [0x59]
+; NDD-NEXT: .cfi_def_cfa_offset 8
+; NDD-NEXT: retq # encoding: [0xc3]
+; NDD-NEXT: .LBB12_2: # %F
+; NDD-NEXT: .cfi_def_cfa_offset 16
+; NDD-NEXT: movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00]
+; NDD-NEXT: popq %rcx # encoding: [0x59]
+; NDD-NEXT: .cfi_def_cfa_offset 8
+; NDD-NEXT: retq # encoding: [0xc3]
entry:
%tmp1 = call zeroext i1 @test12b()
br i1 %tmp1, label %T, label %F
@@ -271,6 +413,13 @@ define i32 @test13(i32 %mask, i32 %base, i32 %intra) {
; CHECK-NEXT: testb $8, %dil # encoding: [0x40,0xf6,0xc7,0x08]
; CHECK-NEXT: cmovnel %edx, %eax # encoding: [0x0f,0x45,0xc2]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: test13:
+; NDD: # %bb.0:
+; NDD-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
+; NDD-NEXT: testb $8, %dil # encoding: [0x40,0xf6,0xc7,0x08]
+; NDD-NEXT: cmovnel %edx, %eax # encoding: [0x0f,0x45,0xc2]
+; NDD-NEXT: retq # encoding: [0xc3]
%and = and i32 %mask, 8
%tobool = icmp ne i32 %and, 0
%cond = select i1 %tobool, i32 %intra, i32 %base
@@ -284,6 +433,13 @@ define i32 @test14(i32 %mask, i32 %base, i32 %intra) {
; CHECK-NEXT: shrl $7, %edi # encoding: [0xc1,0xef,0x07]
; CHECK-NEXT: cmovnsl %edx, %eax # encoding: [0x0f,0x49,0xc2]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: test14:
+; NDD: # %bb.0:
+; NDD-NEXT: movl %esi, %eax # encoding: [0x89,0xf0]
+; NDD-NEXT: shrl $7, %edi, %ecx # encoding: [0x62,0xf4,0x74,0x18,0xc1,0xef,0x07]
+; NDD-NEXT: cmovnsl %edx, %eax # encoding: [0x0f,0x49,0xc2]
+; NDD-NEXT: retq # encoding: [0xc3]
%s = lshr i32 %mask, 7
%tobool = icmp sgt i32 %s, -1
%cond = select i1 %tobool, i32 %intra, i32 %base
@@ -300,6 +456,15 @@ define zeroext i1 @test15(i32 %bf.load, i32 %n) {
; CHECK-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
; CHECK-NEXT: orb %cl, %al # encoding: [0x08,0xc8]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: test15:
+; NDD: # %bb.0:
+; NDD-NEXT: shrl $16, %edi, %eax # encoding: [0x62,0xf4,0x7c,0x18,0xc1,0xef,0x10]
+; NDD-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
+; NDD-NEXT: cmpl %esi, %eax # encoding: [0x39,0xf0]
+; NDD-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
+; NDD-NEXT: orb %cl, %al # EVEX TO LEGACY Compression encoding: [0x08,0xc8]
+; NDD-NEXT: retq # encoding: [0xc3]
%bf.lshr = lshr i32 %bf.load, 16
%cmp2 = icmp eq i32 %bf.lshr, 0
%cmp5 = icmp uge i32 %bf.lshr, %n
@@ -313,6 +478,12 @@ define i8 @signbit_i16(i16 signext %L) {
; CHECK-NEXT: testw %di, %di # encoding: [0x66,0x85,0xff]
; CHECK-NEXT: setns %al # encoding: [0x0f,0x99,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: signbit_i16:
+; NDD: # %bb.0:
+; NDD-NEXT: testw %di, %di # encoding: [0x66,0x85,0xff]
+; NDD-NEXT: setns %al # encoding: [0x0f,0x99,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
%lshr = lshr i16 %L, 15
%trunc = trunc i16 %lshr to i8
%not = xor i8 %trunc, 1
@@ -325,6 +496,12 @@ define i8 @signbit_i32(i32 %L) {
; CHECK-NEXT: testl %edi, %edi # encoding: [0x85,0xff]
; CHECK-NEXT: setns %al # encoding: [0x0f,0x99,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: signbit_i32:
+; NDD: # %bb.0:
+; NDD-NEXT: testl %edi, %edi # encoding: [0x85,0xff]
+; NDD-NEXT: setns %al # encoding: [0x0f,0x99,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
%lshr = lshr i32 %L, 31
%trunc = trunc i32 %lshr to i8
%not = xor i8 %trunc, 1
@@ -337,6 +514,12 @@ define i8 @signbit_i64(i64 %L) {
; CHECK-NEXT: testq %rdi, %rdi # encoding: [0x48,0x85,0xff]
; CHECK-NEXT: setns %al # encoding: [0x0f,0x99,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: signbit_i64:
+; NDD: # %bb.0:
+; NDD-NEXT: testq %rdi, %rdi # encoding: [0x48,0x85,0xff]
+; NDD-NEXT: setns %al # encoding: [0x0f,0x99,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
%lshr = lshr i64 %L, 63
%trunc = trunc i64 %lshr to i8
%not = xor i8 %trunc, 1
@@ -349,6 +532,12 @@ define zeroext i1 @signbit_i32_i1(i32 %L) {
; CHECK-NEXT: testl %edi, %edi # encoding: [0x85,0xff]
; CHECK-NEXT: setns %al # encoding: [0x0f,0x99,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: signbit_i32_i1:
+; NDD: # %bb.0:
+; NDD-NEXT: testl %edi, %edi # encoding: [0x85,0xff]
+; NDD-NEXT: setns %al # encoding: [0x0f,0x99,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
%lshr = lshr i32 %L, 31
%trunc = trunc i32 %lshr to i1
%not = xor i1 %trunc, true
@@ -371,6 +560,21 @@ define void @test20(i32 %bf.load, i8 %x1, ptr %b_addr) {
; CHECK-NEXT: setne d(%rip) # encoding: [0x0f,0x95,0x05,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: test20:
+; NDD: # %bb.0:
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: testl $16777215, %edi # encoding: [0xf7,0xc7,0xff,0xff,0xff,0x00]
+; NDD-NEXT: # imm = 0xFFFFFF
+; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT: movzbl %sil, %ecx # encoding: [0x40,0x0f,0xb6,0xce]
+; NDD-NEXT: addl %ecx, %eax # EVEX TO LEGACY Compression encoding: [0x01,0xc8]
+; NDD-NEXT: setne (%rdx) # encoding: [0x0f,0x95,0x02]
+; NDD-NEXT: testl $16777215, %edi # encoding: [0xf7,0xc7,0xff,0xff,0xff,0x00]
+; NDD-NEXT: # imm = 0xFFFFFF
+; NDD-NEXT: setne d(%rip) # encoding: [0x0f,0x95,0x05,A,A,A,A]
+; NDD-NEXT: # fixup A - offset: 3, value: d-4, kind: reloc_riprel_4byte
+; NDD-NEXT: retq # encoding: [0xc3]
%bf.shl = shl i32 %bf.load, 8
%bf.ashr = ashr exact i32 %bf.shl, 8
%tobool4 = icmp ne i32 %bf.ashr, 0
@@ -391,6 +595,11 @@ define i32 @highmask_i64_simplify(i64 %val) {
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: highmask_i64_simplify:
+; NDD: # %bb.0:
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
%and = and i64 %val, -2199023255552
%cmp = icmp ult i64 %and, 0
%ret = zext i1 %cmp to i32
@@ -404,6 +613,13 @@ define i32 @highmask_i64_mask64(i64 %val) {
; CHECK-NEXT: shrq $41, %rdi # encoding: [0x48,0xc1,0xef,0x29]
; CHECK-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: highmask_i64_mask64:
+; NDD: # %bb.0:
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: shrq $41, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x29]
+; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
%and = and i64 %val, -2199023255552
%cmp = icmp ne i64 %and, 0
%ret = zext i1 %cmp to i32
@@ -419,6 +635,14 @@ define i64 @highmask_i64_mask64_extra_use(i64 %val) nounwind {
; CHECK-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
; CHECK-NEXT: imulq %rdi, %rax # encoding: [0x48,0x0f,0xaf,0xc7]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: highmask_i64_mask64_extra_use:
+; NDD: # %bb.0:
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: shrq $41, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x29]
+; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT: imulq %rdi, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0xaf,0xc7]
+; NDD-NEXT: retq # encoding: [0xc3]
%and = and i64 %val, -2199023255552
%cmp = icmp ne i64 %and, 0
%z = zext i1 %cmp to i64
@@ -433,6 +657,13 @@ define i32 @highmask_i64_mask32(i64 %val) {
; CHECK-NEXT: shrq $20, %rdi # encoding: [0x48,0xc1,0xef,0x14]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: highmask_i64_mask32:
+; NDD: # %bb.0:
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: shrq $20, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xef,0x14]
+; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
%and = and i64 %val, -1048576
%cmp = icmp eq i64 %and, 0
%ret = zext i1 %cmp to i32
@@ -448,6 +679,15 @@ define i64 @highmask_i64_mask32_extra_use(i64 %val) nounwind {
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: imulq %rdi, %rax # encoding: [0x48,0x0f,0xaf,0xc7]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: highmask_i64_mask32_extra_use:
+; NDD: # %bb.0:
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: testq $-1048576, %rdi # encoding: [0x48,0xf7,0xc7,0x00,0x00,0xf0,0xff]
+; NDD-NEXT: # imm = 0xFFF00000
+; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT: imulq %rdi, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0xaf,0xc7]
+; NDD-NEXT: retq # encoding: [0xc3]
%and = and i64 %val, -1048576
%cmp = icmp eq i64 %and, 0
%z = zext i1 %cmp to i64
@@ -462,6 +702,13 @@ define i32 @highmask_i64_mask8(i64 %val) {
; CHECK-NEXT: testq $-16, %rdi # encoding: [0x48,0xf7,0xc7,0xf0,0xff,0xff,0xff]
; CHECK-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: highmask_i64_mask8:
+; NDD: # %bb.0:
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: testq $-16, %rdi # encoding: [0x48,0xf7,0xc7,0xf0,0xff,0xff,0xff]
+; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
%and = and i64 %val, -16
%cmp = icmp ne i64 %and, 0
%ret = zext i1 %cmp to i32
@@ -475,6 +722,13 @@ define i32 @lowmask_i64_mask64(i64 %val) {
; CHECK-NEXT: shlq $16, %rdi # encoding: [0x48,0xc1,0xe7,0x10]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: lowmask_i64_mask64:
+; NDD: # %bb.0:
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: shlq $16, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xe7,0x10]
+; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
%and = and i64 %val, 281474976710655
%cmp = icmp eq i64 %and, 0
%ret = zext i1 %cmp to i32
@@ -490,6 +744,14 @@ define i64 @lowmask_i64_mask64_extra_use(i64 %val) nounwind {
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: imulq %rdi, %rax # encoding: [0x48,0x0f,0xaf,0xc7]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: lowmask_i64_mask64_extra_use:
+; NDD: # %bb.0:
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: shlq $16, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xe7,0x10]
+; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT: imulq %rdi, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0xaf,0xc7]
+; NDD-NEXT: retq # encoding: [0xc3]
%and = and i64 %val, 281474976710655
%cmp = icmp eq i64 %and, 0
%z = zext i1 %cmp to i64
@@ -504,6 +766,13 @@ define i32 @lowmask_i64_mask32(i64 %val) {
; CHECK-NEXT: shlq $44, %rdi # encoding: [0x48,0xc1,0xe7,0x2c]
; CHECK-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: lowmask_i64_mask32:
+; NDD: # %bb.0:
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: shlq $44, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0xc1,0xe7,0x2c]
+; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
%and = and i64 %val, 1048575
%cmp = icmp ne i64 %and, 0
%ret = zext i1 %cmp to i32
@@ -519,6 +788,15 @@ define i64 @lowmask_i64_mask32_extra_use(i64 %val) nounwind {
; CHECK-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
; CHECK-NEXT: imulq %rdi, %rax # encoding: [0x48,0x0f,0xaf,0xc7]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: lowmask_i64_mask32_extra_use:
+; NDD: # %bb.0:
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: testl $1048575, %edi # encoding: [0xf7,0xc7,0xff,0xff,0x0f,0x00]
+; NDD-NEXT: # imm = 0xFFFFF
+; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT: imulq %rdi, %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0xaf,0xc7]
+; NDD-NEXT: retq # encoding: [0xc3]
%and = and i64 %val, 1048575
%cmp = icmp ne i64 %and, 0
%z = zext i1 %cmp to i64
@@ -533,6 +811,13 @@ define i32 @lowmask_i64_mask8(i64 %val) {
; CHECK-NEXT: testb $31, %dil # encoding: [0x40,0xf6,0xc7,0x1f]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: lowmask_i64_mask8:
+; NDD: # %bb.0:
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: testb $31, %dil # encoding: [0x40,0xf6,0xc7,0x1f]
+; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
%and = and i64 %val, 31
%cmp = icmp eq i64 %and, 0
%ret = zext i1 %cmp to i32
@@ -547,6 +832,14 @@ define i32 @highmask_i32_mask32(i32 %val) {
; CHECK-NEXT: # imm = 0xFFF00000
; CHECK-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: highmask_i32_mask32:
+; NDD: # %bb.0:
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: testl $-1048576, %edi # encoding: [0xf7,0xc7,0x00,0x00,0xf0,0xff]
+; NDD-NEXT: # imm = 0xFFF00000
+; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
%and = and i32 %val, -1048576
%cmp = icmp ne i32 %and, 0
%ret = zext i1 %cmp to i32
@@ -560,6 +853,13 @@ define i32 @highmask_i32_mask8(i32 %val) {
; CHECK-NEXT: testl $-16, %edi # encoding: [0xf7,0xc7,0xf0,0xff,0xff,0xff]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: highmask_i32_mask8:
+; NDD: # %bb.0:
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: testl $-16, %edi # encoding: [0xf7,0xc7,0xf0,0xff,0xff,0xff]
+; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
%and = and i32 %val, -16
%cmp = icmp eq i32 %and, 0
%ret = zext i1 %cmp to i32
@@ -574,6 +874,14 @@ define i32 @lowmask_i32_mask32(i32 %val) {
; CHECK-NEXT: # imm = 0xFFFFF
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: lowmask_i32_mask32:
+; NDD: # %bb.0:
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: testl $1048575, %edi # encoding: [0xf7,0xc7,0xff,0xff,0x0f,0x00]
+; NDD-NEXT: # imm = 0xFFFFF
+; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
%and = and i32 %val, 1048575
%cmp = icmp eq i32 %and, 0
%ret = zext i1 %cmp to i32
@@ -587,6 +895,13 @@ define i32 @lowmask_i32_mask8(i32 %val) {
; CHECK-NEXT: testb $31, %dil # encoding: [0x40,0xf6,0xc7,0x1f]
; CHECK-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: lowmask_i32_mask8:
+; NDD: # %bb.0:
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: testb $31, %dil # encoding: [0x40,0xf6,0xc7,0x1f]
+; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
%and = and i32 %val, 31
%cmp = icmp ne i32 %and, 0
%ret = zext i1 %cmp to i32
@@ -600,6 +915,13 @@ define i1 @shifted_mask64_testb(i64 %a) {
; CHECK-NEXT: testb %dil, %dil # encoding: [0x40,0x84,0xff]
; CHECK-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: shifted_mask64_testb:
+; NDD: # %bb.0:
+; NDD-NEXT: shrq $50, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0xef,0x32]
+; NDD-NEXT: testb %al, %al # encoding: [0x84,0xc0]
+; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, 287104476244869120 ; 0xff << 50
%v1 = icmp ne i64 %v0, 0
ret i1 %v1
@@ -612,6 +934,13 @@ define i1 @shifted_mask64_testw(i64 %a) {
; CHECK-NEXT: testw %di, %di # encoding: [0x66,0x85,0xff]
; CHECK-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: shifted_mask64_testw:
+; NDD: # %bb.0:
+; NDD-NEXT: shrq $33, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0xef,0x21]
+; NDD-NEXT: testw %ax, %ax # encoding: [0x66,0x85,0xc0]
+; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, 562941363486720 ; 0xffff << 33
%v1 = icmp ne i64 %v0, 0
ret i1 %v1
@@ -624,6 +953,13 @@ define i1 @shifted_mask64_testl(i64 %a) {
; CHECK-NEXT: testl %edi, %edi # encoding: [0x85,0xff]
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: shifted_mask64_testl:
+; NDD: # %bb.0:
+; NDD-NEXT: shrq $7, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0xc1,0xef,0x07]
+; NDD-NEXT: testl %eax, %eax # encoding: [0x85,0xc0]
+; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, 549755813760 ; 0xffffffff << 7
%v1 = icmp eq i64 %v0, 0
ret i1 %v1
@@ -639,6 +975,16 @@ define i1 @shifted_mask64_extra_use_const(i64 %a) {
; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: shifted_mask64_extra_use_const:
+; NDD: # %bb.0:
+; NDD-NEXT: movabsq $287104476244869120, %rcx # encoding: [0x48,0xb9,0x00,0x00,0x00,0x00,0x00,0x00,0xfc,0x03]
+; NDD-NEXT: # imm = 0x3FC000000000000
+; NDD-NEXT: andq %rcx, %rdi, %rax # encoding: [0x62,0xf4,0xfc,0x18,0x21,0xcf]
+; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NDD-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
+; NDD-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, 287104476244869120 ; 0xff << 50
%v1 = icmp ne i64 %v0, 0
store i64 287104476244869120, ptr @d64
@@ -655,6 +1001,16 @@ define i1 @shifted_mask64_extra_use_and(i64 %a) {
; CHECK-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: shifted_mask64_extra_use_and:
+; NDD: # %bb.0:
+; NDD-NEXT: movabsq $287104476244869120, %rax # encoding: [0x48,0xb8,0x00,0x00,0x00,0x00,0x00,0x00,0xfc,0x03]
+; NDD-NEXT: # imm = 0x3FC000000000000
+; NDD-NEXT: andq %rax, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x21,0xc7]
+; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NDD-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
+; NDD-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, 287104476244869120 ; 0xff << 50
%v1 = icmp ne i64 %v0, 0
store i64 %v0, ptr @d64
@@ -668,6 +1024,13 @@ define i1 @shifted_mask32_testl_immediate(i64 %a) {
; CHECK-NEXT: # imm = 0x3FC0000
; CHECK-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: shifted_mask32_testl_immediate:
+; NDD: # %bb.0:
+; NDD-NEXT: testl $66846720, %edi # encoding: [0xf7,0xc7,0x00,0x00,0xfc,0x03]
+; NDD-NEXT: # imm = 0x3FC0000
+; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, 66846720 ; 0xff << 18
%v1 = icmp ne i64 %v0, 0
ret i1 %v1
@@ -683,6 +1046,16 @@ define i1 @shifted_mask32_extra_use_const(i64 %a) {
; CHECK-NEXT: # fixup A - offset: 3, value: d64-8, kind: reloc_riprel_4byte
; CHECK-NEXT: # imm = 0x3FC0000
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: shifted_mask32_extra_use_const:
+; NDD: # %bb.0:
+; NDD-NEXT: testl $66846720, %edi # encoding: [0xf7,0xc7,0x00,0x00,0xfc,0x03]
+; NDD-NEXT: # imm = 0x3FC0000
+; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT: movq $66846720, d64(%rip) # encoding: [0x48,0xc7,0x05,A,A,A,A,0x00,0x00,0xfc,0x03]
+; NDD-NEXT: # fixup A - offset: 3, value: d64-8, kind: reloc_riprel_4byte
+; NDD-NEXT: # imm = 0x3FC0000
+; NDD-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, 66846720 ; 0xff << 18
%v1 = icmp ne i64 %v0, 0
store i64 66846720, ptr @d64
@@ -698,6 +1071,15 @@ define i1 @shifted_mask32_extra_use_and(i64 %a) {
; CHECK-NEXT: movq %rdi, d64(%rip) # encoding: [0x48,0x89,0x3d,A,A,A,A]
; CHECK-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: shifted_mask32_extra_use_and:
+; NDD: # %bb.0:
+; NDD-NEXT: andq $66846720, %rdi, %rcx # encoding: [0x62,0xf4,0xf4,0x18,0x81,0xe7,0x00,0x00,0xfc,0x03]
+; NDD-NEXT: # imm = 0x3FC0000
+; NDD-NEXT: setne %al # encoding: [0x0f,0x95,0xc0]
+; NDD-NEXT: movq %rcx, d64(%rip) # encoding: [0x48,0x89,0x0d,A,A,A,A]
+; NDD-NEXT: # fixup A - offset: 3, value: d64-4, kind: reloc_riprel_4byte
+; NDD-NEXT: retq # encoding: [0xc3]
%v0 = and i64 %a, 66846720 ; 0xff << 50
%v1 = icmp ne i64 %v0, 0
store i64 %v0, ptr @d64
@@ -713,6 +1095,15 @@ define { i64, i64 } @pr39968(i64, i64, i32) {
; CHECK-NEXT: cmovneq %rdi, %rax # encoding: [0x48,0x0f,0x45,0xc7]
; CHECK-NEXT: movq %rsi, %rdx # encoding: [0x48,0x89,0xf2]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: pr39968:
+; NDD: # %bb.0:
+; NDD-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; NDD-NEXT: testb $64, %dl # encoding: [0xf6,0xc2,0x40]
+; NDD-NEXT: cmovneq %rdi, %rsi # encoding: [0x48,0x0f,0x45,0xf7]
+; NDD-NEXT: cmovneq %rdi, %rax # encoding: [0x48,0x0f,0x45,0xc7]
+; NDD-NEXT: movq %rsi, %rdx # encoding: [0x48,0x89,0xf2]
+; NDD-NEXT: retq # encoding: [0xc3]
%4 = and i32 %2, 64
%5 = icmp ne i32 %4, 0
%6 = select i1 %5, i64 %0, i64 %1
@@ -736,6 +1127,18 @@ define i32 @pr42189(i16 signext %c) {
; CHECK-NEXT: jmp g@PLT # TAILCALL
; CHECK-NEXT: # encoding: [0xeb,A]
; CHECK-NEXT: # fixup A - offset: 1, value: g@PLT-1, kind: FK_PCRel_1
+;
+; NDD-LABEL: pr42189:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: cmpl $32767, %edi # encoding: [0x81,0xff,0xff,0x7f,0x00,0x00]
+; NDD-NEXT: # imm = 0x7FFF
+; NDD-NEXT: jne f@PLT # TAILCALL
+; NDD-NEXT: # encoding: [0x75,A]
+; NDD-NEXT: # fixup A - offset: 1, value: f@PLT-1, kind: FK_PCRel_1
+; NDD-NEXT: # %bb.1: # %if.then
+; NDD-NEXT: jmp g@PLT # TAILCALL
+; NDD-NEXT: # encoding: [0xeb,A]
+; NDD-NEXT: # fixup A - offset: 1, value: g@PLT-1, kind: FK_PCRel_1
entry:
%cmp = icmp eq i16 %c, 32767
br i1 %cmp, label %if.then, label %if.end
@@ -766,6 +1169,13 @@ define i1 @fold_test_and_with_chain(i32* %x, i32* %y, i32 %z) {
; CHECK-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
; CHECK-NEXT: movl %edx, (%rsi) # encoding: [0x89,0x16]
; CHECK-NEXT: retq # encoding: [0xc3]
+;
+; NDD-LABEL: fold_test_and_with_chain:
+; NDD: # %bb.0:
+; NDD-NEXT: andl (%rdi), %edx, %eax # encoding: [0x62,0xf4,0x7c,0x18,0x23,0x17]
+; NDD-NEXT: sete %al # encoding: [0x0f,0x94,0xc0]
+; NDD-NEXT: movl %edx, (%rsi) # encoding: [0x89,0x16]
+; NDD-NEXT: retq # encoding: [0xc3]
%a = load i32, i32* %x
%b = and i32 %z, %a
%c = icmp eq i32 %b, 0
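The popcnt.ll updates below add an X64-NDD run line (-mattr=+ndd) covering the bit-twiddling ctpop expansion used when a popcnt instruction is unavailable; the expected sequences use three-operand ND forms such as shrl $1, %edi, %eax and andl $13107, %eax, %ecx, so each intermediate result can land directly in a fresh register rather than first copying the source.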
diff --git a/llvm/test/CodeGen/X86/popcnt.ll b/llvm/test/CodeGen/X86/popcnt.ll
index a0879ad930a302d..2d780370a110bce 100644
--- a/llvm/test/CodeGen/X86/popcnt.ll
+++ b/llvm/test/CodeGen/X86/popcnt.ll
@@ -3,6 +3,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+popcnt | FileCheck %s --check-prefix=X86-POPCNT
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+popcnt | FileCheck %s --check-prefix=X64-POPCNT
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd | FileCheck %s --check-prefix=X64-NDD
; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2
; RUN: llc < %s -mtriple=i686-unknown -mattr=ssse3 | FileCheck %s --check-prefixes=X86,X86-SSSE3
@@ -55,6 +56,20 @@ define i8 @cnt8(i8 %x) nounwind readnone {
; X64-POPCNT-NEXT: popcntl %eax, %eax
; X64-POPCNT-NEXT: # kill: def $al killed $al killed $eax
; X64-POPCNT-NEXT: retq
+;
+; X64-NDD-LABEL: cnt8:
+; X64-NDD: # %bb.0:
+; X64-NDD-NEXT: shrb $1, %dil, %al
+; X64-NDD-NEXT: andb $85, %al
+; X64-NDD-NEXT: subb %al, %dil, %al
+; X64-NDD-NEXT: andb $51, %al, %cl
+; X64-NDD-NEXT: shrb $2, %al
+; X64-NDD-NEXT: andb $51, %al
+; X64-NDD-NEXT: addb %cl, %al
+; X64-NDD-NEXT: shrb $4, %al, %cl
+; X64-NDD-NEXT: addb %cl, %al
+; X64-NDD-NEXT: andb $15, %al
+; X64-NDD-NEXT: retq
%cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
ret i8 %cnt
}
@@ -118,6 +133,24 @@ define i16 @cnt16(i16 %x) nounwind readnone {
; X64-POPCNT-NEXT: popcntl %eax, %eax
; X64-POPCNT-NEXT: # kill: def $ax killed $ax killed $eax
; X64-POPCNT-NEXT: retq
+;
+; X64-NDD-LABEL: cnt16:
+; X64-NDD: # %bb.0:
+; X64-NDD-NEXT: shrl $1, %edi, %eax
+; X64-NDD-NEXT: andl $21845, %eax # imm = 0x5555
+; X64-NDD-NEXT: subl %eax, %edi, %eax
+; X64-NDD-NEXT: andl $13107, %eax, %ecx # imm = 0x3333
+; X64-NDD-NEXT: shrl $2, %eax
+; X64-NDD-NEXT: andl $13107, %eax # imm = 0x3333
+; X64-NDD-NEXT: addl %ecx, %eax
+; X64-NDD-NEXT: shrl $4, %eax, %ecx
+; X64-NDD-NEXT: addl %ecx, %eax
+; X64-NDD-NEXT: andl $3855, %eax # imm = 0xF0F
+; X64-NDD-NEXT: shrl $8, %eax, %ecx
+; X64-NDD-NEXT: addl %ecx, %eax
+; X64-NDD-NEXT: movzbl %al, %eax
+; X64-NDD-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NDD-NEXT: retq
%cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
ret i16 %cnt
}
@@ -171,6 +204,22 @@ define i32 @cnt32(i32 %x) nounwind readnone {
; X64-POPCNT: # %bb.0:
; X64-POPCNT-NEXT: popcntl %edi, %eax
; X64-POPCNT-NEXT: retq
+;
+; X64-NDD-LABEL: cnt32:
+; X64-NDD: # %bb.0:
+; X64-NDD-NEXT: shrl $1, %edi, %eax
+; X64-NDD-NEXT: andl $1431655765, %eax # imm = 0x55555555
+; X64-NDD-NEXT: subl %eax, %edi, %eax
+; X64-NDD-NEXT: andl $858993459, %eax, %ecx # imm = 0x33333333
+; X64-NDD-NEXT: shrl $2, %eax
+; X64-NDD-NEXT: andl $858993459, %eax # imm = 0x33333333
+; X64-NDD-NEXT: addl %ecx, %eax
+; X64-NDD-NEXT: shrl $4, %eax, %ecx
+; X64-NDD-NEXT: addl %ecx, %eax
+; X64-NDD-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
+; X64-NDD-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
+; X64-NDD-NEXT: shrl $24, %eax
+; X64-NDD-NEXT: retq
%cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
ret i32 %cnt
}
@@ -250,6 +299,26 @@ define i64 @cnt64(i64 %x) nounwind readnone {
; X64-POPCNT-NEXT: popcntq %rdi, %rax
; X64-POPCNT-NEXT: retq
;
+; X64-NDD-LABEL: cnt64:
+; X64-NDD: # %bb.0:
+; X64-NDD-NEXT: shrq $1, %rdi, %rax
+; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-NDD-NEXT: andq %rcx, %rax
+; X64-NDD-NEXT: subq %rax, %rdi, %rax
+; X64-NDD-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
+; X64-NDD-NEXT: andq %rcx, %rax, %rdx
+; X64-NDD-NEXT: shrq $2, %rax
+; X64-NDD-NEXT: andq %rcx, %rax
+; X64-NDD-NEXT: addq %rdx, %rax
+; X64-NDD-NEXT: shrq $4, %rax, %rcx
+; X64-NDD-NEXT: addq %rcx, %rax
+; X64-NDD-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
+; X64-NDD-NEXT: andq %rcx, %rax
+; X64-NDD-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
+; X64-NDD-NEXT: imulq %rcx, %rax
+; X64-NDD-NEXT: shrq $56, %rax
+; X64-NDD-NEXT: retq
+;
; X86-SSE2-LABEL: cnt64:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
@@ -444,6 +513,40 @@ define i128 @cnt128(i128 %x) nounwind readnone {
; X64-POPCNT-NEXT: xorl %edx, %edx
; X64-POPCNT-NEXT: retq
;
+; X64-NDD-LABEL: cnt128:
+; X64-NDD: # %bb.0:
+; X64-NDD-NEXT: shrq $1, %rsi, %rax
+; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-NDD-NEXT: andq %rcx, %rax
+; X64-NDD-NEXT: subq %rax, %rsi, %rax
+; X64-NDD-NEXT: movabsq $3689348814741910323, %rdx # imm = 0x3333333333333333
+; X64-NDD-NEXT: andq %rdx, %rax, %rsi
+; X64-NDD-NEXT: shrq $2, %rax
+; X64-NDD-NEXT: andq %rdx, %rax
+; X64-NDD-NEXT: addq %rsi, %rax
+; X64-NDD-NEXT: shrq $4, %rax, %rsi
+; X64-NDD-NEXT: addq %rsi, %rax
+; X64-NDD-NEXT: movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
+; X64-NDD-NEXT: andq %rsi, %rax
+; X64-NDD-NEXT: movabsq $72340172838076673, %r8 # imm = 0x101010101010101
+; X64-NDD-NEXT: imulq %r8, %rax
+; X64-NDD-NEXT: shrq $56, %rax
+; X64-NDD-NEXT: shrq $1, %rdi, %r9
+; X64-NDD-NEXT: andq %r9, %rcx
+; X64-NDD-NEXT: subq %rcx, %rdi, %rcx
+; X64-NDD-NEXT: andq %rdx, %rcx, %rdi
+; X64-NDD-NEXT: shrq $2, %rcx
+; X64-NDD-NEXT: andq %rdx, %rcx
+; X64-NDD-NEXT: addq %rdi, %rcx
+; X64-NDD-NEXT: shrq $4, %rcx, %rdx
+; X64-NDD-NEXT: addq %rdx, %rcx
+; X64-NDD-NEXT: andq %rsi, %rcx
+; X64-NDD-NEXT: imulq %r8, %rcx
+; X64-NDD-NEXT: shrq $56, %rcx
+; X64-NDD-NEXT: addq %rcx, %rax
+; X64-NDD-NEXT: xorl %edx, %edx
+; X64-NDD-NEXT: retq
+;
; X86-SSE2-LABEL: cnt128:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -603,6 +706,26 @@ define i64 @cnt64_noimplicitfloat(i64 %x) nounwind readnone noimplicitfloat {
; X64-POPCNT: # %bb.0:
; X64-POPCNT-NEXT: popcntq %rdi, %rax
; X64-POPCNT-NEXT: retq
+;
+; X64-NDD-LABEL: cnt64_noimplicitfloat:
+; X64-NDD: # %bb.0:
+; X64-NDD-NEXT: shrq $1, %rdi, %rax
+; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-NDD-NEXT: andq %rcx, %rax
+; X64-NDD-NEXT: subq %rax, %rdi, %rax
+; X64-NDD-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
+; X64-NDD-NEXT: andq %rcx, %rax, %rdx
+; X64-NDD-NEXT: shrq $2, %rax
+; X64-NDD-NEXT: andq %rcx, %rax
+; X64-NDD-NEXT: addq %rdx, %rax
+; X64-NDD-NEXT: shrq $4, %rax, %rcx
+; X64-NDD-NEXT: addq %rcx, %rax
+; X64-NDD-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
+; X64-NDD-NEXT: andq %rcx, %rax
+; X64-NDD-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
+; X64-NDD-NEXT: imulq %rcx, %rax
+; X64-NDD-NEXT: shrq $56, %rax
+; X64-NDD-NEXT: retq
%cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
ret i64 %cnt
}
@@ -658,6 +781,23 @@ define i32 @cnt32_optsize(i32 %x) nounwind readnone optsize {
; X64-POPCNT: # %bb.0:
; X64-POPCNT-NEXT: popcntl %edi, %eax
; X64-POPCNT-NEXT: retq
+;
+; X64-NDD-LABEL: cnt32_optsize:
+; X64-NDD: # %bb.0:
+; X64-NDD-NEXT: shrl $1, %edi, %eax
+; X64-NDD-NEXT: andl $1431655765, %eax # imm = 0x55555555
+; X64-NDD-NEXT: subl %eax, %edi, %eax
+; X64-NDD-NEXT: movl $858993459, %ecx # imm = 0x33333333
+; X64-NDD-NEXT: andl %ecx, %eax, %edx
+; X64-NDD-NEXT: shrl $2, %eax
+; X64-NDD-NEXT: andl %ecx, %eax
+; X64-NDD-NEXT: addl %edx, %eax
+; X64-NDD-NEXT: shrl $4, %eax, %ecx
+; X64-NDD-NEXT: addl %ecx, %eax
+; X64-NDD-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
+; X64-NDD-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
+; X64-NDD-NEXT: shrl $24, %eax
+; X64-NDD-NEXT: retq
%cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
ret i32 %cnt
}
@@ -746,6 +886,26 @@ define i64 @cnt64_optsize(i64 %x) nounwind readnone optsize {
; X64-POPCNT-NEXT: popcntq %rdi, %rax
; X64-POPCNT-NEXT: retq
;
+; X64-NDD-LABEL: cnt64_optsize:
+; X64-NDD: # %bb.0:
+; X64-NDD-NEXT: shrq $1, %rdi, %rax
+; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-NDD-NEXT: andq %rcx, %rax
+; X64-NDD-NEXT: subq %rax, %rdi, %rax
+; X64-NDD-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
+; X64-NDD-NEXT: andq %rcx, %rax, %rdx
+; X64-NDD-NEXT: shrq $2, %rax
+; X64-NDD-NEXT: andq %rcx, %rax
+; X64-NDD-NEXT: addq %rdx, %rax
+; X64-NDD-NEXT: shrq $4, %rax, %rcx
+; X64-NDD-NEXT: addq %rcx, %rax
+; X64-NDD-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
+; X64-NDD-NEXT: andq %rcx, %rax
+; X64-NDD-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
+; X64-NDD-NEXT: imulq %rcx, %rax
+; X64-NDD-NEXT: shrq $56, %rax
+; X64-NDD-NEXT: retq
+;
; X86-SSE2-LABEL: cnt64_optsize:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
@@ -949,6 +1109,40 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
; X64-POPCNT-NEXT: xorl %edx, %edx
; X64-POPCNT-NEXT: retq
;
+; X64-NDD-LABEL: cnt128_optsize:
+; X64-NDD: # %bb.0:
+; X64-NDD-NEXT: shrq $1, %rsi, %rax
+; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-NDD-NEXT: andq %rcx, %rax
+; X64-NDD-NEXT: subq %rax, %rsi, %rax
+; X64-NDD-NEXT: movabsq $3689348814741910323, %rdx # imm = 0x3333333333333333
+; X64-NDD-NEXT: andq %rdx, %rax, %rsi
+; X64-NDD-NEXT: shrq $2, %rax
+; X64-NDD-NEXT: andq %rdx, %rax
+; X64-NDD-NEXT: addq %rsi, %rax
+; X64-NDD-NEXT: shrq $4, %rax, %rsi
+; X64-NDD-NEXT: addq %rsi, %rax
+; X64-NDD-NEXT: movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
+; X64-NDD-NEXT: andq %rsi, %rax
+; X64-NDD-NEXT: movabsq $72340172838076673, %r8 # imm = 0x101010101010101
+; X64-NDD-NEXT: imulq %r8, %rax
+; X64-NDD-NEXT: shrq $56, %rax
+; X64-NDD-NEXT: shrq $1, %rdi, %r9
+; X64-NDD-NEXT: andq %r9, %rcx
+; X64-NDD-NEXT: subq %rcx, %rdi, %rcx
+; X64-NDD-NEXT: andq %rdx, %rcx, %rdi
+; X64-NDD-NEXT: shrq $2, %rcx
+; X64-NDD-NEXT: andq %rdx, %rcx
+; X64-NDD-NEXT: addq %rdi, %rcx
+; X64-NDD-NEXT: shrq $4, %rcx, %rdx
+; X64-NDD-NEXT: addq %rdx, %rcx
+; X64-NDD-NEXT: andq %rsi, %rcx
+; X64-NDD-NEXT: imulq %r8, %rcx
+; X64-NDD-NEXT: shrq $56, %rcx
+; X64-NDD-NEXT: addq %rcx, %rax
+; X64-NDD-NEXT: xorl %edx, %edx
+; X64-NDD-NEXT: retq
+;
; X86-SSE2-LABEL: cnt128_optsize:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1085,6 +1279,22 @@ define i32 @cnt32_pgso(i32 %x) nounwind readnone !prof !14 {
; X64-POPCNT: # %bb.0:
; X64-POPCNT-NEXT: popcntl %edi, %eax
; X64-POPCNT-NEXT: retq
+;
+; X64-NDD-LABEL: cnt32_pgso:
+; X64-NDD: # %bb.0:
+; X64-NDD-NEXT: shrl $1, %edi, %eax
+; X64-NDD-NEXT: andl $1431655765, %eax # imm = 0x55555555
+; X64-NDD-NEXT: subl %eax, %edi, %eax
+; X64-NDD-NEXT: andl $858993459, %eax, %ecx # imm = 0x33333333
+; X64-NDD-NEXT: shrl $2, %eax
+; X64-NDD-NEXT: andl $858993459, %eax # imm = 0x33333333
+; X64-NDD-NEXT: addl %ecx, %eax
+; X64-NDD-NEXT: shrl $4, %eax, %ecx
+; X64-NDD-NEXT: addl %ecx, %eax
+; X64-NDD-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
+; X64-NDD-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
+; X64-NDD-NEXT: shrl $24, %eax
+; X64-NDD-NEXT: retq
%cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
ret i32 %cnt
}
@@ -1164,6 +1374,26 @@ define i64 @cnt64_pgso(i64 %x) nounwind readnone !prof !14 {
; X64-POPCNT-NEXT: popcntq %rdi, %rax
; X64-POPCNT-NEXT: retq
;
+; X64-NDD-LABEL: cnt64_pgso:
+; X64-NDD: # %bb.0:
+; X64-NDD-NEXT: shrq $1, %rdi, %rax
+; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-NDD-NEXT: andq %rcx, %rax
+; X64-NDD-NEXT: subq %rax, %rdi, %rax
+; X64-NDD-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
+; X64-NDD-NEXT: andq %rcx, %rax, %rdx
+; X64-NDD-NEXT: shrq $2, %rax
+; X64-NDD-NEXT: andq %rcx, %rax
+; X64-NDD-NEXT: addq %rdx, %rax
+; X64-NDD-NEXT: shrq $4, %rax, %rcx
+; X64-NDD-NEXT: addq %rcx, %rax
+; X64-NDD-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
+; X64-NDD-NEXT: andq %rcx, %rax
+; X64-NDD-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
+; X64-NDD-NEXT: imulq %rcx, %rax
+; X64-NDD-NEXT: shrq $56, %rax
+; X64-NDD-NEXT: retq
+;
; X86-SSE2-LABEL: cnt64_pgso:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
@@ -1360,6 +1590,40 @@ define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 {
; X64-POPCNT-NEXT: xorl %edx, %edx
; X64-POPCNT-NEXT: retq
;
+; X64-NDD-LABEL: cnt128_pgso:
+; X64-NDD: # %bb.0:
+; X64-NDD-NEXT: shrq $1, %rsi, %rax
+; X64-NDD-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-NDD-NEXT: andq %rcx, %rax
+; X64-NDD-NEXT: subq %rax, %rsi, %rax
+; X64-NDD-NEXT: movabsq $3689348814741910323, %rdx # imm = 0x3333333333333333
+; X64-NDD-NEXT: andq %rdx, %rax, %rsi
+; X64-NDD-NEXT: shrq $2, %rax
+; X64-NDD-NEXT: andq %rdx, %rax
+; X64-NDD-NEXT: addq %rsi, %rax
+; X64-NDD-NEXT: shrq $4, %rax, %rsi
+; X64-NDD-NEXT: addq %rsi, %rax
+; X64-NDD-NEXT: movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
+; X64-NDD-NEXT: andq %rsi, %rax
+; X64-NDD-NEXT: movabsq $72340172838076673, %r8 # imm = 0x101010101010101
+; X64-NDD-NEXT: imulq %r8, %rax
+; X64-NDD-NEXT: shrq $56, %rax
+; X64-NDD-NEXT: shrq $1, %rdi, %r9
+; X64-NDD-NEXT: andq %r9, %rcx
+; X64-NDD-NEXT: subq %rcx, %rdi, %rcx
+; X64-NDD-NEXT: andq %rdx, %rcx, %rdi
+; X64-NDD-NEXT: shrq $2, %rcx
+; X64-NDD-NEXT: andq %rdx, %rcx
+; X64-NDD-NEXT: addq %rdi, %rcx
+; X64-NDD-NEXT: shrq $4, %rcx, %rdx
+; X64-NDD-NEXT: addq %rdx, %rcx
+; X64-NDD-NEXT: andq %rsi, %rcx
+; X64-NDD-NEXT: imulq %r8, %rcx
+; X64-NDD-NEXT: shrq $56, %rcx
+; X64-NDD-NEXT: addq %rcx, %rax
+; X64-NDD-NEXT: xorl %edx, %edx
+; X64-NDD-NEXT: retq
+;
; X86-SSE2-LABEL: cnt128_pgso:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1497,6 +1761,22 @@ define i32 @popcount_zext_i32(i16 zeroext %x) {
; X64-POPCNT: # %bb.0:
; X64-POPCNT-NEXT: popcntl %edi, %eax
; X64-POPCNT-NEXT: retq
+;
+; X64-NDD-LABEL: popcount_zext_i32:
+; X64-NDD: # %bb.0:
+; X64-NDD-NEXT: shrl $1, %edi, %eax
+; X64-NDD-NEXT: andl $21845, %eax # imm = 0x5555
+; X64-NDD-NEXT: subl %eax, %edi, %eax
+; X64-NDD-NEXT: andl $858993459, %eax, %ecx # imm = 0x33333333
+; X64-NDD-NEXT: shrl $2, %eax
+; X64-NDD-NEXT: andl $858993459, %eax # imm = 0x33333333
+; X64-NDD-NEXT: addl %ecx, %eax
+; X64-NDD-NEXT: shrl $4, %eax, %ecx
+; X64-NDD-NEXT: addl %ecx, %eax
+; X64-NDD-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
+; X64-NDD-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
+; X64-NDD-NEXT: shrl $24, %eax
+; X64-NDD-NEXT: retq
%z = zext i16 %x to i32
%cnt = tail call i32 @llvm.ctpop.i32(i32 %z)
ret i32 %cnt
@@ -1556,6 +1836,23 @@ define i32 @popcount_i16_zext(i16 zeroext %x) {
; X64-POPCNT: # %bb.0:
; X64-POPCNT-NEXT: popcntl %edi, %eax
; X64-POPCNT-NEXT: retq
+;
+; X64-NDD-LABEL: popcount_i16_zext:
+; X64-NDD: # %bb.0:
+; X64-NDD-NEXT: shrl $1, %edi, %eax
+; X64-NDD-NEXT: andl $21845, %eax # imm = 0x5555
+; X64-NDD-NEXT: subl %eax, %edi, %eax
+; X64-NDD-NEXT: andl $13107, %eax, %ecx # imm = 0x3333
+; X64-NDD-NEXT: shrl $2, %eax
+; X64-NDD-NEXT: andl $13107, %eax # imm = 0x3333
+; X64-NDD-NEXT: addl %ecx, %eax
+; X64-NDD-NEXT: shrl $4, %eax, %ecx
+; X64-NDD-NEXT: addl %ecx, %eax
+; X64-NDD-NEXT: andl $3855, %eax # imm = 0xF0F
+; X64-NDD-NEXT: shrl $8, %eax, %ecx
+; X64-NDD-NEXT: addl %ecx, %eax
+; X64-NDD-NEXT: movzbl %al, %eax
+; X64-NDD-NEXT: retq
%cnt = tail call i16 @llvm.ctpop.i16(i16 %x)
%z = zext i16 %cnt to i32
ret i32 %z