[llvm] ff91b2d - [X86] Promote i16 CTTZ/CTTZ_ZERO_UNDEF always.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 3 13:12:25 PDT 2022


Author: Craig Topper
Date: 2022-08-03T13:12:20-07:00
New Revision: ff91b2d9df8098fca31684c8595087ec9f921eda

URL: https://github.com/llvm/llvm-project/commit/ff91b2d9df8098fca31684c8595087ec9f921eda
DIFF: https://github.com/llvm/llvm-project/commit/ff91b2d9df8098fca31684c8595087ec9f921eda.diff

LOG: [X86] Promote i16 CTTZ/CTTZ_ZERO_UNDEF always.

If we're going to emit a rep prefix before bsf as proposed in
D130956, it makes sense to promote i16 operations to i32 to avoid
the false dependency of tzcntw.

Reviewed By: skan, pengfei

Differential Revision: https://reviews.llvm.org/D130995

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll
    llvm/test/CodeGen/X86/clz.ll
    llvm/test/CodeGen/X86/lzcnt-tzcnt.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2e98ec573aeca..c52a9642f9dda 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -368,15 +368,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   // encoding.
   setOperationPromotedToType(ISD::CTTZ           , MVT::i8   , MVT::i32);
   setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8   , MVT::i32);
+  // Promoted i16. tzcntw has a false dependency on Intel CPUs. For BSF, we emit
+  // a REP prefix to encode it as TZCNT for modern CPUs so it makes sense to
+  // promote that too.
+  setOperationPromotedToType(ISD::CTTZ           , MVT::i16  , MVT::i32);
+  setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i16  , MVT::i32);
 
-  if (Subtarget.hasBMI()) {
-    // Promote the i16 zero undef variant and force it on up to i32 when tzcnt
-    // is enabled.
-    setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i16, MVT::i32);
-  } else {
-    setOperationAction(ISD::CTTZ, MVT::i16, Custom);
+  if (!Subtarget.hasBMI()) {
     setOperationAction(ISD::CTTZ           , MVT::i32  , Custom);
-    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16  , Legal);
     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32  , Legal);
     if (Subtarget.is64Bit()) {
       setOperationAction(ISD::CTTZ         , MVT::i64  , Custom);

diff  --git a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll
index fc3e54e06d5fa..58b894a9da8b6 100644
--- a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll
@@ -11,12 +11,17 @@
 define i16 @test__tzcnt_u16(i16 %a0) {
 ; X86-LABEL: test__tzcnt_u16:
 ; X86:       # %bb.0:
-; X86-NEXT:    tzcntw {{[0-9]+}}(%esp), %ax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    orl $65536, %eax # imm = 0x10000
+; X86-NEXT:    tzcntl %eax, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test__tzcnt_u16:
 ; X64:       # %bb.0:
-; X64-NEXT:    tzcntw %di, %ax
+; X64-NEXT:    orl $65536, %edi # imm = 0x10000
+; X64-NEXT:    tzcntl %edi, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %zext = zext i16 %a0 to i32
   %cmp = icmp ne i32 %zext, 0
@@ -138,12 +143,17 @@ define i32 @test__tzcnt_u32(i32 %a0) {
 define i16 @test_tzcnt_u16(i16 %a0) {
 ; X86-LABEL: test_tzcnt_u16:
 ; X86:       # %bb.0:
-; X86-NEXT:    tzcntw {{[0-9]+}}(%esp), %ax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    orl $65536, %eax # imm = 0x10000
+; X86-NEXT:    tzcntl %eax, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_tzcnt_u16:
 ; X64:       # %bb.0:
-; X64-NEXT:    tzcntw %di, %ax
+; X64-NEXT:    orl $65536, %edi # imm = 0x10000
+; X64-NEXT:    tzcntl %edi, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %zext = zext i16 %a0 to i32
   %cmp = icmp ne i32 %zext, 0

diff  --git a/llvm/test/CodeGen/X86/clz.ll b/llvm/test/CodeGen/X86/clz.ll
index a4c34579fa72c..ff5bfc9b48979 100644
--- a/llvm/test/CodeGen/X86/clz.ll
+++ b/llvm/test/CodeGen/X86/clz.ll
@@ -46,12 +46,14 @@ define i8 @cttz_i8(i8 %x)  {
 define i16 @cttz_i16(i16 %x)  {
 ; X86-LABEL: cttz_i16:
 ; X86:       # %bb.0:
-; X86-NEXT:    rep bsfw {{[0-9]+}}(%esp), %ax
+; X86-NEXT:    rep bsfl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: cttz_i16:
 ; X64:       # %bb.0:
-; X64-NEXT:    rep bsfw %di, %ax
+; X64-NEXT:    rep bsfl %edi, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 ;
 ; X86-CLZ-LABEL: cttz_i16:
@@ -565,10 +567,12 @@ define i16 @cttz_i16_zero_test(i16 %n) {
 ; X86-NEXT:    testw %ax, %ax
 ; X86-NEXT:    je .LBB13_1
 ; X86-NEXT:  # %bb.2: # %cond.false
-; X86-NEXT:    rep bsfw %ax, %ax
+; X86-NEXT:    rep bsfl %eax, %eax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ; X86-NEXT:  .LBB13_1:
 ; X86-NEXT:    movw $16, %ax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: cttz_i16_zero_test:
@@ -576,20 +580,27 @@ define i16 @cttz_i16_zero_test(i16 %n) {
 ; X64-NEXT:    testw %di, %di
 ; X64-NEXT:    je .LBB13_1
 ; X64-NEXT:  # %bb.2: # %cond.false
-; X64-NEXT:    rep bsfw %di, %ax
+; X64-NEXT:    rep bsfl %edi, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 ; X64-NEXT:  .LBB13_1:
 ; X64-NEXT:    movw $16, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
 ;
 ; X86-CLZ-LABEL: cttz_i16_zero_test:
 ; X86-CLZ:       # %bb.0:
-; X86-CLZ-NEXT:    tzcntw {{[0-9]+}}(%esp), %ax
+; X86-CLZ-NEXT:    movl $65536, %eax # imm = 0x10000
+; X86-CLZ-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-CLZ-NEXT:    tzcntl %eax, %eax
+; X86-CLZ-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-CLZ-NEXT:    retl
 ;
 ; X64-CLZ-LABEL: cttz_i16_zero_test:
 ; X64-CLZ:       # %bb.0:
-; X64-CLZ-NEXT:    tzcntw %di, %ax
+; X64-CLZ-NEXT:    orl $65536, %edi # imm = 0x10000
+; X64-CLZ-NEXT:    tzcntl %edi, %eax
+; X64-CLZ-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-CLZ-NEXT:    retq
   %tmp1 = call i16 @llvm.cttz.i16(i16 %n, i1 false)
   ret i16 %tmp1

diff  --git a/llvm/test/CodeGen/X86/lzcnt-tzcnt.ll b/llvm/test/CodeGen/X86/lzcnt-tzcnt.ll
index f271bd055d048..bced63e1cf853 100644
--- a/llvm/test/CodeGen/X86/lzcnt-tzcnt.ll
+++ b/llvm/test/CodeGen/X86/lzcnt-tzcnt.ll
@@ -160,7 +160,9 @@ define i64 @test15_ctlz(ptr %ptr) {
 define i16 @test1_cttz(i16 %v) {
 ; CHECK-LABEL: test1_cttz:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    tzcntw %di, %ax
+; CHECK-NEXT:    orl $65536, %edi # imm = 0x10000
+; CHECK-NEXT:    tzcntl %edi, %eax
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
   %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
   %tobool = icmp eq i16 %v, 0
@@ -196,7 +198,9 @@ define i64 @test3_cttz(i64 %v) {
 define i16 @test4_cttz(i16 %v) {
 ; CHECK-LABEL: test4_cttz:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    tzcntw %di, %ax
+; CHECK-NEXT:    orl $65536, %edi # imm = 0x10000
+; CHECK-NEXT:    tzcntl %edi, %eax
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
   %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
   %tobool = icmp eq i16 0, %v
@@ -232,7 +236,10 @@ define i64 @test6_cttz(i64 %v) {
 define i16 @test10_cttz(ptr %ptr) {
 ; CHECK-LABEL: test10_cttz:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    tzcntw (%rdi), %ax
+; CHECK-NEXT:    movzwl (%rdi), %eax
+; CHECK-NEXT:    orl $65536, %eax # imm = 0x10000
+; CHECK-NEXT:    tzcntl %eax, %eax
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
   %v = load i16, ptr %ptr
   %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
@@ -271,7 +278,10 @@ define i64 @test12_cttz(ptr %ptr) {
 define i16 @test13_cttz(ptr %ptr) {
 ; CHECK-LABEL: test13_cttz:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    tzcntw (%rdi), %ax
+; CHECK-NEXT:    movzwl (%rdi), %eax
+; CHECK-NEXT:    orl $65536, %eax # imm = 0x10000
+; CHECK-NEXT:    tzcntl %eax, %eax
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
   %v = load i16, ptr %ptr
   %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
@@ -346,7 +356,9 @@ define i64 @test6b_ctlz(i64 %v) {
 define i16 @test4b_cttz(i16 %v) {
 ; CHECK-LABEL: test4b_cttz:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    tzcntw %di, %ax
+; CHECK-NEXT:    orl $65536, %edi # imm = 0x10000
+; CHECK-NEXT:    tzcntl %edi, %eax
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
   %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
   %tobool = icmp ne i16 %v, 0


        


More information about the llvm-commits mailing list