[llvm] ff91b2d - [X86] Promote i16 CTTZ/CTTZ_ZERO_UNDEF always.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 3 13:12:25 PDT 2022
Author: Craig Topper
Date: 2022-08-03T13:12:20-07:00
New Revision: ff91b2d9df8098fca31684c8595087ec9f921eda
URL: https://github.com/llvm/llvm-project/commit/ff91b2d9df8098fca31684c8595087ec9f921eda
DIFF: https://github.com/llvm/llvm-project/commit/ff91b2d9df8098fca31684c8595087ec9f921eda.diff
LOG: [X86] Promote i16 CTTZ/CTTZ_ZERO_UNDEF always.
If we're going to emit a rep prefix before bsf as proposed in
D130956, it makes sense to promote i16 operations to i32 to avoid
the false dependency of tzcntw.
Reviewed By: skan, pengfei
Differential Revision: https://reviews.llvm.org/D130995
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll
llvm/test/CodeGen/X86/clz.ll
llvm/test/CodeGen/X86/lzcnt-tzcnt.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2e98ec573aeca..c52a9642f9dda 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -368,15 +368,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// encoding.
setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
+ // Promoted i16. tzcntw has a false dependency on Intel CPUs. For BSF, we emit
+ // a REP prefix to encode it as TZCNT for modern CPUs so it makes sense to
+ // promote that too.
+ setOperationPromotedToType(ISD::CTTZ , MVT::i16 , MVT::i32);
+ setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , MVT::i32);
- if (Subtarget.hasBMI()) {
- // Promote the i16 zero undef variant and force it on up to i32 when tzcnt
- // is enabled.
- setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i16, MVT::i32);
- } else {
- setOperationAction(ISD::CTTZ, MVT::i16, Custom);
+ if (!Subtarget.hasBMI()) {
setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
diff --git a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll
index fc3e54e06d5fa..58b894a9da8b6 100644
--- a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel.ll
@@ -11,12 +11,17 @@
define i16 @test__tzcnt_u16(i16 %a0) {
; X86-LABEL: test__tzcnt_u16:
; X86: # %bb.0:
-; X86-NEXT: tzcntw {{[0-9]+}}(%esp), %ax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl $65536, %eax # imm = 0x10000
+; X86-NEXT: tzcntl %eax, %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test__tzcnt_u16:
; X64: # %bb.0:
-; X64-NEXT: tzcntw %di, %ax
+; X64-NEXT: orl $65536, %edi # imm = 0x10000
+; X64-NEXT: tzcntl %edi, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%zext = zext i16 %a0 to i32
%cmp = icmp ne i32 %zext, 0
@@ -138,12 +143,17 @@ define i32 @test__tzcnt_u32(i32 %a0) {
define i16 @test_tzcnt_u16(i16 %a0) {
; X86-LABEL: test_tzcnt_u16:
; X86: # %bb.0:
-; X86-NEXT: tzcntw {{[0-9]+}}(%esp), %ax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl $65536, %eax # imm = 0x10000
+; X86-NEXT: tzcntl %eax, %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_tzcnt_u16:
; X64: # %bb.0:
-; X64-NEXT: tzcntw %di, %ax
+; X64-NEXT: orl $65536, %edi # imm = 0x10000
+; X64-NEXT: tzcntl %edi, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%zext = zext i16 %a0 to i32
%cmp = icmp ne i32 %zext, 0
diff --git a/llvm/test/CodeGen/X86/clz.ll b/llvm/test/CodeGen/X86/clz.ll
index a4c34579fa72c..ff5bfc9b48979 100644
--- a/llvm/test/CodeGen/X86/clz.ll
+++ b/llvm/test/CodeGen/X86/clz.ll
@@ -46,12 +46,14 @@ define i8 @cttz_i8(i8 %x) {
define i16 @cttz_i16(i16 %x) {
; X86-LABEL: cttz_i16:
; X86: # %bb.0:
-; X86-NEXT: rep bsfw {{[0-9]+}}(%esp), %ax
+; X86-NEXT: rep bsfl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: cttz_i16:
; X64: # %bb.0:
-; X64-NEXT: rep bsfw %di, %ax
+; X64-NEXT: rep bsfl %edi, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: cttz_i16:
@@ -565,10 +567,12 @@ define i16 @cttz_i16_zero_test(i16 %n) {
; X86-NEXT: testw %ax, %ax
; X86-NEXT: je .LBB13_1
; X86-NEXT: # %bb.2: # %cond.false
-; X86-NEXT: rep bsfw %ax, %ax
+; X86-NEXT: rep bsfl %eax, %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
; X86-NEXT: .LBB13_1:
; X86-NEXT: movw $16, %ax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: cttz_i16_zero_test:
@@ -576,20 +580,27 @@ define i16 @cttz_i16_zero_test(i16 %n) {
; X64-NEXT: testw %di, %di
; X64-NEXT: je .LBB13_1
; X64-NEXT: # %bb.2: # %cond.false
-; X64-NEXT: rep bsfw %di, %ax
+; X64-NEXT: rep bsfl %edi, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
; X64-NEXT: .LBB13_1:
; X64-NEXT: movw $16, %ax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
;
; X86-CLZ-LABEL: cttz_i16_zero_test:
; X86-CLZ: # %bb.0:
-; X86-CLZ-NEXT: tzcntw {{[0-9]+}}(%esp), %ax
+; X86-CLZ-NEXT: movl $65536, %eax # imm = 0x10000
+; X86-CLZ-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-CLZ-NEXT: tzcntl %eax, %eax
+; X86-CLZ-NEXT: # kill: def $ax killed $ax killed $eax
; X86-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: cttz_i16_zero_test:
; X64-CLZ: # %bb.0:
-; X64-CLZ-NEXT: tzcntw %di, %ax
+; X64-CLZ-NEXT: orl $65536, %edi # imm = 0x10000
+; X64-CLZ-NEXT: tzcntl %edi, %eax
+; X64-CLZ-NEXT: # kill: def $ax killed $ax killed $eax
; X64-CLZ-NEXT: retq
%tmp1 = call i16 @llvm.cttz.i16(i16 %n, i1 false)
ret i16 %tmp1
diff --git a/llvm/test/CodeGen/X86/lzcnt-tzcnt.ll b/llvm/test/CodeGen/X86/lzcnt-tzcnt.ll
index f271bd055d048..bced63e1cf853 100644
--- a/llvm/test/CodeGen/X86/lzcnt-tzcnt.ll
+++ b/llvm/test/CodeGen/X86/lzcnt-tzcnt.ll
@@ -160,7 +160,9 @@ define i64 @test15_ctlz(ptr %ptr) {
define i16 @test1_cttz(i16 %v) {
; CHECK-LABEL: test1_cttz:
; CHECK: # %bb.0:
-; CHECK-NEXT: tzcntw %di, %ax
+; CHECK-NEXT: orl $65536, %edi # imm = 0x10000
+; CHECK-NEXT: tzcntl %edi, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
%tobool = icmp eq i16 %v, 0
@@ -196,7 +198,9 @@ define i64 @test3_cttz(i64 %v) {
define i16 @test4_cttz(i16 %v) {
; CHECK-LABEL: test4_cttz:
; CHECK: # %bb.0:
-; CHECK-NEXT: tzcntw %di, %ax
+; CHECK-NEXT: orl $65536, %edi # imm = 0x10000
+; CHECK-NEXT: tzcntl %edi, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
%tobool = icmp eq i16 0, %v
@@ -232,7 +236,10 @@ define i64 @test6_cttz(i64 %v) {
define i16 @test10_cttz(ptr %ptr) {
; CHECK-LABEL: test10_cttz:
; CHECK: # %bb.0:
-; CHECK-NEXT: tzcntw (%rdi), %ax
+; CHECK-NEXT: movzwl (%rdi), %eax
+; CHECK-NEXT: orl $65536, %eax # imm = 0x10000
+; CHECK-NEXT: tzcntl %eax, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%v = load i16, ptr %ptr
%cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
@@ -271,7 +278,10 @@ define i64 @test12_cttz(ptr %ptr) {
define i16 @test13_cttz(ptr %ptr) {
; CHECK-LABEL: test13_cttz:
; CHECK: # %bb.0:
-; CHECK-NEXT: tzcntw (%rdi), %ax
+; CHECK-NEXT: movzwl (%rdi), %eax
+; CHECK-NEXT: orl $65536, %eax # imm = 0x10000
+; CHECK-NEXT: tzcntl %eax, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%v = load i16, ptr %ptr
%cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
@@ -346,7 +356,9 @@ define i64 @test6b_ctlz(i64 %v) {
define i16 @test4b_cttz(i16 %v) {
; CHECK-LABEL: test4b_cttz:
; CHECK: # %bb.0:
-; CHECK-NEXT: tzcntw %di, %ax
+; CHECK-NEXT: orl $65536, %edi # imm = 0x10000
+; CHECK-NEXT: tzcntl %edi, %eax
+; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
%tobool = icmp ne i16 %v, 0
More information about the llvm-commits
mailing list