[llvm] d98d625 - [X86] combineSETCC - drop unnecessary shift amount bounds check for larger-than-legal ICMP_ZERO(AND(X,SHL(1,IDX))) folds (#182021)

via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 19 05:46:54 PST 2026


Author: Simon Pilgrim
Date: 2026-02-19T13:46:50Z
New Revision: d98d625ff7ab14f0651138721d773c66d49504c4

URL: https://github.com/llvm/llvm-project/commit/d98d625ff7ab14f0651138721d773c66d49504c4
DIFF: https://github.com/llvm/llvm-project/commit/d98d625ff7ab14f0651138721d773c66d49504c4.diff

LOG: [X86] combineSETCC - drop unnecessary shift amount bounds check for larger-than-legal ICMP_ZERO(AND(X,SHL(1,IDX))) folds (#182021)

For i128 etc. bittest patterns, we split the pattern into an i32
extraction + i32 bittest.

But we were unnecessarily limiting this to inbounds shift amounts. This
fold was written at the same time as narrowBitOpRMW, where the bounds
check is needed for safe memory access; that check isn't necessary in
combineSETCC.

Fix 2 of 2 for #147216

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
    llvm/test/CodeGen/X86/known-pow2.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 060987ffe46d2..b41dd2f70abaf 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57565,23 +57565,18 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
       SDValue X, ShAmt;
       if (sd_match(LHS, m_OneUse(m_And(m_Value(X),
                                        m_Shl(m_One(), m_Value(ShAmt)))))) {
-        // Only attempt this if the shift amount is known to be in bounds.
-        KnownBits KnownAmt = DAG.computeKnownBits(ShAmt);
-        if (KnownAmt.getMaxValue().ult(OpVT.getScalarSizeInBits())) {
-          EVT AmtVT = ShAmt.getValueType();
-          SDValue AlignAmt =
-              DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
-                          DAG.getSignedConstant(-32LL, DL, AmtVT));
-          SDValue ModuloAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
-                                          DAG.getConstant(31, DL, AmtVT));
-          SDValue Mask = DAG.getNode(
-              ISD::SHL, DL, MVT::i32, DAG.getConstant(1, DL, MVT::i32),
-              DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8));
-          X = DAG.getNode(ISD::SRL, DL, OpVT, X, AlignAmt);
-          X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
-          X = DAG.getNode(ISD::AND, DL, MVT::i32, X, Mask);
-          return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, MVT::i32), CC);
-        }
+        EVT AmtVT = ShAmt.getValueType();
+        SDValue AlignAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
+                                       DAG.getSignedConstant(-32LL, DL, AmtVT));
+        SDValue ModuloAmt = DAG.getNode(ISD::AND, DL, AmtVT, ShAmt,
+                                        DAG.getConstant(31, DL, AmtVT));
+        SDValue Mask = DAG.getNode(ISD::SHL, DL, MVT::i32,
+                                   DAG.getConstant(1, DL, MVT::i32),
+                                   DAG.getZExtOrTrunc(ModuloAmt, DL, MVT::i8));
+        X = DAG.getNode(ISD::SRL, DL, OpVT, X, AlignAmt);
+        X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
+        X = DAG.getNode(ISD::AND, DL, MVT::i32, X, Mask);
+        return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, MVT::i32), CC);
       }
     }
 

diff  --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
index 19744a897f415..26c8e5c95824a 100644
--- a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2                  < %s | FileCheck %s --check-prefixes=X86,X86-SSE2,X86-BMI1
 ; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi             < %s | FileCheck %s --check-prefixes=X86,X86-SSE2,X86-BMI1
-; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2       < %s | FileCheck %s --check-prefixes=X86,X86-SSE2,X86-BMI2,X86-BMI2-SSE2
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2       < %s | FileCheck %s --check-prefixes=X86,X86-SSE2,X86-BMI2
 ; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse,sse2,+bmi,+bmi2,+avx2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2,AVX2,X86-BMI2-AVX2
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2                  < %s | FileCheck %s --check-prefixes=X64,X64-SSE2,X64-BMI1
 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,sse2,+bmi             < %s | FileCheck %s --check-prefixes=X64,X64-SSE2,X64-BMI1
@@ -341,43 +341,20 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind {
 }
 
 define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind {
-; X86-BMI1-LABEL: scalar_i64_lowestbit_eq:
-; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1-NEXT:    movl $1, %eax
-; X86-BMI1-NEXT:    xorl %esi, %esi
-; X86-BMI1-NEXT:    xorl %edx, %edx
-; X86-BMI1-NEXT:    shldl %cl, %eax, %edx
-; X86-BMI1-NEXT:    shll %cl, %eax
-; X86-BMI1-NEXT:    testb $32, %cl
-; X86-BMI1-NEXT:    cmovnel %eax, %edx
-; X86-BMI1-NEXT:    cmovnel %esi, %eax
-; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
-; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    orl %edx, %eax
-; X86-BMI1-NEXT:    sete %al
-; X86-BMI1-NEXT:    popl %esi
-; X86-BMI1-NEXT:    retl
-;
-; X86-BMI2-LABEL: scalar_i64_lowestbit_eq:
-; X86-BMI2:       # %bb.0:
-; X86-BMI2-NEXT:    pushl %esi
-; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-NEXT:    movl $1, %edx
-; X86-BMI2-NEXT:    xorl %esi, %esi
-; X86-BMI2-NEXT:    xorl %eax, %eax
-; X86-BMI2-NEXT:    shldl %cl, %edx, %eax
-; X86-BMI2-NEXT:    shlxl %ecx, %edx, %edx
-; X86-BMI2-NEXT:    testb $32, %cl
-; X86-BMI2-NEXT:    cmovnel %edx, %eax
-; X86-BMI2-NEXT:    cmovnel %esi, %edx
-; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-NEXT:    andl {{[0-9]+}}(%esp), %edx
-; X86-BMI2-NEXT:    orl %eax, %edx
-; X86-BMI2-NEXT:    sete %al
-; X86-BMI2-NEXT:    popl %esi
-; X86-BMI2-NEXT:    retl
+; X86-LABEL: scalar_i64_lowestbit_eq:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    testb $32, %al
+; X86-NEXT:    je .LBB10_1
+; X86-NEXT:  # %bb.2:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    jmp .LBB10_3
+; X86-NEXT:  .LBB10_1:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:  .LBB10_3:
+; X86-NEXT:    btl %eax, %ecx
+; X86-NEXT:    setae %al
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: scalar_i64_lowestbit_eq:
 ; X64:       # %bb.0:
@@ -454,146 +431,64 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind {
 }
 
 define i1 @scalar_i128_lowestbit_eq(i128 %x, i128 %y) nounwind {
-; X86-BMI1-LABEL: scalar_i128_lowestbit_eq:
-; X86-BMI1:       # %bb.0:
-; X86-BMI1-NEXT:    pushl %edi
-; X86-BMI1-NEXT:    pushl %esi
-; X86-BMI1-NEXT:    subl $36, %esp
-; X86-BMI1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI1-NEXT:    movss {{.*#+}} xmm0 = [1,0,0,0]
-; X86-BMI1-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp)
-; X86-BMI1-NEXT:    xorps %xmm0, %xmm0
-; X86-BMI1-NEXT:    movaps %xmm0, (%esp)
-; X86-BMI1-NEXT:    movl %ecx, %eax
-; X86-BMI1-NEXT:    shrb $3, %al
-; X86-BMI1-NEXT:    andb $12, %al
-; X86-BMI1-NEXT:    negb %al
-; X86-BMI1-NEXT:    movsbl %al, %esi
-; X86-BMI1-NEXT:    movl 24(%esp,%esi), %edi
-; X86-BMI1-NEXT:    movl 28(%esp,%esi), %eax
-; X86-BMI1-NEXT:    shldl %cl, %edi, %eax
-; X86-BMI1-NEXT:    movl 16(%esp,%esi), %edx
-; X86-BMI1-NEXT:    movl 20(%esp,%esi), %esi
-; X86-BMI1-NEXT:    shldl %cl, %esi, %edi
-; X86-BMI1-NEXT:    shldl %cl, %edx, %esi
-; X86-BMI1-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-BMI1-NEXT:    shll %cl, %edx
-; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edi
-; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %edx
-; X86-BMI1-NEXT:    orl %edi, %edx
-; X86-BMI1-NEXT:    andl {{[0-9]+}}(%esp), %esi
-; X86-BMI1-NEXT:    orl %eax, %esi
-; X86-BMI1-NEXT:    orl %edx, %esi
-; X86-BMI1-NEXT:    sete %al
-; X86-BMI1-NEXT:    addl $36, %esp
-; X86-BMI1-NEXT:    popl %esi
-; X86-BMI1-NEXT:    popl %edi
-; X86-BMI1-NEXT:    retl
-;
-; X86-BMI2-SSE2-LABEL: scalar_i128_lowestbit_eq:
-; X86-BMI2-SSE2:       # %bb.0:
-; X86-BMI2-SSE2-NEXT:    pushl %edi
-; X86-BMI2-SSE2-NEXT:    pushl %esi
-; X86-BMI2-SSE2-NEXT:    subl $36, %esp
-; X86-BMI2-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-SSE2-NEXT:    movss {{.*#+}} xmm0 = [1,0,0,0]
-; X86-BMI2-SSE2-NEXT:    movaps %xmm0, {{[0-9]+}}(%esp)
-; X86-BMI2-SSE2-NEXT:    xorps %xmm0, %xmm0
-; X86-BMI2-SSE2-NEXT:    movaps %xmm0, (%esp)
-; X86-BMI2-SSE2-NEXT:    movl %ecx, %eax
-; X86-BMI2-SSE2-NEXT:    shrb $3, %al
-; X86-BMI2-SSE2-NEXT:    andb $12, %al
-; X86-BMI2-SSE2-NEXT:    negb %al
-; X86-BMI2-SSE2-NEXT:    movsbl %al, %edx
-; X86-BMI2-SSE2-NEXT:    movl 24(%esp,%edx), %esi
-; X86-BMI2-SSE2-NEXT:    movl 28(%esp,%edx), %eax
-; X86-BMI2-SSE2-NEXT:    shldl %cl, %esi, %eax
-; X86-BMI2-SSE2-NEXT:    movl 16(%esp,%edx), %edi
-; X86-BMI2-SSE2-NEXT:    movl 20(%esp,%edx), %edx
-; X86-BMI2-SSE2-NEXT:    shldl %cl, %edx, %esi
-; X86-BMI2-SSE2-NEXT:    shldl %cl, %edi, %edx
-; X86-BMI2-SSE2-NEXT:    shlxl %ecx, %edi, %ecx
-; X86-BMI2-SSE2-NEXT:    andl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-SSE2-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-SSE2-NEXT:    andl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-SSE2-NEXT:    orl %esi, %ecx
-; X86-BMI2-SSE2-NEXT:    andl {{[0-9]+}}(%esp), %edx
-; X86-BMI2-SSE2-NEXT:    orl %eax, %edx
-; X86-BMI2-SSE2-NEXT:    orl %ecx, %edx
-; X86-BMI2-SSE2-NEXT:    sete %al
-; X86-BMI2-SSE2-NEXT:    addl $36, %esp
-; X86-BMI2-SSE2-NEXT:    popl %esi
-; X86-BMI2-SSE2-NEXT:    popl %edi
-; X86-BMI2-SSE2-NEXT:    retl
+; X86-SSE2-LABEL: scalar_i128_lowestbit_eq:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    subl $44, %esp
+; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    movaps {{[0-9]+}}(%esp), %xmm0
+; X86-SSE2-NEXT:    xorps %xmm1, %xmm1
+; X86-SSE2-NEXT:    movaps %xmm1, {{[0-9]+}}(%esp)
+; X86-SSE2-NEXT:    movaps %xmm0, (%esp)
+; X86-SSE2-NEXT:    movl %eax, %ecx
+; X86-SSE2-NEXT:    shrb $3, %cl
+; X86-SSE2-NEXT:    andb $12, %cl
+; X86-SSE2-NEXT:    movzbl %cl, %ecx
+; X86-SSE2-NEXT:    movl (%esp,%ecx), %ecx
+; X86-SSE2-NEXT:    btl %eax, %ecx
+; X86-SSE2-NEXT:    setae %al
+; X86-SSE2-NEXT:    addl $44, %esp
+; X86-SSE2-NEXT:    retl
 ;
 ; X86-BMI2-AVX2-LABEL: scalar_i128_lowestbit_eq:
 ; X86-BMI2-AVX2:       # %bb.0:
-; X86-BMI2-AVX2-NEXT:    pushl %edi
-; X86-BMI2-AVX2-NEXT:    pushl %esi
-; X86-BMI2-AVX2-NEXT:    subl $36, %esp
-; X86-BMI2-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-AVX2-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,1,0,0,0]
-; X86-BMI2-AVX2-NEXT:    vmovups %ymm0, (%esp)
-; X86-BMI2-AVX2-NEXT:    movl %ecx, %eax
-; X86-BMI2-AVX2-NEXT:    shrb $3, %al
-; X86-BMI2-AVX2-NEXT:    andb $12, %al
-; X86-BMI2-AVX2-NEXT:    negb %al
-; X86-BMI2-AVX2-NEXT:    movsbl %al, %edx
-; X86-BMI2-AVX2-NEXT:    movl 24(%esp,%edx), %esi
-; X86-BMI2-AVX2-NEXT:    movl 28(%esp,%edx), %eax
-; X86-BMI2-AVX2-NEXT:    shldl %cl, %esi, %eax
-; X86-BMI2-AVX2-NEXT:    movl 16(%esp,%edx), %edi
-; X86-BMI2-AVX2-NEXT:    movl 20(%esp,%edx), %edx
-; X86-BMI2-AVX2-NEXT:    shldl %cl, %edx, %esi
-; X86-BMI2-AVX2-NEXT:    shldl %cl, %edi, %edx
-; X86-BMI2-AVX2-NEXT:    shlxl %ecx, %edi, %ecx
-; X86-BMI2-AVX2-NEXT:    andl {{[0-9]+}}(%esp), %esi
-; X86-BMI2-AVX2-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-BMI2-AVX2-NEXT:    andl {{[0-9]+}}(%esp), %ecx
-; X86-BMI2-AVX2-NEXT:    orl %esi, %ecx
-; X86-BMI2-AVX2-NEXT:    andl {{[0-9]+}}(%esp), %edx
-; X86-BMI2-AVX2-NEXT:    orl %eax, %edx
-; X86-BMI2-AVX2-NEXT:    orl %ecx, %edx
-; X86-BMI2-AVX2-NEXT:    sete %al
-; X86-BMI2-AVX2-NEXT:    addl $36, %esp
-; X86-BMI2-AVX2-NEXT:    popl %esi
-; X86-BMI2-AVX2-NEXT:    popl %edi
-; X86-BMI2-AVX2-NEXT:    vzeroupper
+; X86-BMI2-AVX2-NEXT:    subl $44, %esp
+; X86-BMI2-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI2-AVX2-NEXT:    vmovaps {{[0-9]+}}(%esp), %xmm0
+; X86-BMI2-AVX2-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X86-BMI2-AVX2-NEXT:    vmovaps %xmm1, {{[0-9]+}}(%esp)
+; X86-BMI2-AVX2-NEXT:    vmovaps %xmm0, (%esp)
+; X86-BMI2-AVX2-NEXT:    movl %eax, %ecx
+; X86-BMI2-AVX2-NEXT:    shrb $3, %cl
+; X86-BMI2-AVX2-NEXT:    andb $12, %cl
+; X86-BMI2-AVX2-NEXT:    movzbl %cl, %ecx
+; X86-BMI2-AVX2-NEXT:    movl (%esp,%ecx), %ecx
+; X86-BMI2-AVX2-NEXT:    btl %eax, %ecx
+; X86-BMI2-AVX2-NEXT:    setae %al
+; X86-BMI2-AVX2-NEXT:    addl $44, %esp
 ; X86-BMI2-AVX2-NEXT:    retl
 ;
 ; X64-BMI1-LABEL: scalar_i128_lowestbit_eq:
 ; X64-BMI1:       # %bb.0:
-; X64-BMI1-NEXT:    movq %rdx, %rcx
-; X64-BMI1-NEXT:    xorl %eax, %eax
-; X64-BMI1-NEXT:    movl $1, %edx
-; X64-BMI1-NEXT:    xorl %r8d, %r8d
-; X64-BMI1-NEXT:    shldq %cl, %rdx, %r8
-; X64-BMI1-NEXT:    shlq %cl, %rdx
-; X64-BMI1-NEXT:    testb $64, %cl
-; X64-BMI1-NEXT:    cmovneq %rdx, %r8
-; X64-BMI1-NEXT:    cmovneq %rax, %rdx
-; X64-BMI1-NEXT:    andq %rsi, %r8
-; X64-BMI1-NEXT:    andq %rdi, %rdx
-; X64-BMI1-NEXT:    orq %r8, %rdx
-; X64-BMI1-NEXT:    sete %al
+; X64-BMI1-NEXT:    movl %edx, %ecx
+; X64-BMI1-NEXT:    andb $32, %cl
+; X64-BMI1-NEXT:    shrdq %cl, %rsi, %rdi
+; X64-BMI1-NEXT:    shrq %cl, %rsi
+; X64-BMI1-NEXT:    testb $64, %dl
+; X64-BMI1-NEXT:    cmoveq %rdi, %rsi
+; X64-BMI1-NEXT:    btl %edx, %esi
+; X64-BMI1-NEXT:    setae %al
 ; X64-BMI1-NEXT:    retq
 ;
 ; X64-BMI2-LABEL: scalar_i128_lowestbit_eq:
 ; X64-BMI2:       # %bb.0:
-; X64-BMI2-NEXT:    movq %rdx, %rcx
-; X64-BMI2-NEXT:    xorl %eax, %eax
-; X64-BMI2-NEXT:    movl $1, %edx
-; X64-BMI2-NEXT:    xorl %r8d, %r8d
-; X64-BMI2-NEXT:    shldq %cl, %rdx, %r8
-; X64-BMI2-NEXT:    shlxq %rcx, %rdx, %rdx
-; X64-BMI2-NEXT:    testb $64, %cl
-; X64-BMI2-NEXT:    cmovneq %rdx, %r8
-; X64-BMI2-NEXT:    cmovneq %rax, %rdx
-; X64-BMI2-NEXT:    andq %rsi, %r8
-; X64-BMI2-NEXT:    andq %rdi, %rdx
-; X64-BMI2-NEXT:    orq %r8, %rdx
-; X64-BMI2-NEXT:    sete %al
+; X64-BMI2-NEXT:    movl %edx, %ecx
+; X64-BMI2-NEXT:    andb $32, %cl
+; X64-BMI2-NEXT:    shrdq %cl, %rsi, %rdi
+; X64-BMI2-NEXT:    shrxq %rcx, %rsi, %rax
+; X64-BMI2-NEXT:    testb $64, %dl
+; X64-BMI2-NEXT:    cmoveq %rdi, %rax
+; X64-BMI2-NEXT:    btl %edx, %eax
+; X64-BMI2-NEXT:    setae %al
 ; X64-BMI2-NEXT:    retq
   %t0 = shl i128 1, %y
   %t1 = and i128 %t0, %x

diff  --git a/llvm/test/CodeGen/X86/known-pow2.ll b/llvm/test/CodeGen/X86/known-pow2.ll
index b4dd00125aab5..2ef5def9c0fd8 100644
--- a/llvm/test/CodeGen/X86/known-pow2.ll
+++ b/llvm/test/CodeGen/X86/known-pow2.ll
@@ -875,19 +875,14 @@ define i1 @pow2_and_i50(i50 %num, i50 %shift) {
 define i1 @pow2_and_i128(i128 %num, i128 %shift) {
 ; CHECK-LABEL: pow2_and_i128:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %rdx, %rcx
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    movl $1, %edx
-; CHECK-NEXT:    xorl %r8d, %r8d
-; CHECK-NEXT:    shldq %cl, %rdx, %r8
-; CHECK-NEXT:    shlq %cl, %rdx
-; CHECK-NEXT:    testb $64, %cl
-; CHECK-NEXT:    cmovneq %rdx, %r8
-; CHECK-NEXT:    cmovneq %rax, %rdx
-; CHECK-NEXT:    andq %rsi, %r8
-; CHECK-NEXT:    andq %rdi, %rdx
-; CHECK-NEXT:    orq %r8, %rdx
-; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    movl %edx, %ecx
+; CHECK-NEXT:    andb $32, %cl
+; CHECK-NEXT:    shrdq %cl, %rsi, %rdi
+; CHECK-NEXT:    shrq %cl, %rsi
+; CHECK-NEXT:    testb $64, %dl
+; CHECK-NEXT:    cmoveq %rdi, %rsi
+; CHECK-NEXT:    btl %edx, %esi
+; CHECK-NEXT:    setae %al
 ; CHECK-NEXT:    retq
   %mask = shl nuw i128 1, %shift
   %bit = and i128 %mask, %num


        


More information about the llvm-commits mailing list