[llvm] [CGP] Despeculate ctlz/cttz with "illegal" integer types (PR #137197)

Sergei Barannikov via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 25 06:58:00 PDT 2025


================
@@ -221,87 +221,99 @@ define i64 @test_i64(i64 %a) {
 ;
 ; CHECK-6M-LABEL: test_i64:
 ; CHECK-6M:       @ %bb.0:
-; CHECK-6M-NEXT:    .save {r4, r5, r7, lr}
-; CHECK-6M-NEXT:    push {r4, r5, r7, lr}
+; CHECK-6M-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-6M-NEXT:    push {r4, r5, r6, lr}
+; CHECK-6M-NEXT:    mov r3, r1
 ; CHECK-6M-NEXT:    mov r2, r0
-; CHECK-6M-NEXT:    ldr r5, .LCPI3_0
-; CHECK-6M-NEXT:    adr r3, .LCPI3_1
+; CHECK-6M-NEXT:    movs r1, #0
+; CHECK-6M-NEXT:    orrs r0, r3
+; CHECK-6M-NEXT:    beq .LBB3_6
+; CHECK-6M-NEXT:  @ %bb.1: @ %cond.false
+; CHECK-6M-NEXT:    ldr r6, .LCPI3_0
+; CHECK-6M-NEXT:    adr r4, .LCPI3_1
 ; CHECK-6M-NEXT:    movs r0, #32
-; CHECK-6M-NEXT:    cmp r1, #0
-; CHECK-6M-NEXT:    mov r4, r0
-; CHECK-6M-NEXT:    beq .LBB3_2
-; CHECK-6M-NEXT:  @ %bb.1:
-; CHECK-6M-NEXT:    rsbs r4, r1, #0
-; CHECK-6M-NEXT:    ands r4, r1
-; CHECK-6M-NEXT:    muls r4, r5, r4
-; CHECK-6M-NEXT:    lsrs r1, r4, #27
-; CHECK-6M-NEXT:    ldrb r4, [r3, r1]
-; CHECK-6M-NEXT:  .LBB3_2:
-; CHECK-6M-NEXT:    adds r4, #32
-; CHECK-6M-NEXT:    rsbs r1, r2, #0
-; CHECK-6M-NEXT:    ands r1, r2
-; CHECK-6M-NEXT:    muls r5, r1, r5
-; CHECK-6M-NEXT:    lsrs r1, r5, #27
+; CHECK-6M-NEXT:    cmp r3, #0
+; CHECK-6M-NEXT:    mov r5, r0
+; CHECK-6M-NEXT:    beq .LBB3_3
+; CHECK-6M-NEXT:  @ %bb.2: @ %cond.false
+; CHECK-6M-NEXT:    rsbs r5, r3, #0
+; CHECK-6M-NEXT:    ands r5, r3
+; CHECK-6M-NEXT:    muls r5, r6, r5
+; CHECK-6M-NEXT:    lsrs r3, r5, #27
+; CHECK-6M-NEXT:    ldrb r5, [r4, r3]
+; CHECK-6M-NEXT:  .LBB3_3: @ %cond.false
+; CHECK-6M-NEXT:    adds r5, #32
+; CHECK-6M-NEXT:    rsbs r3, r2, #0
+; CHECK-6M-NEXT:    ands r3, r2
+; CHECK-6M-NEXT:    muls r6, r3, r6
+; CHECK-6M-NEXT:    lsrs r3, r6, #27
 ; CHECK-6M-NEXT:    cmp r2, #0
-; CHECK-6M-NEXT:    bne .LBB3_5
-; CHECK-6M-NEXT:  @ %bb.3:
-; CHECK-6M-NEXT:    beq .LBB3_6
-; CHECK-6M-NEXT:  .LBB3_4:
-; CHECK-6M-NEXT:    movs r1, #0
-; CHECK-6M-NEXT:    pop {r4, r5, r7, pc}
-; CHECK-6M-NEXT:  .LBB3_5:
-; CHECK-6M-NEXT:    ldrb r0, [r3, r1]
-; CHECK-6M-NEXT:    bne .LBB3_4
+; CHECK-6M-NEXT:    bne .LBB3_7
+; CHECK-6M-NEXT:  @ %bb.4: @ %cond.false
+; CHECK-6M-NEXT:    beq .LBB3_8
+; CHECK-6M-NEXT:  .LBB3_5: @ %cond.end
+; CHECK-6M-NEXT:    pop {r4, r5, r6, pc}
 ; CHECK-6M-NEXT:  .LBB3_6:
-; CHECK-6M-NEXT:    mov r0, r4
-; CHECK-6M-NEXT:    movs r1, #0
-; CHECK-6M-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-6M-NEXT:    movs r0, #64
+; CHECK-6M-NEXT:    pop {r4, r5, r6, pc}
+; CHECK-6M-NEXT:  .LBB3_7: @ %cond.false
+; CHECK-6M-NEXT:    ldrb r0, [r4, r3]
+; CHECK-6M-NEXT:    bne .LBB3_5
+; CHECK-6M-NEXT:  .LBB3_8: @ %cond.false
+; CHECK-6M-NEXT:    mov r0, r5
+; CHECK-6M-NEXT:    pop {r4, r5, r6, pc}
 ; CHECK-6M-NEXT:    .p2align 2
-; CHECK-6M-NEXT:  @ %bb.7:
+; CHECK-6M-NEXT:  @ %bb.9:
 ; CHECK-6M-NEXT:  .LCPI3_0:
 ; CHECK-6M-NEXT:    .long 125613361 @ 0x77cb531
 ; CHECK-6M-NEXT:  .LCPI3_1:
 ; CHECK-6M-NEXT:    .ascii "\000\001\034\002\035\016\030\003\036\026\024\017\031\021\004\b\037\033\r\027\025\023\020\007\032\f\022\006\013\005\n\t"
 ;
 ; CHECK-8MBASE-LABEL: test_i64:
 ; CHECK-8MBASE:       @ %bb.0:
-; CHECK-8MBASE-NEXT:    .save {r4, r5, r7, lr}
-; CHECK-8MBASE-NEXT:    push {r4, r5, r7, lr}
+; CHECK-8MBASE-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-8MBASE-NEXT:    push {r4, r5, r6, lr}
+; CHECK-8MBASE-NEXT:    mov r3, r1
 ; CHECK-8MBASE-NEXT:    mov r2, r0
-; CHECK-8MBASE-NEXT:    movw r5, #46385
-; CHECK-8MBASE-NEXT:    movt r5, #1916
-; CHECK-8MBASE-NEXT:    adr r3, .LCPI3_0
+; CHECK-8MBASE-NEXT:    movs r1, #0
+; CHECK-8MBASE-NEXT:    orrs r0, r3
+; CHECK-8MBASE-NEXT:    beq .LBB3_6
+; CHECK-8MBASE-NEXT:  @ %bb.1: @ %cond.false
+; CHECK-8MBASE-NEXT:    movw r6, #46385
+; CHECK-8MBASE-NEXT:    movt r6, #1916
+; CHECK-8MBASE-NEXT:    adr r4, .LCPI3_0
 ; CHECK-8MBASE-NEXT:    movs r0, #32
-; CHECK-8MBASE-NEXT:    mov r4, r0
-; CHECK-8MBASE-NEXT:    cbz r1, .LBB3_2
-; CHECK-8MBASE-NEXT:  @ %bb.1:
-; CHECK-8MBASE-NEXT:    rsbs r4, r1, #0
-; CHECK-8MBASE-NEXT:    ands r4, r1
-; CHECK-8MBASE-NEXT:    muls r4, r5, r4
-; CHECK-8MBASE-NEXT:    lsrs r1, r4, #27
-; CHECK-8MBASE-NEXT:    ldrb r4, [r3, r1]
-; CHECK-8MBASE-NEXT:  .LBB3_2:
-; CHECK-8MBASE-NEXT:    adds r4, #32
-; CHECK-8MBASE-NEXT:    rsbs r1, r2, #0
-; CHECK-8MBASE-NEXT:    ands r1, r2
-; CHECK-8MBASE-NEXT:    muls r5, r1, r5
-; CHECK-8MBASE-NEXT:    lsrs r1, r5, #27
+; CHECK-8MBASE-NEXT:    mov r5, r0
+; CHECK-8MBASE-NEXT:    cbz r3, .LBB3_3
+; CHECK-8MBASE-NEXT:  @ %bb.2: @ %cond.false
+; CHECK-8MBASE-NEXT:    rsbs r5, r3, #0
+; CHECK-8MBASE-NEXT:    ands r5, r3
+; CHECK-8MBASE-NEXT:    muls r5, r6, r5
+; CHECK-8MBASE-NEXT:    lsrs r3, r5, #27
+; CHECK-8MBASE-NEXT:    ldrb r5, [r4, r3]
+; CHECK-8MBASE-NEXT:  .LBB3_3: @ %cond.false
+; CHECK-8MBASE-NEXT:    adds r5, #32
+; CHECK-8MBASE-NEXT:    rsbs r3, r2, #0
+; CHECK-8MBASE-NEXT:    ands r3, r2
+; CHECK-8MBASE-NEXT:    muls r6, r3, r6
+; CHECK-8MBASE-NEXT:    lsrs r3, r6, #27
 ; CHECK-8MBASE-NEXT:    cmp r2, #0
-; CHECK-8MBASE-NEXT:    bne .LBB3_5
-; CHECK-8MBASE-NEXT:  @ %bb.3:
-; CHECK-8MBASE-NEXT:    beq .LBB3_6
-; CHECK-8MBASE-NEXT:  .LBB3_4:
-; CHECK-8MBASE-NEXT:    movs r1, #0
-; CHECK-8MBASE-NEXT:    pop {r4, r5, r7, pc}
-; CHECK-8MBASE-NEXT:  .LBB3_5:
-; CHECK-8MBASE-NEXT:    ldrb r0, [r3, r1]
-; CHECK-8MBASE-NEXT:    bne .LBB3_4
+; CHECK-8MBASE-NEXT:    bne .LBB3_7
+; CHECK-8MBASE-NEXT:  @ %bb.4: @ %cond.false
+; CHECK-8MBASE-NEXT:    beq .LBB3_8
----------------
s-barannikov wrote:

FWIW
There are some strange branches here and below (on the left, too).
X86 eliminates them in `X86TargetLowering::EmitLoweredSelect`. AFAICT ARM's equivalent doesn't do this optimization.
There is also a `PPCBranchCoalescing` pass that could help here. It is more or less generic, but currently lives under the PowerPC directory.


https://github.com/llvm/llvm-project/pull/137197


More information about the llvm-commits mailing list