[llvm] [CGP] Despeculate ctlz/cttz with "illegal" integer types (PR #137197)
Sergei Barannikov via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 25 06:58:00 PDT 2025
================
@@ -221,87 +221,99 @@ define i64 @test_i64(i64 %a) {
;
; CHECK-6M-LABEL: test_i64:
; CHECK-6M: @ %bb.0:
-; CHECK-6M-NEXT: .save {r4, r5, r7, lr}
-; CHECK-6M-NEXT: push {r4, r5, r7, lr}
+; CHECK-6M-NEXT: .save {r4, r5, r6, lr}
+; CHECK-6M-NEXT: push {r4, r5, r6, lr}
+; CHECK-6M-NEXT: mov r3, r1
; CHECK-6M-NEXT: mov r2, r0
-; CHECK-6M-NEXT: ldr r5, .LCPI3_0
-; CHECK-6M-NEXT: adr r3, .LCPI3_1
+; CHECK-6M-NEXT: movs r1, #0
+; CHECK-6M-NEXT: orrs r0, r3
+; CHECK-6M-NEXT: beq .LBB3_6
+; CHECK-6M-NEXT: @ %bb.1: @ %cond.false
+; CHECK-6M-NEXT: ldr r6, .LCPI3_0
+; CHECK-6M-NEXT: adr r4, .LCPI3_1
; CHECK-6M-NEXT: movs r0, #32
-; CHECK-6M-NEXT: cmp r1, #0
-; CHECK-6M-NEXT: mov r4, r0
-; CHECK-6M-NEXT: beq .LBB3_2
-; CHECK-6M-NEXT: @ %bb.1:
-; CHECK-6M-NEXT: rsbs r4, r1, #0
-; CHECK-6M-NEXT: ands r4, r1
-; CHECK-6M-NEXT: muls r4, r5, r4
-; CHECK-6M-NEXT: lsrs r1, r4, #27
-; CHECK-6M-NEXT: ldrb r4, [r3, r1]
-; CHECK-6M-NEXT: .LBB3_2:
-; CHECK-6M-NEXT: adds r4, #32
-; CHECK-6M-NEXT: rsbs r1, r2, #0
-; CHECK-6M-NEXT: ands r1, r2
-; CHECK-6M-NEXT: muls r5, r1, r5
-; CHECK-6M-NEXT: lsrs r1, r5, #27
+; CHECK-6M-NEXT: cmp r3, #0
+; CHECK-6M-NEXT: mov r5, r0
+; CHECK-6M-NEXT: beq .LBB3_3
+; CHECK-6M-NEXT: @ %bb.2: @ %cond.false
+; CHECK-6M-NEXT: rsbs r5, r3, #0
+; CHECK-6M-NEXT: ands r5, r3
+; CHECK-6M-NEXT: muls r5, r6, r5
+; CHECK-6M-NEXT: lsrs r3, r5, #27
+; CHECK-6M-NEXT: ldrb r5, [r4, r3]
+; CHECK-6M-NEXT: .LBB3_3: @ %cond.false
+; CHECK-6M-NEXT: adds r5, #32
+; CHECK-6M-NEXT: rsbs r3, r2, #0
+; CHECK-6M-NEXT: ands r3, r2
+; CHECK-6M-NEXT: muls r6, r3, r6
+; CHECK-6M-NEXT: lsrs r3, r6, #27
; CHECK-6M-NEXT: cmp r2, #0
-; CHECK-6M-NEXT: bne .LBB3_5
-; CHECK-6M-NEXT: @ %bb.3:
-; CHECK-6M-NEXT: beq .LBB3_6
-; CHECK-6M-NEXT: .LBB3_4:
-; CHECK-6M-NEXT: movs r1, #0
-; CHECK-6M-NEXT: pop {r4, r5, r7, pc}
-; CHECK-6M-NEXT: .LBB3_5:
-; CHECK-6M-NEXT: ldrb r0, [r3, r1]
-; CHECK-6M-NEXT: bne .LBB3_4
+; CHECK-6M-NEXT: bne .LBB3_7
+; CHECK-6M-NEXT: @ %bb.4: @ %cond.false
+; CHECK-6M-NEXT: beq .LBB3_8
+; CHECK-6M-NEXT: .LBB3_5: @ %cond.end
+; CHECK-6M-NEXT: pop {r4, r5, r6, pc}
; CHECK-6M-NEXT: .LBB3_6:
-; CHECK-6M-NEXT: mov r0, r4
-; CHECK-6M-NEXT: movs r1, #0
-; CHECK-6M-NEXT: pop {r4, r5, r7, pc}
+; CHECK-6M-NEXT: movs r0, #64
+; CHECK-6M-NEXT: pop {r4, r5, r6, pc}
+; CHECK-6M-NEXT: .LBB3_7: @ %cond.false
+; CHECK-6M-NEXT: ldrb r0, [r4, r3]
+; CHECK-6M-NEXT: bne .LBB3_5
+; CHECK-6M-NEXT: .LBB3_8: @ %cond.false
+; CHECK-6M-NEXT: mov r0, r5
+; CHECK-6M-NEXT: pop {r4, r5, r6, pc}
; CHECK-6M-NEXT: .p2align 2
-; CHECK-6M-NEXT: @ %bb.7:
+; CHECK-6M-NEXT: @ %bb.9:
; CHECK-6M-NEXT: .LCPI3_0:
; CHECK-6M-NEXT: .long 125613361 @ 0x77cb531
; CHECK-6M-NEXT: .LCPI3_1:
; CHECK-6M-NEXT: .ascii "\000\001\034\002\035\016\030\003\036\026\024\017\031\021\004\b\037\033\r\027\025\023\020\007\032\f\022\006\013\005\n\t"
;
; CHECK-8MBASE-LABEL: test_i64:
; CHECK-8MBASE: @ %bb.0:
-; CHECK-8MBASE-NEXT: .save {r4, r5, r7, lr}
-; CHECK-8MBASE-NEXT: push {r4, r5, r7, lr}
+; CHECK-8MBASE-NEXT: .save {r4, r5, r6, lr}
+; CHECK-8MBASE-NEXT: push {r4, r5, r6, lr}
+; CHECK-8MBASE-NEXT: mov r3, r1
; CHECK-8MBASE-NEXT: mov r2, r0
-; CHECK-8MBASE-NEXT: movw r5, #46385
-; CHECK-8MBASE-NEXT: movt r5, #1916
-; CHECK-8MBASE-NEXT: adr r3, .LCPI3_0
+; CHECK-8MBASE-NEXT: movs r1, #0
+; CHECK-8MBASE-NEXT: orrs r0, r3
+; CHECK-8MBASE-NEXT: beq .LBB3_6
+; CHECK-8MBASE-NEXT: @ %bb.1: @ %cond.false
+; CHECK-8MBASE-NEXT: movw r6, #46385
+; CHECK-8MBASE-NEXT: movt r6, #1916
+; CHECK-8MBASE-NEXT: adr r4, .LCPI3_0
; CHECK-8MBASE-NEXT: movs r0, #32
-; CHECK-8MBASE-NEXT: mov r4, r0
-; CHECK-8MBASE-NEXT: cbz r1, .LBB3_2
-; CHECK-8MBASE-NEXT: @ %bb.1:
-; CHECK-8MBASE-NEXT: rsbs r4, r1, #0
-; CHECK-8MBASE-NEXT: ands r4, r1
-; CHECK-8MBASE-NEXT: muls r4, r5, r4
-; CHECK-8MBASE-NEXT: lsrs r1, r4, #27
-; CHECK-8MBASE-NEXT: ldrb r4, [r3, r1]
-; CHECK-8MBASE-NEXT: .LBB3_2:
-; CHECK-8MBASE-NEXT: adds r4, #32
-; CHECK-8MBASE-NEXT: rsbs r1, r2, #0
-; CHECK-8MBASE-NEXT: ands r1, r2
-; CHECK-8MBASE-NEXT: muls r5, r1, r5
-; CHECK-8MBASE-NEXT: lsrs r1, r5, #27
+; CHECK-8MBASE-NEXT: mov r5, r0
+; CHECK-8MBASE-NEXT: cbz r3, .LBB3_3
+; CHECK-8MBASE-NEXT: @ %bb.2: @ %cond.false
+; CHECK-8MBASE-NEXT: rsbs r5, r3, #0
+; CHECK-8MBASE-NEXT: ands r5, r3
+; CHECK-8MBASE-NEXT: muls r5, r6, r5
+; CHECK-8MBASE-NEXT: lsrs r3, r5, #27
+; CHECK-8MBASE-NEXT: ldrb r5, [r4, r3]
+; CHECK-8MBASE-NEXT: .LBB3_3: @ %cond.false
+; CHECK-8MBASE-NEXT: adds r5, #32
+; CHECK-8MBASE-NEXT: rsbs r3, r2, #0
+; CHECK-8MBASE-NEXT: ands r3, r2
+; CHECK-8MBASE-NEXT: muls r6, r3, r6
+; CHECK-8MBASE-NEXT: lsrs r3, r6, #27
; CHECK-8MBASE-NEXT: cmp r2, #0
-; CHECK-8MBASE-NEXT: bne .LBB3_5
-; CHECK-8MBASE-NEXT: @ %bb.3:
-; CHECK-8MBASE-NEXT: beq .LBB3_6
-; CHECK-8MBASE-NEXT: .LBB3_4:
-; CHECK-8MBASE-NEXT: movs r1, #0
-; CHECK-8MBASE-NEXT: pop {r4, r5, r7, pc}
-; CHECK-8MBASE-NEXT: .LBB3_5:
-; CHECK-8MBASE-NEXT: ldrb r0, [r3, r1]
-; CHECK-8MBASE-NEXT: bne .LBB3_4
+; CHECK-8MBASE-NEXT: bne .LBB3_7
+; CHECK-8MBASE-NEXT: @ %bb.4: @ %cond.false
+; CHECK-8MBASE-NEXT: beq .LBB3_8
----------------
s-barannikov wrote:
FWIW
There are some strange branches here and below (on the left, too): a `bne` immediately followed by a `beq` that tests the same flags.
X86 eliminates them in `X86TargetLowering::EmitLoweredSelect`. AFAICT ARM's equivalent doesn't do this optimization.
There is also a `PPCBranchCoalescing` pass that could help here. It is more or less generic, but currently lives under the PowerPC directory.
https://github.com/llvm/llvm-project/pull/137197
More information about the llvm-commits
mailing list