[llvm] [GISel] funnel shift combiner port from SelectionDAG ISel to GlobalISel (PR #135132)
Axel Sorenson via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 16 01:11:06 PDT 2025
================
@@ -105,3 +105,55 @@ define i16 @test_shl_i48_2(i48 %x, i48 %y) {
%trunc = trunc i48 %shl to i16
ret i16 %trunc
}
+
+define i16 @test_fshl_i32(i32 %x, i32 %_, i32 %y) {
+; RV32-LABEL: test_fshl_i32:
+; RV32: # %bb.0:
+; RV32-NEXT: not a3, a2
+; RV32-NEXT: sll a0, a0, a2
+; RV32-NEXT: srli a1, a1, 1
+; RV32-NEXT: srl a1, a1, a3
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: ret
+;
+; RV64-LABEL: test_fshl_i32:
+; RV64: # %bb.0:
+; RV64-NEXT: not a3, a2
+; RV64-NEXT: sllw a0, a0, a2
+; RV64-NEXT: srliw a1, a1, 1
+; RV64-NEXT: srlw a1, a1, a3
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: ret
+
+ %fshl = call i32 @llvm.fshl.i32(i32 %x, i32 %_, i32 %y)
+ %shl = shl i32 %x, %y
+ %or = or i32 %fshl, %shl
+ %trunc = trunc i32 %or to i16
+ ret i16 %trunc
+}
+
+define i16 @test_fshr_i32(i32 %_, i32 %x, i32 %y) {
+; RV32-LABEL: test_fshr_i32:
+; RV32: # %bb.0:
+; RV32-NEXT: not a3, a2
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: sll a0, a0, a3
+; RV32-NEXT: srl a1, a1, a2
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: ret
+;
+; RV64-LABEL: test_fshr_i32:
+; RV64: # %bb.0:
+; RV64-NEXT: not a3, a2
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: sllw a0, a0, a3
+; RV64-NEXT: srlw a1, a1, a2
+; RV64-NEXT: or a0, a0, a1
+; RV64-NEXT: ret
+
+ %fshr = call i32 @llvm.fshr.i32(i32 %_, i32 %x, i32 %y)
+ %lshr = lshr i32 %x, %y
+ %or = or i32 %fshr, %lshr
+ %trunc = trunc i32 %or to i16
+ ret i16 %trunc
+}
----------------
axelcool1234 wrote:
> That's going to be true in either case. As long as it actually compiles, it's not a problem and gives a point of reference for whenever the code improves for illegal types.
Before I go ahead and add i48 funnel shift calls to the original tests (in order to also cover the multi-use case and verify that the intermediate instructions are still emitted), I want to clarify what I meant. Below are two tests, one using an i32 fshl and one using an i48 fshl. The difference in the generated code (via `update_llc_test_checks`) is quite large; I assume that isn't much of a concern here? Once I add these calls to the original tests, their generated check lines will also explode in size.
```
define i32 @test_fshl_i32(i32 %x, i32 %y) {
; RV32-LABEL: test_fshl_i32:
; RV32: # %bb.0:
; RV32-NEXT: not a2, a1
; RV32-NEXT: sll a0, a0, a1
; RV32-NEXT: li a1, 8
; RV32-NEXT: srl a1, a1, a2
; RV32-NEXT: or a0, a0, a1
; RV32-NEXT: ret
;
; RV64-LABEL: test_fshl_i32:
; RV64: # %bb.0:
; RV64-NEXT: not a2, a1
; RV64-NEXT: sllw a0, a0, a1
; RV64-NEXT: li a1, 8
; RV64-NEXT: srlw a1, a1, a2
; RV64-NEXT: or a0, a0, a1
; RV64-NEXT: ret
%fshl = call i32 @llvm.fshl.i32(i32 %x, i32 16, i32 %y)
ret i32 %fshl
}
define i48 @test_fshl_i48(i48 %x, i48 %y) {
; RV32-LABEL: test_fshl_i48:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: .cfi_offset s0, -8
; RV32-NEXT: .cfi_offset s1, -12
; RV32-NEXT: .cfi_offset s2, -16
; RV32-NEXT: mv s1, a0
; RV32-NEXT: mv s0, a1
; RV32-NEXT: mv a0, a2
; RV32-NEXT: li s2, 47
; RV32-NEXT: slli a1, a3, 16
; RV32-NEXT: srli a1, a1, 16
; RV32-NEXT: li a2, 48
; RV32-NEXT: li a3, 0
; RV32-NEXT: call __umoddi3
; RV32-NEXT: li a2, 32
; RV32-NEXT: bltu a0, a2, .LBB2_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a1, 0
; RV32-NEXT: sll a4, s1, a0
; RV32-NEXT: sub a3, s2, a0
; RV32-NEXT: bnez a0, .LBB2_3
; RV32-NEXT: j .LBB2_4
; RV32-NEXT: .LBB2_2:
; RV32-NEXT: sll a1, s1, a0
; RV32-NEXT: neg a3, a0
; RV32-NEXT: srl a3, s1, a3
; RV32-NEXT: sll a4, s0, a0
; RV32-NEXT: or a4, a3, a4
; RV32-NEXT: sub a3, s2, a0
; RV32-NEXT: beqz a0, .LBB2_4
; RV32-NEXT: .LBB2_3:
; RV32-NEXT: mv s0, a4
; RV32-NEXT: .LBB2_4:
; RV32-NEXT: li a0, 8
; RV32-NEXT: bltu a3, a2, .LBB2_6
; RV32-NEXT: # %bb.5:
; RV32-NEXT: li a2, 0
; RV32-NEXT: bnez a3, .LBB2_7
; RV32-NEXT: j .LBB2_8
; RV32-NEXT: .LBB2_6:
; RV32-NEXT: srl a2, a0, a3
; RV32-NEXT: beqz a3, .LBB2_8
; RV32-NEXT: .LBB2_7:
; RV32-NEXT: mv a0, a2
; RV32-NEXT: .LBB2_8:
; RV32-NEXT: or a0, a1, a0
; RV32-NEXT: mv a1, s0
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s2, 0(sp) # 4-byte Folded Reload
; RV32-NEXT: .cfi_restore ra
; RV32-NEXT: .cfi_restore s0
; RV32-NEXT: .cfi_restore s1
; RV32-NEXT: .cfi_restore s2
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: .cfi_def_cfa_offset 0
; RV32-NEXT: ret
;
; RV64-LABEL: test_fshl_i48:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -32
; RV64-NEXT: .cfi_def_cfa_offset 32
; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: .cfi_offset s0, -16
; RV64-NEXT: .cfi_offset s1, -24
; RV64-NEXT: mv s0, a0
; RV64-NEXT: li s1, 47
; RV64-NEXT: slli a0, a1, 16
; RV64-NEXT: srli a0, a0, 16
; RV64-NEXT: li a1, 48
; RV64-NEXT: call __umoddi3
; RV64-NEXT: subw s1, s1, a0
; RV64-NEXT: sll a0, s0, a0
; RV64-NEXT: li a1, 8
; RV64-NEXT: srl a1, a1, s1
; RV64-NEXT: or a0, a0, a1
; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: .cfi_restore ra
; RV64-NEXT: .cfi_restore s0
; RV64-NEXT: .cfi_restore s1
; RV64-NEXT: addi sp, sp, 32
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
%fshl = call i48 @llvm.fshl.i48(i48 %x, i48 16, i48 %y)
ret i48 %fshl
}
```
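For context on why the i48 checks balloon: 48 isn't a power of two, so the modulo on the shift amount can't be lowered to a simple mask and instead becomes a `__umoddi3` libcall, and on RV32 the 48-bit shifts additionally legalize into branchy multi-register shift sequences. Roughly, the expansion computes something like this (my own paraphrase in IR, not the legalizer's literal output):
```
; Paraphrase of the generic fshl expansion for a non-power-of-two width.
define i48 @fshl_i48_expanded(i48 %x, i48 %y, i48 %z) {
  %amt = urem i48 %z, 48    ; 48 is not a power of two -> __umoddi3 libcall
  %inv = sub i48 47, %amt
  %hi  = shl i48 %x, %amt
  %pre = lshr i48 %y, 1     ; pre-shift keeps the second amount in [0, 47]
  %lo  = lshr i48 %pre, %inv
  %res = or i48 %hi, %lo
  ret i48 %res
}
```
With `%y` being the constant 16 in the test above, `%pre` folds to 8, which is where the `li a1, 8` / `li a0, 8` in the generated checks comes from.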
> you also wouldn't need to think too hard about that if reusing the original test, which I hope covers some vectors
There aren't any vector tests here, but I'll gladly add some myself!
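Something along these lines is what I have in mind (an untested sketch mirroring the scalar multi-use tests):
```
define <4 x i32> @test_fshl_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
  ; same multi-use pattern as the scalar tests, just vectorized
  %fshl = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z)
  %shl = shl <4 x i32> %x, %z
  %or = or <4 x i32> %fshl, %shl
  ret <4 x i32> %or
}
```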
https://github.com/llvm/llvm-project/pull/135132
More information about the llvm-commits mailing list