[llvm] 3acbadd - [NFC][AArch64]Precommit test cases to show ORR is better when one operand is a shift of the other operand

Fri Nov 11 14:01:48 PST 2022

Author: Mingming Liu
Date: 2022-11-11T14:01:02-08:00
New Revision: 3acbaddd43d8340257ad79a3514865c9758612c2

URL: https://github.com/llvm/llvm-project/commit/3acbaddd43d8340257ad79a3514865c9758612c2
DIFF: https://github.com/llvm/llvm-project/commit/3acbaddd43d8340257ad79a3514865c9758612c2.diff

LOG: [NFC][AArch64]Precommit test cases to show ORR is better when one operand is a shift of the other operand

In `bfi-not-orr` tests, bfi/bfxil are better since they simplifies away two instructions (extracting bits into destination directly)

In `orr-not-bfi` tests, orr is better since both orr and bfm would simplify away one instruction (the shl node), orr has higher throughput and shorter latency than bfm.

Added: 
    

Modified: 
    llvm/test/CodeGen/AArch64/bitfield-insert.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AArch64/bitfield-insert.ll b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
index b8e69d5cfaaf..8a383e85a366 100644

--- a/llvm/test/CodeGen/AArch64/bitfield-insert.ll
+++ b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
@@ -636,3 +636,109 @@ define i32 @test_orr_not_bfxil_i32(i32 %0) {
   %5 = or i32 %4, %2
   ret i32 %5
 }
+
+; For or operation, one operand is a left shift of another operand.
+; Use orr with left-shifted operand is better than bfi.
+define i64 @test_orr_not_bfi_i64(i64 %0) {
+; CHECK-LABEL: test_orr_not_bfi_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x8, x0, #0xff
+; CHECK-NEXT:    bfi x8, x0, #8, #8
+; CHECK-NEXT:    mov x0, x8
+; CHECK-NEXT:    ret
+  %2 = and i64 %0, 255
+  %3 = shl i64 %2, 8
+  %4 = or i64 %2, %3
+  ret i64 %4
+}
+
+; bfi is better than orr, since it would simplify away two instructions
+; (%mask and %bit-field-pos-op).
+define i32 @test_bfi_not_orr_i32(i32 %0, i32 %1) {
+; CHECK-LABEL: test_bfi_not_orr_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w8, w1, #0xff
+; CHECK-NEXT:    bfi w8, w0, #8, #8
+; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    ret
+  %bfi_dst = and i32 %1, 255
+  %mask = and i32 %0, 255
+  %bit-field-pos-op = shl i32 %mask, 8
+  %or_res = or i32 %bit-field-pos-op, %bfi_dst
+  ret i32 %or_res
+}
+
+; orr is better than bfi, since both simplify away one instruction (%3)
+; while orr has shorter latency and higher throughput.
+define i32 @test_orr_not_bfi_i32(i32 %0) {
+; CHECK-LABEL: test_orr_not_bfi_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    bfi w8, w0, #8, #8
+; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    ret
+  %2 = and i32 %0, 255
+  %3 = shl i32 %2, 8
+  %4 = or i32 %2, %3
+  ret i32 %4
+}
+
+; bfxil is better than orr, since it would simplify away two instructions
+; (%mask and %bit-field-extract-op).
+define i64 @test_bfxil_not_orr_i64(i64 %0, i64 %1) {
+; CHECK-LABEL: test_bfxil_not_orr_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x0, x0, #0xff000
+; CHECK-NEXT:    bfxil x0, x1, #12, #8
+; CHECK-NEXT:    ret
+  %shifted-mask = and i64 %1, 1044480
+  %bfi-dst = and i64 %0, 1044480
+  %bit-field-extract-op = lshr i64 %shifted-mask, 12
+  %or_res = or i64 %bit-field-extract-op, %bfi-dst
+  ret i64 %or_res
+}
+
+; orr is better than bfxil, since one operand is the right shift of  another
+; operand.
+define i64 @orr_not_bfxil_test2_i64(i64 %0) {
+; CHECK-LABEL: orr_not_bfxil_test2_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x8, x0, #0xff000
+; CHECK-NEXT:    bfxil x8, x0, #12, #8
+; CHECK-NEXT:    mov x0, x8
+; CHECK-NEXT:    ret
+  %2 = and i64 %0, 1044480 ; 0xff000
+  %3 = lshr i64 %2, 12
+  %4 = or i64 %2, %3
+  ret i64 %4
+}
+
+; bfxil simplifies away two instructions (that computes %shifted-mask and
+; %bit-field-extract-op respectively), so it's better than orr (which
+; simplifies away at most one shift).
+define i32 @test_bfxil_not_orr_i32(i32 %0, i32 %1) {
+; CHECK-LABEL: test_bfxil_not_orr_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w0, w0, #0xff000
+; CHECK-NEXT:    bfxil w0, w1, #12, #8
+; CHECK-NEXT:    ret
+  %shifted-mask = and i32 %1, 1044480
+  %bfxil-dst = and i32 %0, 1044480
+  %bit-field-extract-op = lshr i32 %shifted-mask, 12
+  %or_res = or i32 %bit-field-extract-op, %bfxil-dst
+  ret i32 %or_res
+}
+
+; one operand is the shift of another operand, so orr is better.
+define i32 @orr_not_bfxil_test2_i32(i32 %0) {
+; CHECK-LABEL: orr_not_bfxil_test2_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w8, w0, #0xff000
+; CHECK-NEXT:    bfxil w8, w0, #12, #8
+; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    ret
+  %2 = and i32 %0, 1044480  ; 0xff000
+  %3 = lshr i32 %2, 12
+  %4 = or i32 %2, %3
+  ret i32 %4
+}