[llvm] 6a01b67 - [DAGCombine] add tests for bswap-shift optimization
Chenbing Zheng via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 29 01:37:19 PDT 2022
Author: Chenbing Zheng
Date: 2022-03-29T16:34:52+08:00
New Revision: 6a01b676cfb227bf7ca8a7b6ff1b466e33a33d1f
URL: https://github.com/llvm/llvm-project/commit/6a01b676cfb227bf7ca8a7b6ff1b466e33a33d1f
DIFF: https://github.com/llvm/llvm-project/commit/6a01b676cfb227bf7ca8a7b6ff1b466e33a33d1f.diff
LOG: [DAGCombine] add tests for bswap-shift optimization
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D121504
Added:
llvm/test/CodeGen/RISCV/bswap-shift.ll
Modified:
llvm/test/CodeGen/X86/combine-bswap.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/RISCV/bswap-shift.ll b/llvm/test/CodeGen/RISCV/bswap-shift.ll
new file mode 100644
index 0000000000000..2e1a50be83451
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/bswap-shift.ll
@@ -0,0 +1,271 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+zbb -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefixes=RV32ZB
+; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefixes=RV64ZB
+; RUN: llc -mtriple=riscv32 -mattr=+zbkb -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefixes=RV32ZB
+; RUN: llc -mtriple=riscv64 -mattr=+zbkb -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefixes=RV64ZB
+
+; TODO: These tests can be optimised when x % 8 == 0:
+; fold (bswap(srl (bswap c), x)) -> (shl c, x)
+; fold (bswap(shl (bswap c), x)) -> (srl c, x)
+
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+declare i64 @llvm.bswap.i64(i64)
+
+define i16 @test_bswap_srli_7_bswap_i16(i16 %a) nounwind {
+; RV32ZB-LABEL: test_bswap_srli_7_bswap_i16:
+; RV32ZB: # %bb.0:
+; RV32ZB-NEXT: rev8 a0, a0
+; RV32ZB-NEXT: srli a0, a0, 23
+; RV32ZB-NEXT: rev8 a0, a0
+; RV32ZB-NEXT: srli a0, a0, 16
+; RV32ZB-NEXT: ret
+;
+; RV64ZB-LABEL: test_bswap_srli_7_bswap_i16:
+; RV64ZB: # %bb.0:
+; RV64ZB-NEXT: rev8 a0, a0
+; RV64ZB-NEXT: srli a0, a0, 55
+; RV64ZB-NEXT: rev8 a0, a0
+; RV64ZB-NEXT: srli a0, a0, 48
+; RV64ZB-NEXT: ret
+ %1 = call i16 @llvm.bswap.i16(i16 %a)
+ %2 = lshr i16 %1, 7
+ %3 = call i16 @llvm.bswap.i16(i16 %2)
+ ret i16 %3
+}
+
+define i16 @test_bswap_srli_8_bswap_i16(i16 %a) nounwind {
+; RV32ZB-LABEL: test_bswap_srli_8_bswap_i16:
+; RV32ZB: # %bb.0:
+; RV32ZB-NEXT: andi a0, a0, 255
+; RV32ZB-NEXT: rev8 a0, a0
+; RV32ZB-NEXT: srli a0, a0, 16
+; RV32ZB-NEXT: ret
+;
+; RV64ZB-LABEL: test_bswap_srli_8_bswap_i16:
+; RV64ZB: # %bb.0:
+; RV64ZB-NEXT: andi a0, a0, 255
+; RV64ZB-NEXT: rev8 a0, a0
+; RV64ZB-NEXT: srli a0, a0, 48
+; RV64ZB-NEXT: ret
+ %1 = call i16 @llvm.bswap.i16(i16 %a)
+ %2 = lshr i16 %1, 8
+ %3 = call i16 @llvm.bswap.i16(i16 %2)
+ ret i16 %3
+}
+
+define i32 @test_bswap_srli_8_bswap_i32(i32 %a) nounwind {
+; RV32ZB-LABEL: test_bswap_srli_8_bswap_i32:
+; RV32ZB: # %bb.0:
+; RV32ZB-NEXT: rev8 a0, a0
+; RV32ZB-NEXT: srli a0, a0, 8
+; RV32ZB-NEXT: rev8 a0, a0
+; RV32ZB-NEXT: ret
+;
+; RV64ZB-LABEL: test_bswap_srli_8_bswap_i32:
+; RV64ZB: # %bb.0:
+; RV64ZB-NEXT: rev8 a0, a0
+; RV64ZB-NEXT: srli a0, a0, 40
+; RV64ZB-NEXT: rev8 a0, a0
+; RV64ZB-NEXT: srli a0, a0, 32
+; RV64ZB-NEXT: ret
+ %1 = call i32 @llvm.bswap.i32(i32 %a)
+ %2 = lshr i32 %1, 8
+ %3 = call i32 @llvm.bswap.i32(i32 %2)
+ ret i32 %3
+}
+
+define i32 @test_bswap_srli_16_bswap_i32(i32 %a) nounwind {
+; RV32ZB-LABEL: test_bswap_srli_16_bswap_i32:
+; RV32ZB: # %bb.0:
+; RV32ZB-NEXT: rev8 a0, a0
+; RV32ZB-NEXT: srli a0, a0, 16
+; RV32ZB-NEXT: rev8 a0, a0
+; RV32ZB-NEXT: ret
+;
+; RV64ZB-LABEL: test_bswap_srli_16_bswap_i32:
+; RV64ZB: # %bb.0:
+; RV64ZB-NEXT: rev8 a0, a0
+; RV64ZB-NEXT: srli a0, a0, 48
+; RV64ZB-NEXT: rev8 a0, a0
+; RV64ZB-NEXT: srli a0, a0, 32
+; RV64ZB-NEXT: ret
+ %1 = call i32 @llvm.bswap.i32(i32 %a)
+ %2 = lshr i32 %1, 16
+ %3 = call i32 @llvm.bswap.i32(i32 %2)
+ ret i32 %3
+}
+
+define i32 @test_bswap_srli_24_bswap_i32(i32 %a) nounwind {
+; RV32ZB-LABEL: test_bswap_srli_24_bswap_i32:
+; RV32ZB: # %bb.0:
+; RV32ZB-NEXT: andi a0, a0, 255
+; RV32ZB-NEXT: rev8 a0, a0
+; RV32ZB-NEXT: ret
+;
+; RV64ZB-LABEL: test_bswap_srli_24_bswap_i32:
+; RV64ZB: # %bb.0:
+; RV64ZB-NEXT: andi a0, a0, 255
+; RV64ZB-NEXT: rev8 a0, a0
+; RV64ZB-NEXT: srli a0, a0, 32
+; RV64ZB-NEXT: ret
+ %1 = call i32 @llvm.bswap.i32(i32 %a)
+ %2 = lshr i32 %1, 24
+ %3 = call i32 @llvm.bswap.i32(i32 %2)
+ ret i32 %3
+}
+
+define i64 @test_bswap_srli_48_bswap_i64(i64 %a) nounwind {
+; RV32ZB-LABEL: test_bswap_srli_48_bswap_i64:
+; RV32ZB: # %bb.0:
+; RV32ZB-NEXT: rev8 a0, a0
+; RV32ZB-NEXT: srli a0, a0, 16
+; RV32ZB-NEXT: rev8 a1, a0
+; RV32ZB-NEXT: li a0, 0
+; RV32ZB-NEXT: ret
+;
+; RV64ZB-LABEL: test_bswap_srli_48_bswap_i64:
+; RV64ZB: # %bb.0:
+; RV64ZB-NEXT: rev8 a0, a0
+; RV64ZB-NEXT: srli a0, a0, 48
+; RV64ZB-NEXT: rev8 a0, a0
+; RV64ZB-NEXT: ret
+ %1 = call i64 @llvm.bswap.i64(i64 %a)
+ %2 = lshr i64 %1, 48
+ %3 = call i64 @llvm.bswap.i64(i64 %2)
+ ret i64 %3
+}
+
+define i16 @test_bswap_shli_7_bswap_i16(i16 %a) nounwind {
+; RV32ZB-LABEL: test_bswap_shli_7_bswap_i16:
+; RV32ZB: # %bb.0:
+; RV32ZB-NEXT: rev8 a0, a0
+; RV32ZB-NEXT: srli a0, a0, 9
+; RV32ZB-NEXT: andi a0, a0, -128
+; RV32ZB-NEXT: rev8 a0, a0
+; RV32ZB-NEXT: srli a0, a0, 16
+; RV32ZB-NEXT: ret
+;
+; RV64ZB-LABEL: test_bswap_shli_7_bswap_i16:
+; RV64ZB: # %bb.0:
+; RV64ZB-NEXT: rev8 a0, a0
+; RV64ZB-NEXT: srli a0, a0, 41
+; RV64ZB-NEXT: andi a0, a0, -128
+; RV64ZB-NEXT: rev8 a0, a0
+; RV64ZB-NEXT: srli a0, a0, 48
+; RV64ZB-NEXT: ret
+ %1 = call i16 @llvm.bswap.i16(i16 %a)
+ %2 = shl i16 %1, 7
+ %3 = call i16 @llvm.bswap.i16(i16 %2)
+ ret i16 %3
+}
+
+define i16 @test_bswap_shli_8_bswap_i16(i16 %a) nounwind {
+; RV32ZB-LABEL: test_bswap_shli_8_bswap_i16:
+; RV32ZB: # %bb.0:
+; RV32ZB-NEXT: andi a0, a0, -256
+; RV32ZB-NEXT: rev8 a0, a0
+; RV32ZB-NEXT: srli a0, a0, 16
+; RV32ZB-NEXT: ret
+;
+; RV64ZB-LABEL: test_bswap_shli_8_bswap_i16:
+; RV64ZB: # %bb.0:
+; RV64ZB-NEXT: andi a0, a0, -256
+; RV64ZB-NEXT: rev8 a0, a0
+; RV64ZB-NEXT: srli a0, a0, 48
+; RV64ZB-NEXT: ret
+ %1 = call i16 @llvm.bswap.i16(i16 %a)
+ %2 = shl i16 %1, 8
+ %3 = call i16 @llvm.bswap.i16(i16 %2)
+ ret i16 %3
+}
+
+define i32 @test_bswap_shli_8_bswap_i32(i32 %a) nounwind {
+; RV32ZB-LABEL: test_bswap_shli_8_bswap_i32:
+; RV32ZB: # %bb.0:
+; RV32ZB-NEXT: rev8 a0, a0
+; RV32ZB-NEXT: slli a0, a0, 8
+; RV32ZB-NEXT: rev8 a0, a0
+; RV32ZB-NEXT: ret
+;
+; RV64ZB-LABEL: test_bswap_shli_8_bswap_i32:
+; RV64ZB: # %bb.0:
+; RV64ZB-NEXT: rev8 a0, a0
+; RV64ZB-NEXT: srli a0, a0, 24
+; RV64ZB-NEXT: andi a0, a0, -256
+; RV64ZB-NEXT: rev8 a0, a0
+; RV64ZB-NEXT: srli a0, a0, 32
+; RV64ZB-NEXT: ret
+ %1 = call i32 @llvm.bswap.i32(i32 %a)
+ %2 = shl i32 %1, 8
+ %3 = call i32 @llvm.bswap.i32(i32 %2)
+ ret i32 %3
+}
+
+define i32 @test_bswap_shli_16_bswap_i32(i32 %a) nounwind {
+; RV32ZB-LABEL: test_bswap_shli_16_bswap_i32:
+; RV32ZB: # %bb.0:
+; RV32ZB-NEXT: rev8 a0, a0
+; RV32ZB-NEXT: slli a0, a0, 16
+; RV32ZB-NEXT: rev8 a0, a0
+; RV32ZB-NEXT: ret
+;
+; RV64ZB-LABEL: test_bswap_shli_16_bswap_i32:
+; RV64ZB: # %bb.0:
+; RV64ZB-NEXT: rev8 a0, a0
+; RV64ZB-NEXT: srli a0, a0, 32
+; RV64ZB-NEXT: slli a0, a0, 16
+; RV64ZB-NEXT: rev8 a0, a0
+; RV64ZB-NEXT: srli a0, a0, 32
+; RV64ZB-NEXT: ret
+ %1 = call i32 @llvm.bswap.i32(i32 %a)
+ %2 = shl i32 %1, 16
+ %3 = call i32 @llvm.bswap.i32(i32 %2)
+ ret i32 %3
+}
+
+define i32 @test_bswap_shli_24_bswap_i32(i32 %a) nounwind {
+; RV32ZB-LABEL: test_bswap_shli_24_bswap_i32:
+; RV32ZB: # %bb.0:
+; RV32ZB-NEXT: lui a1, 1044480
+; RV32ZB-NEXT: and a0, a0, a1
+; RV32ZB-NEXT: rev8 a0, a0
+; RV32ZB-NEXT: ret
+;
+; RV64ZB-LABEL: test_bswap_shli_24_bswap_i32:
+; RV64ZB: # %bb.0:
+; RV64ZB-NEXT: lui a1, 1044480
+; RV64ZB-NEXT: and a0, a0, a1
+; RV64ZB-NEXT: rev8 a0, a0
+; RV64ZB-NEXT: srli a0, a0, 32
+; RV64ZB-NEXT: ret
+ %1 = call i32 @llvm.bswap.i32(i32 %a)
+ %2 = shl i32 %1, 24
+ %3 = call i32 @llvm.bswap.i32(i32 %2)
+ ret i32 %3
+}
+
+define i64 @test_bswap_shli_48_bswap_i64(i64 %a) nounwind {
+; RV32ZB-LABEL: test_bswap_shli_48_bswap_i64:
+; RV32ZB: # %bb.0:
+; RV32ZB-NEXT: rev8 a0, a1
+; RV32ZB-NEXT: slli a0, a0, 16
+; RV32ZB-NEXT: rev8 a0, a0
+; RV32ZB-NEXT: li a1, 0
+; RV32ZB-NEXT: ret
+;
+; RV64ZB-LABEL: test_bswap_shli_48_bswap_i64:
+; RV64ZB: # %bb.0:
+; RV64ZB-NEXT: rev8 a0, a0
+; RV64ZB-NEXT: slli a0, a0, 48
+; RV64ZB-NEXT: rev8 a0, a0
+; RV64ZB-NEXT: ret
+ %1 = call i64 @llvm.bswap.i64(i64 %a)
+ %2 = shl i64 %1, 48
+ %3 = call i64 @llvm.bswap.i64(i64 %2)
+ ret i64 %3
+}
diff --git a/llvm/test/CodeGen/X86/combine-bswap.ll b/llvm/test/CodeGen/X86/combine-bswap.ll
index f4814a9adc35d..0e6fd2cdc39aa 100644
--- a/llvm/test/CodeGen/X86/combine-bswap.ll
+++ b/llvm/test/CodeGen/X86/combine-bswap.ll
@@ -5,6 +5,7 @@
; These tests just check that the plumbing is in place for @llvm.bswap. The
; actual output is massive at the moment as llvm.bswap is not yet legal.
+declare i16 @llvm.bswap.i16(i16) readnone
declare i32 @llvm.bswap.i32(i32) readnone
declare i64 @llvm.bswap.i64(i64) readnone
declare i32 @llvm.bswap.v4i32(i32) readnone
@@ -38,6 +39,146 @@ define i32 @test_bswap_bswap(i32 %a0) nounwind {
ret i32 %c
}
+; TODO: fold (bswap(srl (bswap c), x)) -> (shl c, x)
+define i16 @test_bswap_srli_8_bswap_i16(i16 %a) nounwind {
+; X86-LABEL: test_bswap_srli_8_bswap_i16:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: rolw $8, %ax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: test_bswap_srli_8_bswap_i16:
+; X64: # %bb.0:
+; X64-NEXT: movzbl %dil, %eax
+; X64-NEXT: rolw $8, %ax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+ %1 = call i16 @llvm.bswap.i16(i16 %a)
+ %2 = lshr i16 %1, 8
+ %3 = call i16 @llvm.bswap.i16(i16 %2)
+ ret i16 %3
+}
+
+define i32 @test_bswap_srli_8_bswap_i32(i32 %a) nounwind {
+; X86-LABEL: test_bswap_srli_8_bswap_i32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: bswapl %eax
+; X86-NEXT: shrl $8, %eax
+; X86-NEXT: bswapl %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: test_bswap_srli_8_bswap_i32:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: bswapl %eax
+; X64-NEXT: shrl $8, %eax
+; X64-NEXT: bswapl %eax
+; X64-NEXT: retq
+ %1 = call i32 @llvm.bswap.i32(i32 %a)
+ %2 = lshr i32 %1, 8
+ %3 = call i32 @llvm.bswap.i32(i32 %2)
+ ret i32 %3
+}
+
+define i64 @test_bswap_srli_16_bswap_i64(i64 %a) nounwind {
+; X86-LABEL: test_bswap_srli_16_bswap_i64:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: bswapl %edx
+; X86-NEXT: bswapl %eax
+; X86-NEXT: shrdl $16, %eax, %edx
+; X86-NEXT: shrl $16, %eax
+; X86-NEXT: bswapl %edx
+; X86-NEXT: bswapl %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: test_bswap_srli_16_bswap_i64:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: bswapq %rax
+; X64-NEXT: shrq $16, %rax
+; X64-NEXT: bswapq %rax
+; X64-NEXT: retq
+ %1 = call i64 @llvm.bswap.i64(i64 %a)
+ %2 = lshr i64 %1, 16
+ %3 = call i64 @llvm.bswap.i64(i64 %2)
+ ret i64 %3
+}
+
+; TODO: fold (bswap(shl (bswap c), x)) -> (srl c, x)
+define i16 @test_bswap_shli_8_bswap_i16(i16 %a) nounwind {
+; X86-LABEL: test_bswap_shli_8_bswap_i16:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: shll $8, %eax
+; X86-NEXT: rolw $8, %ax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: test_bswap_shli_8_bswap_i16:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: andl $65280, %eax # imm = 0xFF00
+; X64-NEXT: rolw $8, %ax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+ %1 = call i16 @llvm.bswap.i16(i16 %a)
+ %2 = shl i16 %1, 8
+ %3 = call i16 @llvm.bswap.i16(i16 %2)
+ ret i16 %3
+}
+
+define i32 @test_bswap_shli_8_bswap_i32(i32 %a) nounwind {
+; X86-LABEL: test_bswap_shli_8_bswap_i32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: bswapl %eax
+; X86-NEXT: shll $8, %eax
+; X86-NEXT: bswapl %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: test_bswap_shli_8_bswap_i32:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: bswapl %eax
+; X64-NEXT: shll $8, %eax
+; X64-NEXT: bswapl %eax
+; X64-NEXT: retq
+ %1 = call i32 @llvm.bswap.i32(i32 %a)
+ %2 = shl i32 %1, 8
+ %3 = call i32 @llvm.bswap.i32(i32 %2)
+ ret i32 %3
+}
+
+define i64 @test_bswap_shli_16_bswap_i64(i64 %a) nounwind {
+; X86-LABEL: test_bswap_shli_16_bswap_i64:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: bswapl %ecx
+; X86-NEXT: bswapl %eax
+; X86-NEXT: shldl $16, %ecx, %eax
+; X86-NEXT: bswapl %eax
+; X86-NEXT: rolw $8, %cx
+; X86-NEXT: movzwl %cx, %edx
+; X86-NEXT: retl
+;
+; X64-LABEL: test_bswap_shli_16_bswap_i64:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: bswapq %rax
+; X64-NEXT: shlq $16, %rax
+; X64-NEXT: bswapq %rax
+; X64-NEXT: retq
+ %1 = call i64 @llvm.bswap.i64(i64 %a)
+ %2 = shl i64 %1, 16
+ %3 = call i64 @llvm.bswap.i64(i64 %2)
+ ret i64 %3
+}
+
define i32 @test_demandedbits_bswap(i32 %a0) nounwind {
; X86-LABEL: test_demandedbits_bswap:
; X86: # %bb.0:
More information about the llvm-commits
mailing list