[llvm] 6882a30 - [RISCV] Add BREV8 and ORC_B to hasAllNBitUsers in RISCVOptWInstrs. (#148076)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 11 09:23:54 PDT 2025
Author: Craig Topper
Date: 2025-07-11T09:23:50-07:00
New Revision: 6882a30acec695beab568d5393af9bd15970d0d6
URL: https://github.com/llvm/llvm-project/commit/6882a30acec695beab568d5393af9bd15970d0d6
DIFF: https://github.com/llvm/llvm-project/commit/6882a30acec695beab568d5393af9bd15970d0d6.diff
LOG: [RISCV] Add BREV8 and ORC_B to hasAllNBitUsers in RISCVOptWInstrs. (#148076)
These were removed in #147830 due to ignoring that these instructions
operate on bytes. This patch adds them back with tests including a test
for the byte boundary issue.
I separated out the commits to show bad optimization if we don't round
Bits to the nearest byte.
Added:
Modified:
llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
llvm/test/CodeGen/RISCV/sextw-removal.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
index 24c05a2f807d0..d257f56cf4129 100644
--- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
+++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
@@ -323,6 +323,12 @@ static bool hasAllNBitUsers(const MachineInstr &OrigMI,
Worklist.push_back(std::make_pair(UserMI, Bits));
break;
+ case RISCV::BREV8:
+ case RISCV::ORC_B:
+ // BREV8 and ORC_B work on bytes. Round Bits down to the nearest byte.
+ Worklist.push_back(std::make_pair(UserMI, alignDown(Bits, 8)));
+ break;
+
case RISCV::PseudoCCMOVGPR:
case RISCV::PseudoCCMOVGPRNoX0:
// Either operand 4 or operand 5 is returned by this instruction. If
diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll
index 1a978d1a0fcac..9c8230572b926 100644
--- a/llvm/test/CodeGen/RISCV/sextw-removal.ll
+++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll
@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+f,+zknh,+v -target-abi=lp64f \
; RUN: | FileCheck %s --check-prefixes=CHECK,RV64I
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zba,+zbb,+f,+zknh,+v -target-abi=lp64f \
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zba,+zbb,+zbkb,+f,+zknh,+v -target-abi=lp64f \
; RUN: | FileCheck %s --check-prefixes=CHECK,RV64ZBB
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zba,+zbb,+f,+zknh,+v -target-abi=lp64f \
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zba,+zbb,+zbkb,+f,+zknh,+v -target-abi=lp64f \
; RUN: -riscv-disable-sextw-removal | FileCheck %s --check-prefix=NOREMOVAL
define void @test1(i32 signext %arg, i32 signext %arg1) nounwind {
@@ -1499,3 +1499,186 @@ bb7: ; preds = %bb2
}
declare i32 @llvm.riscv.vmv.x.s.nxv1i32( <vscale x 1 x i32>)
+
+; Test that we can look through brev8 in hasAllNBitUsers.
+define signext i32 @test21(i64 %arg1, i64 %arg2, i64 %arg3) {
+; RV64I-LABEL: test21:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: lui a3, 61681
+; RV64I-NEXT: lui a4, 209715
+; RV64I-NEXT: addi a3, a3, -241
+; RV64I-NEXT: addi a4, a4, 819
+; RV64I-NEXT: slli a5, a3, 32
+; RV64I-NEXT: add a3, a3, a5
+; RV64I-NEXT: slli a5, a4, 32
+; RV64I-NEXT: add a4, a4, a5
+; RV64I-NEXT: li a5, 256
+; RV64I-NEXT: .LBB25_1: # %bb2
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: srli a6, a0, 4
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: and a6, a6, a3
+; RV64I-NEXT: slli a0, a0, 4
+; RV64I-NEXT: or a0, a6, a0
+; RV64I-NEXT: srli a6, a0, 2
+; RV64I-NEXT: and a0, a0, a4
+; RV64I-NEXT: and a6, a6, a4
+; RV64I-NEXT: slli a0, a0, 2
+; RV64I-NEXT: or a0, a6, a0
+; RV64I-NEXT: andi a6, a0, 65
+; RV64I-NEXT: srli a0, a0, 1
+; RV64I-NEXT: slli a6, a6, 1
+; RV64I-NEXT: andi a0, a0, 1104
+; RV64I-NEXT: or a0, a0, a6
+; RV64I-NEXT: addi a2, a2, 1
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: bltu a2, a5, .LBB25_1
+; RV64I-NEXT: # %bb.2: # %bb7
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: test21:
+; RV64ZBB: # %bb.0: # %entry
+; RV64ZBB-NEXT: addi a2, a2, -1
+; RV64ZBB-NEXT: li a3, 256
+; RV64ZBB-NEXT: .LBB25_1: # %bb2
+; RV64ZBB-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64ZBB-NEXT: brev8 a0, a0
+; RV64ZBB-NEXT: andi a0, a0, 1234
+; RV64ZBB-NEXT: addi a2, a2, 1
+; RV64ZBB-NEXT: addw a0, a0, a1
+; RV64ZBB-NEXT: bltu a2, a3, .LBB25_1
+; RV64ZBB-NEXT: # %bb.2: # %bb7
+; RV64ZBB-NEXT: ret
+;
+; NOREMOVAL-LABEL: test21:
+; NOREMOVAL: # %bb.0: # %entry
+; NOREMOVAL-NEXT: addi a2, a2, -1
+; NOREMOVAL-NEXT: li a3, 256
+; NOREMOVAL-NEXT: .LBB25_1: # %bb2
+; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1
+; NOREMOVAL-NEXT: brev8 a0, a0
+; NOREMOVAL-NEXT: andi a0, a0, 1234
+; NOREMOVAL-NEXT: addi a2, a2, 1
+; NOREMOVAL-NEXT: add a0, a0, a1
+; NOREMOVAL-NEXT: bltu a2, a3, .LBB25_1
+; NOREMOVAL-NEXT: # %bb.2: # %bb7
+; NOREMOVAL-NEXT: sext.w a0, a0
+; NOREMOVAL-NEXT: ret
+entry:
+ br label %bb2
+
+bb2: ; preds = %bb2, %entry
+ %i1 = phi i64 [ %arg1, %entry ], [ %i5, %bb2 ]
+ %i2 = phi i64 [ %arg3, %entry ], [ %i3, %bb2 ]
+ %i3 = add i64 %i2, 1
+ %bswap = call i64 @llvm.bswap.i64(i64 %i1)
+ %bitreverse = call i64 @llvm.bitreverse.i64(i64 %bswap)
+ %i4 = and i64 %bitreverse, 1234
+ %i5 = add i64 %i4, %arg2
+ %i6 = icmp ugt i64 %i2, 255
+ br i1 %i6, label %bb7, label %bb2
+
+bb7: ; preds = %bb2
+ %i7 = trunc i64 %i5 to i32
+ ret i32 %i7
+}
+
+; Negative test for looking through brev8. Make sure we consider that it works
+; on bytes.
+define signext i32 @test22(i64 %arg1, i64 %arg2, i64 %arg3) {
+; RV64I-LABEL: test22:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: lui a3, %hi(.LCPI26_0)
+; RV64I-NEXT: lui a4, %hi(.LCPI26_1)
+; RV64I-NEXT: lui a5, %hi(.LCPI26_2)
+; RV64I-NEXT: lui a6, %hi(.LCPI26_3)
+; RV64I-NEXT: li a7, 69
+; RV64I-NEXT: ld a3, %lo(.LCPI26_0)(a3)
+; RV64I-NEXT: ld a4, %lo(.LCPI26_1)(a4)
+; RV64I-NEXT: ld a5, %lo(.LCPI26_2)(a5)
+; RV64I-NEXT: ld a6, %lo(.LCPI26_3)(a6)
+; RV64I-NEXT: slli a7, a7, 32
+; RV64I-NEXT: li t0, 65
+; RV64I-NEXT: slli t0, t0, 28
+; RV64I-NEXT: li t1, 256
+; RV64I-NEXT: .LBB26_1: # %bb2
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: slli t2, a0, 11
+; RV64I-NEXT: slli a0, a0, 3
+; RV64I-NEXT: and t2, t2, a3
+; RV64I-NEXT: and a0, a0, a4
+; RV64I-NEXT: or a0, a0, t2
+; RV64I-NEXT: srli t2, a0, 2
+; RV64I-NEXT: and a0, a0, a6
+; RV64I-NEXT: and t2, t2, a5
+; RV64I-NEXT: slli a0, a0, 2
+; RV64I-NEXT: or a0, t2, a0
+; RV64I-NEXT: srli t2, a0, 1
+; RV64I-NEXT: and a0, a0, t0
+; RV64I-NEXT: and t2, t2, a7
+; RV64I-NEXT: slli a0, a0, 1
+; RV64I-NEXT: or a0, t2, a0
+; RV64I-NEXT: srli a0, a0, 28
+; RV64I-NEXT: addi a2, a2, 1
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: bltu a2, t1, .LBB26_1
+; RV64I-NEXT: # %bb.2: # %bb7
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: test22:
+; RV64ZBB: # %bb.0: # %entry
+; RV64ZBB-NEXT: addi a2, a2, -1
+; RV64ZBB-NEXT: li a3, 256
+; RV64ZBB-NEXT: .LBB26_1: # %bb2
+; RV64ZBB-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64ZBB-NEXT: slli a0, a0, 7
+; RV64ZBB-NEXT: brev8 a0, a0
+; RV64ZBB-NEXT: srli a0, a0, 28
+; RV64ZBB-NEXT: andi a0, a0, 1234
+; RV64ZBB-NEXT: addi a2, a2, 1
+; RV64ZBB-NEXT: add a0, a0, a1
+; RV64ZBB-NEXT: bltu a2, a3, .LBB26_1
+; RV64ZBB-NEXT: # %bb.2: # %bb7
+; RV64ZBB-NEXT: sext.w a0, a0
+; RV64ZBB-NEXT: ret
+;
+; NOREMOVAL-LABEL: test22:
+; NOREMOVAL: # %bb.0: # %entry
+; NOREMOVAL-NEXT: addi a2, a2, -1
+; NOREMOVAL-NEXT: li a3, 256
+; NOREMOVAL-NEXT: .LBB26_1: # %bb2
+; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1
+; NOREMOVAL-NEXT: slli a0, a0, 7
+; NOREMOVAL-NEXT: brev8 a0, a0
+; NOREMOVAL-NEXT: srli a0, a0, 28
+; NOREMOVAL-NEXT: andi a0, a0, 1234
+; NOREMOVAL-NEXT: addi a2, a2, 1
+; NOREMOVAL-NEXT: add a0, a0, a1
+; NOREMOVAL-NEXT: bltu a2, a3, .LBB26_1
+; NOREMOVAL-NEXT: # %bb.2: # %bb7
+; NOREMOVAL-NEXT: sext.w a0, a0
+; NOREMOVAL-NEXT: ret
+entry:
+ br label %bb2
+
+bb2: ; preds = %bb2, %entry
+ %i1 = phi i64 [ %arg1, %entry ], [ %i5, %bb2 ]
+ %i2 = phi i64 [ %arg3, %entry ], [ %i3, %bb2 ]
+ %i3 = add i64 %i2, 1
+ %shl = shl i64 %i1, 7
+ %bswap = call i64 @llvm.bswap.i64(i64 %shl)
+ %bitreverse = call i64 @llvm.bitreverse.i64(i64 %bswap)
+ %lshr = lshr i64 %bitreverse, 28
+ %i4 = and i64 %lshr, 1234
+ %i5 = add i64 %i4, %arg2
+ %i6 = icmp ugt i64 %i2, 255
+ br i1 %i6, label %bb7, label %bb2
+
+bb7: ; preds = %bb2
+ %i7 = trunc i64 %i5 to i32
+ ret i32 %i7
+}
More information about the llvm-commits
mailing list