[llvm] 6882a30 - [RISCV] Add BREV8 and ORC_B to hasAllNBitUsers in RISCVOptWInstrs. (#148076)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 11 09:23:54 PDT 2025
Author: Craig Topper
Date: 2025-07-11T09:23:50-07:00
New Revision: 6882a30acec695beab568d5393af9bd15970d0d6
URL: https://github.com/llvm/llvm-project/commit/6882a30acec695beab568d5393af9bd15970d0d6
DIFF: https://github.com/llvm/llvm-project/commit/6882a30acec695beab568d5393af9bd15970d0d6.diff
LOG: [RISCV] Add BREV8 and ORC_B to hasAllNBitUsers in RISCVOptWInstrs. (#148076)
These were removed in #147830 due to ignoring that these instructions
operate on bytes. This patch adds them back with tests including a test
for the byte boundary issue.
I separated out the commits to show bad optimization if we don't round
Bits to the nearest byte.
Added:
Modified:
llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
llvm/test/CodeGen/RISCV/sextw-removal.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
index 24c05a2f807d0..d257f56cf4129 100644
--- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
+++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
@@ -323,6 +323,12 @@ static bool hasAllNBitUsers(const MachineInstr &OrigMI,
Worklist.push_back(std::make_pair(UserMI, Bits));
break;
+ case RISCV::BREV8:
+ case RISCV::ORC_B:
+ // BREV8 and ORC_B work on bytes. Round Bits down to the nearest byte.
+ Worklist.push_back(std::make_pair(UserMI, alignDown(Bits, 8)));
+ break;
+
case RISCV::PseudoCCMOVGPR:
case RISCV::PseudoCCMOVGPRNoX0:
// Either operand 4 or operand 5 is returned by this instruction. If
diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll
index 1a978d1a0fcac..9c8230572b926 100644
--- a/llvm/test/CodeGen/RISCV/sextw-removal.ll
+++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll
@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+f,+zknh,+v -target-abi=lp64f \
; RUN: | FileCheck %s --check-prefixes=CHECK,RV64I
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zba,+zbb,+f,+zknh,+v -target-abi=lp64f \
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zba,+zbb,+zbkb,+f,+zknh,+v -target-abi=lp64f \
; RUN: | FileCheck %s --check-prefixes=CHECK,RV64ZBB
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zba,+zbb,+f,+zknh,+v -target-abi=lp64f \
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zba,+zbb,+zbkb,+f,+zknh,+v -target-abi=lp64f \
; RUN: -riscv-disable-sextw-removal | FileCheck %s --check-prefix=NOREMOVAL
define void @test1(i32 signext %arg, i32 signext %arg1) nounwind {
@@ -1499,3 +1499,186 @@ bb7: ; preds = %bb2
}
declare i32 @llvm.riscv.vmv.x.s.nxv1i32( <vscale x 1 x i32>)
+
+; Test that we can look through brev8 in hasAllNBitUsers.
+define signext i32 @test21(i64 %arg1, i64 %arg2, i64 %arg3) {
+; RV64I-LABEL: test21:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: lui a3, 61681
+; RV64I-NEXT: lui a4, 209715
+; RV64I-NEXT: addi a3, a3, -241
+; RV64I-NEXT: addi a4, a4, 819
+; RV64I-NEXT: slli a5, a3, 32
+; RV64I-NEXT: add a3, a3, a5
+; RV64I-NEXT: slli a5, a4, 32
+; RV64I-NEXT: add a4, a4, a5
+; RV64I-NEXT: li a5, 256
+; RV64I-NEXT: .LBB25_1: # %bb2
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: srli a6, a0, 4
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: and a6, a6, a3
+; RV64I-NEXT: slli a0, a0, 4
+; RV64I-NEXT: or a0, a6, a0
+; RV64I-NEXT: srli a6, a0, 2
+; RV64I-NEXT: and a0, a0, a4
+; RV64I-NEXT: and a6, a6, a4
+; RV64I-NEXT: slli a0, a0, 2
+; RV64I-NEXT: or a0, a6, a0
+; RV64I-NEXT: andi a6, a0, 65
+; RV64I-NEXT: srli a0, a0, 1
+; RV64I-NEXT: slli a6, a6, 1
+; RV64I-NEXT: andi a0, a0, 1104
+; RV64I-NEXT: or a0, a0, a6
+; RV64I-NEXT: addi a2, a2, 1
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: bltu a2, a5, .LBB25_1
+; RV64I-NEXT: # %bb.2: # %bb7
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: test21:
+; RV64ZBB: # %bb.0: # %entry
+; RV64ZBB-NEXT: addi a2, a2, -1
+; RV64ZBB-NEXT: li a3, 256
+; RV64ZBB-NEXT: .LBB25_1: # %bb2
+; RV64ZBB-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64ZBB-NEXT: brev8 a0, a0
+; RV64ZBB-NEXT: andi a0, a0, 1234
+; RV64ZBB-NEXT: addi a2, a2, 1
+; RV64ZBB-NEXT: addw a0, a0, a1
+; RV64ZBB-NEXT: bltu a2, a3, .LBB25_1
+; RV64ZBB-NEXT: # %bb.2: # %bb7
+; RV64ZBB-NEXT: ret
+;
+; NOREMOVAL-LABEL: test21:
+; NOREMOVAL: # %bb.0: # %entry
+; NOREMOVAL-NEXT: addi a2, a2, -1
+; NOREMOVAL-NEXT: li a3, 256
+; NOREMOVAL-NEXT: .LBB25_1: # %bb2
+; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1
+; NOREMOVAL-NEXT: brev8 a0, a0
+; NOREMOVAL-NEXT: andi a0, a0, 1234
+; NOREMOVAL-NEXT: addi a2, a2, 1
+; NOREMOVAL-NEXT: add a0, a0, a1
+; NOREMOVAL-NEXT: bltu a2, a3, .LBB25_1
+; NOREMOVAL-NEXT: # %bb.2: # %bb7
+; NOREMOVAL-NEXT: sext.w a0, a0
+; NOREMOVAL-NEXT: ret
+entry:
+ br label %bb2
+
+bb2: ; preds = %bb2, %entry
+ %i1 = phi i64 [ %arg1, %entry ], [ %i5, %bb2 ]
+ %i2 = phi i64 [ %arg3, %entry ], [ %i3, %bb2 ]
+ %i3 = add i64 %i2, 1
+ %bswap = call i64 @llvm.bswap.i64(i64 %i1)
+ %bitreverse = call i64 @llvm.bitreverse.i64(i64 %bswap)
+ %i4 = and i64 %bitreverse, 1234
+ %i5 = add i64 %i4, %arg2
+ %i6 = icmp ugt i64 %i2, 255
+ br i1 %i6, label %bb7, label %bb2
+
+bb7: ; preds = %bb2
+ %i7 = trunc i64 %i5 to i32
+ ret i32 %i7
+}
+
+; Negative test for looking through brev8. Make sure we consider that it works
+; on bytes.
+define signext i32 @test22(i64 %arg1, i64 %arg2, i64 %arg3) {
+; RV64I-LABEL: test22:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: lui a3, %hi(.LCPI26_0)
+; RV64I-NEXT: lui a4, %hi(.LCPI26_1)
+; RV64I-NEXT: lui a5, %hi(.LCPI26_2)
+; RV64I-NEXT: lui a6, %hi(.LCPI26_3)
+; RV64I-NEXT: li a7, 69
+; RV64I-NEXT: ld a3, %lo(.LCPI26_0)(a3)
+; RV64I-NEXT: ld a4, %lo(.LCPI26_1)(a4)
+; RV64I-NEXT: ld a5, %lo(.LCPI26_2)(a5)
+; RV64I-NEXT: ld a6, %lo(.LCPI26_3)(a6)
+; RV64I-NEXT: slli a7, a7, 32
+; RV64I-NEXT: li t0, 65
+; RV64I-NEXT: slli t0, t0, 28
+; RV64I-NEXT: li t1, 256
+; RV64I-NEXT: .LBB26_1: # %bb2
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: slli t2, a0, 11
+; RV64I-NEXT: slli a0, a0, 3
+; RV64I-NEXT: and t2, t2, a3
+; RV64I-NEXT: and a0, a0, a4
+; RV64I-NEXT: or a0, a0, t2
+; RV64I-NEXT: srli t2, a0, 2
+; RV64I-NEXT: and a0, a0, a6
+; RV64I-NEXT: and t2, t2, a5
+; RV64I-NEXT: slli a0, a0, 2
+; RV64I-NEXT: or a0, t2, a0
+; RV64I-NEXT: srli t2, a0, 1
+; RV64I-NEXT: and a0, a0, t0
+; RV64I-NEXT: and t2, t2, a7
+; RV64I-NEXT: slli a0, a0, 1
+; RV64I-NEXT: or a0, t2, a0
+; RV64I-NEXT: srli a0, a0, 28
+; RV64I-NEXT: addi a2, a2, 1
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: bltu a2, t1, .LBB26_1
+; RV64I-NEXT: # %bb.2: # %bb7
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: test22:
+; RV64ZBB: # %bb.0: # %entry
+; RV64ZBB-NEXT: addi a2, a2, -1
+; RV64ZBB-NEXT: li a3, 256
+; RV64ZBB-NEXT: .LBB26_1: # %bb2
+; RV64ZBB-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64ZBB-NEXT: slli a0, a0, 7
+; RV64ZBB-NEXT: brev8 a0, a0
+; RV64ZBB-NEXT: srli a0, a0, 28
+; RV64ZBB-NEXT: andi a0, a0, 1234
+; RV64ZBB-NEXT: addi a2, a2, 1
+; RV64ZBB-NEXT: add a0, a0, a1
+; RV64ZBB-NEXT: bltu a2, a3, .LBB26_1
+; RV64ZBB-NEXT: # %bb.2: # %bb7
+; RV64ZBB-NEXT: sext.w a0, a0
+; RV64ZBB-NEXT: ret
+;
+; NOREMOVAL-LABEL: test22:
+; NOREMOVAL: # %bb.0: # %entry
+; NOREMOVAL-NEXT: addi a2, a2, -1
+; NOREMOVAL-NEXT: li a3, 256
+; NOREMOVAL-NEXT: .LBB26_1: # %bb2
+; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1
+; NOREMOVAL-NEXT: slli a0, a0, 7
+; NOREMOVAL-NEXT: brev8 a0, a0
+; NOREMOVAL-NEXT: srli a0, a0, 28
+; NOREMOVAL-NEXT: andi a0, a0, 1234
+; NOREMOVAL-NEXT: addi a2, a2, 1
+; NOREMOVAL-NEXT: add a0, a0, a1
+; NOREMOVAL-NEXT: bltu a2, a3, .LBB26_1
+; NOREMOVAL-NEXT: # %bb.2: # %bb7
+; NOREMOVAL-NEXT: sext.w a0, a0
+; NOREMOVAL-NEXT: ret
+entry:
+ br label %bb2
+
+bb2: ; preds = %bb2, %entry
+ %i1 = phi i64 [ %arg1, %entry ], [ %i5, %bb2 ]
+ %i2 = phi i64 [ %arg3, %entry ], [ %i3, %bb2 ]
+ %i3 = add i64 %i2, 1
+ %shl = shl i64 %i1, 7
+ %bswap = call i64 @llvm.bswap.i64(i64 %shl)
+ %bitreverse = call i64 @llvm.bitreverse.i64(i64 %bswap)
+ %lshr = lshr i64 %bitreverse, 28
+ %i4 = and i64 %lshr, 1234
+ %i5 = add i64 %i4, %arg2
+ %i6 = icmp ugt i64 %i2, 255
+ br i1 %i6, label %bb7, label %bb2
+
+bb7: ; preds = %bb2
+ %i7 = trunc i64 %i5 to i32
+ ret i32 %i7
+}
More information about the llvm-commits
mailing list