[llvm] [RISCV] Don't commute with shift if it would break sh{1,2,3}add pattern (PR #119527)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 5 21:14:47 PST 2025
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/119527
>From 75fa452ceb539c956693c40af801c8f09f6b4480 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 11 Dec 2024 15:57:29 +0800
Subject: [PATCH 1/4] Precommit tests
---
.../CodeGen/RISCV/add_sext_shl_constant.ll | 293 +++++++++++++-----
llvm/test/CodeGen/RISCV/add_shl_constant.ll | 233 ++++++++++----
2 files changed, 388 insertions(+), 138 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll b/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll
index 47b6c07cc699e7..c2b05903e8e456 100644
--- a/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll
+++ b/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll
@@ -1,17 +1,28 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -mtriple=riscv64 < %s | FileCheck -check-prefix=RV64 %s
+; RUN: llc -mtriple=riscv64 < %s | FileCheck -check-prefixes=RV64,NO-ZBA %s
+; RUN: llc -mtriple=riscv64 -mattr=+zba < %s | FileCheck -check-prefixes=RV64,ZBA %s
define void @add_sext_shl_moreOneUse_add(ptr %array1, i32 %a, i32 %b) {
-; RV64-LABEL: add_sext_shl_moreOneUse_add:
-; RV64: # %bb.0: # %entry
-; RV64-NEXT: addi a3, a1, 5
-; RV64-NEXT: sext.w a1, a1
-; RV64-NEXT: slli a1, a1, 2
-; RV64-NEXT: add a0, a1, a0
-; RV64-NEXT: sw a2, 20(a0)
-; RV64-NEXT: sw a2, 24(a0)
-; RV64-NEXT: sw a3, 140(a0)
-; RV64-NEXT: ret
+; NO-ZBA-LABEL: add_sext_shl_moreOneUse_add:
+; NO-ZBA: # %bb.0: # %entry
+; NO-ZBA-NEXT: addi a3, a1, 5
+; NO-ZBA-NEXT: sext.w a1, a1
+; NO-ZBA-NEXT: slli a1, a1, 2
+; NO-ZBA-NEXT: add a0, a1, a0
+; NO-ZBA-NEXT: sw a2, 20(a0)
+; NO-ZBA-NEXT: sw a2, 24(a0)
+; NO-ZBA-NEXT: sw a3, 140(a0)
+; NO-ZBA-NEXT: ret
+;
+; ZBA-LABEL: add_sext_shl_moreOneUse_add:
+; ZBA: # %bb.0: # %entry
+; ZBA-NEXT: addi a3, a1, 5
+; ZBA-NEXT: sext.w a1, a1
+; ZBA-NEXT: sh2add a0, a1, a0
+; ZBA-NEXT: sw a2, 20(a0)
+; ZBA-NEXT: sw a2, 24(a0)
+; ZBA-NEXT: sw a3, 140(a0)
+; ZBA-NEXT: ret
entry:
%add = add nsw i32 %a, 5
%idxprom = sext i32 %add to i64
@@ -29,19 +40,32 @@ entry:
}
define void @add_sext_shl_moreOneUse_addexceedsign12(ptr %array1, i32 %a, i32 %b) {
-; RV64-LABEL: add_sext_shl_moreOneUse_addexceedsign12:
-; RV64: # %bb.0: # %entry
-; RV64-NEXT: addi a3, a1, 2047
-; RV64-NEXT: lui a4, 2
-; RV64-NEXT: sext.w a1, a1
-; RV64-NEXT: addi a3, a3, 1
-; RV64-NEXT: slli a1, a1, 2
-; RV64-NEXT: add a0, a0, a4
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: sw a2, 0(a0)
-; RV64-NEXT: sw a3, 4(a0)
-; RV64-NEXT: sw a2, 120(a0)
-; RV64-NEXT: ret
+; NO-ZBA-LABEL: add_sext_shl_moreOneUse_addexceedsign12:
+; NO-ZBA: # %bb.0: # %entry
+; NO-ZBA-NEXT: addi a3, a1, 2047
+; NO-ZBA-NEXT: lui a4, 2
+; NO-ZBA-NEXT: sext.w a1, a1
+; NO-ZBA-NEXT: addi a3, a3, 1
+; NO-ZBA-NEXT: slli a1, a1, 2
+; NO-ZBA-NEXT: add a0, a0, a4
+; NO-ZBA-NEXT: add a0, a0, a1
+; NO-ZBA-NEXT: sw a2, 0(a0)
+; NO-ZBA-NEXT: sw a3, 4(a0)
+; NO-ZBA-NEXT: sw a2, 120(a0)
+; NO-ZBA-NEXT: ret
+;
+; ZBA-LABEL: add_sext_shl_moreOneUse_addexceedsign12:
+; ZBA: # %bb.0: # %entry
+; ZBA-NEXT: addi a3, a1, 2047
+; ZBA-NEXT: lui a4, 2
+; ZBA-NEXT: sext.w a1, a1
+; ZBA-NEXT: addi a3, a3, 1
+; ZBA-NEXT: sh2add a0, a1, a0
+; ZBA-NEXT: add a0, a0, a4
+; ZBA-NEXT: sw a2, 0(a0)
+; ZBA-NEXT: sw a3, 4(a0)
+; ZBA-NEXT: sw a2, 120(a0)
+; ZBA-NEXT: ret
entry:
%add = add nsw i32 %a, 2048
%idxprom = sext i32 %add to i64
@@ -57,16 +81,26 @@ entry:
}
define void @add_sext_shl_moreOneUse_sext(ptr %array1, i32 %a, i32 %b) {
-; RV64-LABEL: add_sext_shl_moreOneUse_sext:
-; RV64: # %bb.0: # %entry
-; RV64-NEXT: sext.w a1, a1
-; RV64-NEXT: addi a3, a1, 5
-; RV64-NEXT: slli a1, a1, 2
-; RV64-NEXT: add a0, a1, a0
-; RV64-NEXT: sw a2, 20(a0)
-; RV64-NEXT: sw a2, 24(a0)
-; RV64-NEXT: sd a3, 140(a0)
-; RV64-NEXT: ret
+; NO-ZBA-LABEL: add_sext_shl_moreOneUse_sext:
+; NO-ZBA: # %bb.0: # %entry
+; NO-ZBA-NEXT: sext.w a1, a1
+; NO-ZBA-NEXT: addi a3, a1, 5
+; NO-ZBA-NEXT: slli a1, a1, 2
+; NO-ZBA-NEXT: add a0, a1, a0
+; NO-ZBA-NEXT: sw a2, 20(a0)
+; NO-ZBA-NEXT: sw a2, 24(a0)
+; NO-ZBA-NEXT: sd a3, 140(a0)
+; NO-ZBA-NEXT: ret
+;
+; ZBA-LABEL: add_sext_shl_moreOneUse_sext:
+; ZBA: # %bb.0: # %entry
+; ZBA-NEXT: sext.w a1, a1
+; ZBA-NEXT: addi a3, a1, 5
+; ZBA-NEXT: sh2add a0, a1, a0
+; ZBA-NEXT: sw a2, 20(a0)
+; ZBA-NEXT: sw a2, 24(a0)
+; ZBA-NEXT: sd a3, 140(a0)
+; ZBA-NEXT: ret
entry:
%add = add nsw i32 %a, 5
%idxprom = sext i32 %add to i64
@@ -85,20 +119,34 @@ entry:
; test of jumpping, find add's operand has one more use can simplified
define void @add_sext_shl_moreOneUse_add_inSelect(ptr %array1, i32 signext %a, i32 %b, i32 signext %x) {
-; RV64-LABEL: add_sext_shl_moreOneUse_add_inSelect:
-; RV64: # %bb.0: # %entry
-; RV64-NEXT: addi a4, a1, 5
-; RV64-NEXT: mv a5, a4
-; RV64-NEXT: bgtz a3, .LBB3_2
-; RV64-NEXT: # %bb.1: # %entry
-; RV64-NEXT: mv a5, a2
-; RV64-NEXT: .LBB3_2: # %entry
-; RV64-NEXT: slli a1, a1, 2
-; RV64-NEXT: add a0, a1, a0
-; RV64-NEXT: sw a5, 20(a0)
-; RV64-NEXT: sw a5, 24(a0)
-; RV64-NEXT: sw a4, 140(a0)
-; RV64-NEXT: ret
+; NO-ZBA-LABEL: add_sext_shl_moreOneUse_add_inSelect:
+; NO-ZBA: # %bb.0: # %entry
+; NO-ZBA-NEXT: addi a4, a1, 5
+; NO-ZBA-NEXT: mv a5, a4
+; NO-ZBA-NEXT: bgtz a3, .LBB3_2
+; NO-ZBA-NEXT: # %bb.1: # %entry
+; NO-ZBA-NEXT: mv a5, a2
+; NO-ZBA-NEXT: .LBB3_2: # %entry
+; NO-ZBA-NEXT: slli a1, a1, 2
+; NO-ZBA-NEXT: add a0, a1, a0
+; NO-ZBA-NEXT: sw a5, 20(a0)
+; NO-ZBA-NEXT: sw a5, 24(a0)
+; NO-ZBA-NEXT: sw a4, 140(a0)
+; NO-ZBA-NEXT: ret
+;
+; ZBA-LABEL: add_sext_shl_moreOneUse_add_inSelect:
+; ZBA: # %bb.0: # %entry
+; ZBA-NEXT: addi a4, a1, 5
+; ZBA-NEXT: mv a5, a4
+; ZBA-NEXT: bgtz a3, .LBB3_2
+; ZBA-NEXT: # %bb.1: # %entry
+; ZBA-NEXT: mv a5, a2
+; ZBA-NEXT: .LBB3_2: # %entry
+; ZBA-NEXT: sh2add a0, a1, a0
+; ZBA-NEXT: sw a5, 20(a0)
+; ZBA-NEXT: sw a5, 24(a0)
+; ZBA-NEXT: sw a4, 140(a0)
+; ZBA-NEXT: ret
entry:
%add = add nsw i32 %a, 5
%cmp = icmp sgt i32 %x, 0
@@ -118,23 +166,40 @@ entry:
}
define void @add_sext_shl_moreOneUse_add_inSelect_addexceedsign12(ptr %array1, i32 signext %a, i32 %b, i32 signext %x) {
-; RV64-LABEL: add_sext_shl_moreOneUse_add_inSelect_addexceedsign12:
-; RV64: # %bb.0: # %entry
-; RV64-NEXT: addi a4, a1, 2047
-; RV64-NEXT: lui a5, 2
-; RV64-NEXT: slli a6, a1, 2
-; RV64-NEXT: addi a1, a4, 1
-; RV64-NEXT: add a0, a0, a6
-; RV64-NEXT: add a0, a0, a5
-; RV64-NEXT: mv a4, a1
-; RV64-NEXT: bgtz a3, .LBB4_2
-; RV64-NEXT: # %bb.1: # %entry
-; RV64-NEXT: mv a4, a2
-; RV64-NEXT: .LBB4_2: # %entry
-; RV64-NEXT: sw a4, 0(a0)
-; RV64-NEXT: sw a4, 4(a0)
-; RV64-NEXT: sw a1, 120(a0)
-; RV64-NEXT: ret
+; NO-ZBA-LABEL: add_sext_shl_moreOneUse_add_inSelect_addexceedsign12:
+; NO-ZBA: # %bb.0: # %entry
+; NO-ZBA-NEXT: addi a4, a1, 2047
+; NO-ZBA-NEXT: lui a5, 2
+; NO-ZBA-NEXT: slli a6, a1, 2
+; NO-ZBA-NEXT: addi a1, a4, 1
+; NO-ZBA-NEXT: add a0, a0, a6
+; NO-ZBA-NEXT: add a0, a0, a5
+; NO-ZBA-NEXT: mv a4, a1
+; NO-ZBA-NEXT: bgtz a3, .LBB4_2
+; NO-ZBA-NEXT: # %bb.1: # %entry
+; NO-ZBA-NEXT: mv a4, a2
+; NO-ZBA-NEXT: .LBB4_2: # %entry
+; NO-ZBA-NEXT: sw a4, 0(a0)
+; NO-ZBA-NEXT: sw a4, 4(a0)
+; NO-ZBA-NEXT: sw a1, 120(a0)
+; NO-ZBA-NEXT: ret
+;
+; ZBA-LABEL: add_sext_shl_moreOneUse_add_inSelect_addexceedsign12:
+; ZBA: # %bb.0: # %entry
+; ZBA-NEXT: addi a4, a1, 2047
+; ZBA-NEXT: lui a5, 2
+; ZBA-NEXT: addi a4, a4, 1
+; ZBA-NEXT: sh2add a0, a1, a0
+; ZBA-NEXT: add a0, a0, a5
+; ZBA-NEXT: mv a1, a4
+; ZBA-NEXT: bgtz a3, .LBB4_2
+; ZBA-NEXT: # %bb.1: # %entry
+; ZBA-NEXT: mv a1, a2
+; ZBA-NEXT: .LBB4_2: # %entry
+; ZBA-NEXT: sw a1, 0(a0)
+; ZBA-NEXT: sw a1, 4(a0)
+; ZBA-NEXT: sw a4, 120(a0)
+; ZBA-NEXT: ret
entry:
%add = add nsw i32 %a, 2048
%cmp = icmp sgt i32 %x, 0
@@ -152,20 +217,34 @@ entry:
}
define void @add_shl_moreOneUse_inSelect(ptr %array1, i64 %a, i64 %b, i64 %x) {
-; RV64-LABEL: add_shl_moreOneUse_inSelect:
-; RV64: # %bb.0: # %entry
-; RV64-NEXT: addi a4, a1, 5
-; RV64-NEXT: mv a5, a4
-; RV64-NEXT: bgtz a3, .LBB5_2
-; RV64-NEXT: # %bb.1: # %entry
-; RV64-NEXT: mv a5, a2
-; RV64-NEXT: .LBB5_2: # %entry
-; RV64-NEXT: slli a1, a1, 3
-; RV64-NEXT: add a0, a1, a0
-; RV64-NEXT: sd a5, 40(a0)
-; RV64-NEXT: sd a5, 48(a0)
-; RV64-NEXT: sd a4, 280(a0)
-; RV64-NEXT: ret
+; NO-ZBA-LABEL: add_shl_moreOneUse_inSelect:
+; NO-ZBA: # %bb.0: # %entry
+; NO-ZBA-NEXT: addi a4, a1, 5
+; NO-ZBA-NEXT: mv a5, a4
+; NO-ZBA-NEXT: bgtz a3, .LBB5_2
+; NO-ZBA-NEXT: # %bb.1: # %entry
+; NO-ZBA-NEXT: mv a5, a2
+; NO-ZBA-NEXT: .LBB5_2: # %entry
+; NO-ZBA-NEXT: slli a1, a1, 3
+; NO-ZBA-NEXT: add a0, a1, a0
+; NO-ZBA-NEXT: sd a5, 40(a0)
+; NO-ZBA-NEXT: sd a5, 48(a0)
+; NO-ZBA-NEXT: sd a4, 280(a0)
+; NO-ZBA-NEXT: ret
+;
+; ZBA-LABEL: add_shl_moreOneUse_inSelect:
+; ZBA: # %bb.0: # %entry
+; ZBA-NEXT: addi a4, a1, 5
+; ZBA-NEXT: mv a5, a4
+; ZBA-NEXT: bgtz a3, .LBB5_2
+; ZBA-NEXT: # %bb.1: # %entry
+; ZBA-NEXT: mv a5, a2
+; ZBA-NEXT: .LBB5_2: # %entry
+; ZBA-NEXT: sh3add a0, a1, a0
+; ZBA-NEXT: sd a5, 40(a0)
+; ZBA-NEXT: sd a5, 48(a0)
+; ZBA-NEXT: sd a4, 280(a0)
+; ZBA-NEXT: ret
entry:
%add = add nsw i64 %a, 5
%cmp = icmp sgt i64 %x, 0
@@ -180,3 +259,59 @@ entry:
store i64 %add, ptr %arrayidx6
ret void
}
+
+define i64 @add_shl_moreOneUse_sh1add(i64 %x) {
+; RV64-LABEL: add_shl_moreOneUse_sh1add:
+; RV64: # %bb.0:
+; RV64-NEXT: ori a1, a0, 1
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: ori a0, a0, 2
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: ret
+ %or = or i64 %x, 1
+ %mul = shl i64 %or, 1
+ %add = add i64 %mul, %or
+ ret i64 %add
+}
+
+define i64 @add_shl_moreOneUse_sh2add(i64 %x) {
+; RV64-LABEL: add_shl_moreOneUse_sh2add:
+; RV64: # %bb.0:
+; RV64-NEXT: ori a1, a0, 1
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: ori a0, a0, 4
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: ret
+ %or = or i64 %x, 1
+ %mul = shl i64 %or, 2
+ %add = add i64 %mul, %or
+ ret i64 %add
+}
+
+define i64 @add_shl_moreOneUse_sh3add(i64 %x) {
+; RV64-LABEL: add_shl_moreOneUse_sh3add:
+; RV64: # %bb.0:
+; RV64-NEXT: ori a1, a0, 1
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: ori a0, a0, 8
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: ret
+ %or = or i64 %x, 1
+ %mul = shl i64 %or, 3
+ %add = add i64 %mul, %or
+ ret i64 %add
+}
+
+define i64 @add_shl_moreOneUse_sh4add(i64 %x) {
+; RV64-LABEL: add_shl_moreOneUse_sh4add:
+; RV64: # %bb.0:
+; RV64-NEXT: ori a1, a0, 1
+; RV64-NEXT: slli a0, a0, 4
+; RV64-NEXT: ori a0, a0, 16
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: ret
+ %or = or i64 %x, 1
+ %mul = shl i64 %or, 4
+ %add = add i64 %mul, %or
+ ret i64 %add
+}
diff --git a/llvm/test/CodeGen/RISCV/add_shl_constant.ll b/llvm/test/CodeGen/RISCV/add_shl_constant.ll
index 71b61868b8c844..ea7d17c80b57c4 100644
--- a/llvm/test/CodeGen/RISCV/add_shl_constant.ll
+++ b/llvm/test/CodeGen/RISCV/add_shl_constant.ll
@@ -1,13 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=riscv32 < %s | FileCheck -check-prefix=RV32 %s
+; RUN: llc -mtriple=riscv32 < %s | FileCheck -check-prefixes=RV32,NO-ZBA %s
+; RUN: llc -mtriple=riscv32 -mattr=+zba < %s | FileCheck -check-prefixes=RV32,ZBA %s
define i32 @add_shl_oneUse(i32 %x, i32 %y) nounwind {
-; RV32-LABEL: add_shl_oneUse:
-; RV32: # %bb.0:
-; RV32-NEXT: slli a0, a0, 3
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: addi a0, a0, 984
-; RV32-NEXT: ret
+; NO-ZBA-LABEL: add_shl_oneUse:
+; NO-ZBA: # %bb.0:
+; NO-ZBA-NEXT: slli a0, a0, 3
+; NO-ZBA-NEXT: add a0, a0, a1
+; NO-ZBA-NEXT: addi a0, a0, 984
+; NO-ZBA-NEXT: ret
+;
+; ZBA-LABEL: add_shl_oneUse:
+; ZBA: # %bb.0:
+; ZBA-NEXT: sh3add a0, a0, a1
+; ZBA-NEXT: addi a0, a0, 984
+; ZBA-NEXT: ret
%add.0 = add i32 %x, 123
%shl = shl i32 %add.0, 3
%add.1 = add i32 %shl, %y
@@ -15,15 +22,24 @@ define i32 @add_shl_oneUse(i32 %x, i32 %y) nounwind {
}
define void @add_shl_moreOneUse_inStore(ptr %array1, i32 %a, i32 %b) {
-; RV32-LABEL: add_shl_moreOneUse_inStore:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: addi a3, a1, 5
-; RV32-NEXT: slli a1, a1, 2
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: sw a2, 20(a0)
-; RV32-NEXT: sw a2, 24(a0)
-; RV32-NEXT: sw a3, 140(a0)
-; RV32-NEXT: ret
+; NO-ZBA-LABEL: add_shl_moreOneUse_inStore:
+; NO-ZBA: # %bb.0: # %entry
+; NO-ZBA-NEXT: addi a3, a1, 5
+; NO-ZBA-NEXT: slli a1, a1, 2
+; NO-ZBA-NEXT: add a0, a0, a1
+; NO-ZBA-NEXT: sw a2, 20(a0)
+; NO-ZBA-NEXT: sw a2, 24(a0)
+; NO-ZBA-NEXT: sw a3, 140(a0)
+; NO-ZBA-NEXT: ret
+;
+; ZBA-LABEL: add_shl_moreOneUse_inStore:
+; ZBA: # %bb.0: # %entry
+; ZBA-NEXT: addi a3, a1, 5
+; ZBA-NEXT: sh2add a0, a1, a0
+; ZBA-NEXT: sw a2, 20(a0)
+; ZBA-NEXT: sw a2, 24(a0)
+; ZBA-NEXT: sw a3, 140(a0)
+; ZBA-NEXT: ret
entry:
%add = add nsw i32 %a, 5
%arrayidx = getelementptr inbounds i32, ptr %array1, i32 %add
@@ -37,18 +53,30 @@ entry:
}
define void @add_shl_moreOneUse_inStore_addexceedsign12(ptr %array1, i32 %a, i32 %b) {
-; RV32-LABEL: add_shl_moreOneUse_inStore_addexceedsign12:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: addi a3, a1, 2047
-; RV32-NEXT: lui a4, 2
-; RV32-NEXT: slli a1, a1, 2
-; RV32-NEXT: addi a3, a3, 1
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: add a0, a0, a4
-; RV32-NEXT: sw a2, 0(a0)
-; RV32-NEXT: sw a3, 4(a0)
-; RV32-NEXT: sw a2, 120(a0)
-; RV32-NEXT: ret
+; NO-ZBA-LABEL: add_shl_moreOneUse_inStore_addexceedsign12:
+; NO-ZBA: # %bb.0: # %entry
+; NO-ZBA-NEXT: addi a3, a1, 2047
+; NO-ZBA-NEXT: lui a4, 2
+; NO-ZBA-NEXT: slli a1, a1, 2
+; NO-ZBA-NEXT: addi a3, a3, 1
+; NO-ZBA-NEXT: add a0, a0, a1
+; NO-ZBA-NEXT: add a0, a0, a4
+; NO-ZBA-NEXT: sw a2, 0(a0)
+; NO-ZBA-NEXT: sw a3, 4(a0)
+; NO-ZBA-NEXT: sw a2, 120(a0)
+; NO-ZBA-NEXT: ret
+;
+; ZBA-LABEL: add_shl_moreOneUse_inStore_addexceedsign12:
+; ZBA: # %bb.0: # %entry
+; ZBA-NEXT: addi a3, a1, 2047
+; ZBA-NEXT: lui a4, 2
+; ZBA-NEXT: sh2add a0, a1, a0
+; ZBA-NEXT: addi a3, a3, 1
+; ZBA-NEXT: add a0, a0, a4
+; ZBA-NEXT: sw a2, 0(a0)
+; ZBA-NEXT: sw a3, 4(a0)
+; ZBA-NEXT: sw a2, 120(a0)
+; ZBA-NEXT: ret
entry:
%add = add nsw i32 %a, 2048
%arrayidx = getelementptr inbounds i32, ptr %array1, i32 %add
@@ -62,20 +90,34 @@ entry:
}
define void @add_shl_moreOneUse_inSelect(ptr %array1, i32 %a, i32 %b, i32 %x) {
-; RV32-LABEL: add_shl_moreOneUse_inSelect:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: addi a4, a1, 5
-; RV32-NEXT: mv a5, a4
-; RV32-NEXT: bgtz a3, .LBB3_2
-; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: mv a5, a2
-; RV32-NEXT: .LBB3_2: # %entry
-; RV32-NEXT: slli a1, a1, 2
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: sw a5, 20(a0)
-; RV32-NEXT: sw a5, 24(a0)
-; RV32-NEXT: sw a4, 140(a0)
-; RV32-NEXT: ret
+; NO-ZBA-LABEL: add_shl_moreOneUse_inSelect:
+; NO-ZBA: # %bb.0: # %entry
+; NO-ZBA-NEXT: addi a4, a1, 5
+; NO-ZBA-NEXT: mv a5, a4
+; NO-ZBA-NEXT: bgtz a3, .LBB3_2
+; NO-ZBA-NEXT: # %bb.1: # %entry
+; NO-ZBA-NEXT: mv a5, a2
+; NO-ZBA-NEXT: .LBB3_2: # %entry
+; NO-ZBA-NEXT: slli a1, a1, 2
+; NO-ZBA-NEXT: add a0, a0, a1
+; NO-ZBA-NEXT: sw a5, 20(a0)
+; NO-ZBA-NEXT: sw a5, 24(a0)
+; NO-ZBA-NEXT: sw a4, 140(a0)
+; NO-ZBA-NEXT: ret
+;
+; ZBA-LABEL: add_shl_moreOneUse_inSelect:
+; ZBA: # %bb.0: # %entry
+; ZBA-NEXT: addi a4, a1, 5
+; ZBA-NEXT: mv a5, a4
+; ZBA-NEXT: bgtz a3, .LBB3_2
+; ZBA-NEXT: # %bb.1: # %entry
+; ZBA-NEXT: mv a5, a2
+; ZBA-NEXT: .LBB3_2: # %entry
+; ZBA-NEXT: sh2add a0, a1, a0
+; ZBA-NEXT: sw a5, 20(a0)
+; ZBA-NEXT: sw a5, 24(a0)
+; ZBA-NEXT: sw a4, 140(a0)
+; ZBA-NEXT: ret
entry:
%add = add nsw i32 %a, 5
%cmp = icmp sgt i32 %x, 0
@@ -91,23 +133,40 @@ entry:
}
define void @add_shl_moreOneUse_inSelect_addexceedsign12(ptr %array1, i32 %a, i32 %b, i32 %x) {
-; RV32-LABEL: add_shl_moreOneUse_inSelect_addexceedsign12:
-; RV32: # %bb.0: # %entry
-; RV32-NEXT: addi a4, a1, 2047
-; RV32-NEXT: addi a4, a4, 1
-; RV32-NEXT: mv a5, a4
-; RV32-NEXT: bgtz a3, .LBB4_2
-; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: mv a5, a2
-; RV32-NEXT: .LBB4_2: # %entry
-; RV32-NEXT: lui a2, 2
-; RV32-NEXT: slli a1, a1, 2
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: add a0, a0, a2
-; RV32-NEXT: sw a5, 0(a0)
-; RV32-NEXT: sw a5, 4(a0)
-; RV32-NEXT: sw a4, 120(a0)
-; RV32-NEXT: ret
+; NO-ZBA-LABEL: add_shl_moreOneUse_inSelect_addexceedsign12:
+; NO-ZBA: # %bb.0: # %entry
+; NO-ZBA-NEXT: addi a4, a1, 2047
+; NO-ZBA-NEXT: addi a4, a4, 1
+; NO-ZBA-NEXT: mv a5, a4
+; NO-ZBA-NEXT: bgtz a3, .LBB4_2
+; NO-ZBA-NEXT: # %bb.1: # %entry
+; NO-ZBA-NEXT: mv a5, a2
+; NO-ZBA-NEXT: .LBB4_2: # %entry
+; NO-ZBA-NEXT: lui a2, 2
+; NO-ZBA-NEXT: slli a1, a1, 2
+; NO-ZBA-NEXT: add a0, a0, a1
+; NO-ZBA-NEXT: add a0, a0, a2
+; NO-ZBA-NEXT: sw a5, 0(a0)
+; NO-ZBA-NEXT: sw a5, 4(a0)
+; NO-ZBA-NEXT: sw a4, 120(a0)
+; NO-ZBA-NEXT: ret
+;
+; ZBA-LABEL: add_shl_moreOneUse_inSelect_addexceedsign12:
+; ZBA: # %bb.0: # %entry
+; ZBA-NEXT: addi a4, a1, 2047
+; ZBA-NEXT: addi a4, a4, 1
+; ZBA-NEXT: mv a5, a4
+; ZBA-NEXT: bgtz a3, .LBB4_2
+; ZBA-NEXT: # %bb.1: # %entry
+; ZBA-NEXT: mv a5, a2
+; ZBA-NEXT: .LBB4_2: # %entry
+; ZBA-NEXT: lui a2, 2
+; ZBA-NEXT: sh2add a0, a1, a0
+; ZBA-NEXT: add a0, a0, a2
+; ZBA-NEXT: sw a5, 0(a0)
+; ZBA-NEXT: sw a5, 4(a0)
+; ZBA-NEXT: sw a4, 120(a0)
+; ZBA-NEXT: ret
entry:
%add = add nsw i32 %a, 2048
%cmp = icmp sgt i32 %x, 0
@@ -121,3 +180,59 @@ entry:
store i32 %add, ptr %arrayidx6
ret void
}
+
+define i32 @add_shl_moreOneUse_sh1add(i32 %x) {
+; RV32-LABEL: add_shl_moreOneUse_sh1add:
+; RV32: # %bb.0:
+; RV32-NEXT: ori a1, a0, 1
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: ori a0, a0, 2
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: ret
+ %or = or i32 %x, 1
+ %mul = shl i32 %or, 1
+ %add = add i32 %mul, %or
+ ret i32 %add
+}
+
+define i32 @add_shl_moreOneUse_sh2add(i32 %x) {
+; RV32-LABEL: add_shl_moreOneUse_sh2add:
+; RV32: # %bb.0:
+; RV32-NEXT: ori a1, a0, 1
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: ori a0, a0, 4
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: ret
+ %or = or i32 %x, 1
+ %mul = shl i32 %or, 2
+ %add = add i32 %mul, %or
+ ret i32 %add
+}
+
+define i32 @add_shl_moreOneUse_sh3add(i32 %x) {
+; RV32-LABEL: add_shl_moreOneUse_sh3add:
+; RV32: # %bb.0:
+; RV32-NEXT: ori a1, a0, 1
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: ori a0, a0, 8
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: ret
+ %or = or i32 %x, 1
+ %mul = shl i32 %or, 3
+ %add = add i32 %mul, %or
+ ret i32 %add
+}
+
+define i32 @add_shl_moreOneUse_sh4add(i32 %x) {
+; RV32-LABEL: add_shl_moreOneUse_sh4add:
+; RV32: # %bb.0:
+; RV32-NEXT: ori a1, a0, 1
+; RV32-NEXT: slli a0, a0, 4
+; RV32-NEXT: ori a0, a0, 16
+; RV32-NEXT: add a0, a0, a1
+; RV32-NEXT: ret
+ %or = or i32 %x, 1
+ %mul = shl i32 %or, 4
+ %add = add i32 %mul, %or
+ ret i32 %add
+}
>From ad62250357fbb34051fc6999c910a55b1a22f422 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 11 Dec 2024 16:53:24 +0800
Subject: [PATCH 2/4] [RISCV] Don't commute with shift if it would break
sh{1,2,3}add pattern
Stacked on #119526
This fixes a regression from #101294 by checking if we might be clobbering a sh{1,2,3}add pattern.
Only do this is the underlying add isn't going to be folded away into an address offset.
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 8 +++
.../CodeGen/RISCV/add_sext_shl_constant.ll | 60 +++++++++++-------
llvm/test/CodeGen/RISCV/add_shl_constant.ll | 62 ++++++++++++-------
3 files changed, 87 insertions(+), 43 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index bf919122452628..5b94ae087f11ae 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -18255,6 +18255,14 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
+
+ // Bail if we might break a sh{1,2,3}add pattern.
+ if (Subtarget.hasStdExtZba() && C2->getZExtValue() >= 1 &&
+ C2->getZExtValue() <= 3 && N->hasOneUse() &&
+ N->use_begin()->getOpcode() == ISD::ADD &&
+ !isUsedByLdSt(*N->use_begin(), nullptr))
+ return false;
+
if (C1 && C2) {
const APInt &C1Int = C1->getAPIntValue();
APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
diff --git a/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll b/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll
index c2b05903e8e456..2f329fb9d83bfd 100644
--- a/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll
+++ b/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll
@@ -261,13 +261,19 @@ entry:
}
define i64 @add_shl_moreOneUse_sh1add(i64 %x) {
-; RV64-LABEL: add_shl_moreOneUse_sh1add:
-; RV64: # %bb.0:
-; RV64-NEXT: ori a1, a0, 1
-; RV64-NEXT: slli a0, a0, 1
-; RV64-NEXT: ori a0, a0, 2
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: ret
+; NO-ZBA-LABEL: add_shl_moreOneUse_sh1add:
+; NO-ZBA: # %bb.0:
+; NO-ZBA-NEXT: ori a1, a0, 1
+; NO-ZBA-NEXT: slli a0, a0, 1
+; NO-ZBA-NEXT: ori a0, a0, 2
+; NO-ZBA-NEXT: add a0, a0, a1
+; NO-ZBA-NEXT: ret
+;
+; ZBA-LABEL: add_shl_moreOneUse_sh1add:
+; ZBA: # %bb.0:
+; ZBA-NEXT: ori a0, a0, 1
+; ZBA-NEXT: sh1add a0, a0, a0
+; ZBA-NEXT: ret
%or = or i64 %x, 1
%mul = shl i64 %or, 1
%add = add i64 %mul, %or
@@ -275,13 +281,19 @@ define i64 @add_shl_moreOneUse_sh1add(i64 %x) {
}
define i64 @add_shl_moreOneUse_sh2add(i64 %x) {
-; RV64-LABEL: add_shl_moreOneUse_sh2add:
-; RV64: # %bb.0:
-; RV64-NEXT: ori a1, a0, 1
-; RV64-NEXT: slli a0, a0, 2
-; RV64-NEXT: ori a0, a0, 4
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: ret
+; NO-ZBA-LABEL: add_shl_moreOneUse_sh2add:
+; NO-ZBA: # %bb.0:
+; NO-ZBA-NEXT: ori a1, a0, 1
+; NO-ZBA-NEXT: slli a0, a0, 2
+; NO-ZBA-NEXT: ori a0, a0, 4
+; NO-ZBA-NEXT: add a0, a0, a1
+; NO-ZBA-NEXT: ret
+;
+; ZBA-LABEL: add_shl_moreOneUse_sh2add:
+; ZBA: # %bb.0:
+; ZBA-NEXT: ori a0, a0, 1
+; ZBA-NEXT: sh2add a0, a0, a0
+; ZBA-NEXT: ret
%or = or i64 %x, 1
%mul = shl i64 %or, 2
%add = add i64 %mul, %or
@@ -289,13 +301,19 @@ define i64 @add_shl_moreOneUse_sh2add(i64 %x) {
}
define i64 @add_shl_moreOneUse_sh3add(i64 %x) {
-; RV64-LABEL: add_shl_moreOneUse_sh3add:
-; RV64: # %bb.0:
-; RV64-NEXT: ori a1, a0, 1
-; RV64-NEXT: slli a0, a0, 3
-; RV64-NEXT: ori a0, a0, 8
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: ret
+; NO-ZBA-LABEL: add_shl_moreOneUse_sh3add:
+; NO-ZBA: # %bb.0:
+; NO-ZBA-NEXT: ori a1, a0, 1
+; NO-ZBA-NEXT: slli a0, a0, 3
+; NO-ZBA-NEXT: ori a0, a0, 8
+; NO-ZBA-NEXT: add a0, a0, a1
+; NO-ZBA-NEXT: ret
+;
+; ZBA-LABEL: add_shl_moreOneUse_sh3add:
+; ZBA: # %bb.0:
+; ZBA-NEXT: ori a0, a0, 1
+; ZBA-NEXT: sh3add a0, a0, a0
+; ZBA-NEXT: ret
%or = or i64 %x, 1
%mul = shl i64 %or, 3
%add = add i64 %mul, %or
diff --git a/llvm/test/CodeGen/RISCV/add_shl_constant.ll b/llvm/test/CodeGen/RISCV/add_shl_constant.ll
index ea7d17c80b57c4..a4da9e26836488 100644
--- a/llvm/test/CodeGen/RISCV/add_shl_constant.ll
+++ b/llvm/test/CodeGen/RISCV/add_shl_constant.ll
@@ -12,8 +12,8 @@ define i32 @add_shl_oneUse(i32 %x, i32 %y) nounwind {
;
; ZBA-LABEL: add_shl_oneUse:
; ZBA: # %bb.0:
+; ZBA-NEXT: addi a0, a0, 123
; ZBA-NEXT: sh3add a0, a0, a1
-; ZBA-NEXT: addi a0, a0, 984
; ZBA-NEXT: ret
%add.0 = add i32 %x, 123
%shl = shl i32 %add.0, 3
@@ -182,13 +182,19 @@ entry:
}
define i32 @add_shl_moreOneUse_sh1add(i32 %x) {
-; RV32-LABEL: add_shl_moreOneUse_sh1add:
-; RV32: # %bb.0:
-; RV32-NEXT: ori a1, a0, 1
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: ori a0, a0, 2
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: ret
+; NO-ZBA-LABEL: add_shl_moreOneUse_sh1add:
+; NO-ZBA: # %bb.0:
+; NO-ZBA-NEXT: ori a1, a0, 1
+; NO-ZBA-NEXT: slli a0, a0, 1
+; NO-ZBA-NEXT: ori a0, a0, 2
+; NO-ZBA-NEXT: add a0, a0, a1
+; NO-ZBA-NEXT: ret
+;
+; ZBA-LABEL: add_shl_moreOneUse_sh1add:
+; ZBA: # %bb.0:
+; ZBA-NEXT: ori a0, a0, 1
+; ZBA-NEXT: sh1add a0, a0, a0
+; ZBA-NEXT: ret
%or = or i32 %x, 1
%mul = shl i32 %or, 1
%add = add i32 %mul, %or
@@ -196,13 +202,19 @@ define i32 @add_shl_moreOneUse_sh1add(i32 %x) {
}
define i32 @add_shl_moreOneUse_sh2add(i32 %x) {
-; RV32-LABEL: add_shl_moreOneUse_sh2add:
-; RV32: # %bb.0:
-; RV32-NEXT: ori a1, a0, 1
-; RV32-NEXT: slli a0, a0, 2
-; RV32-NEXT: ori a0, a0, 4
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: ret
+; NO-ZBA-LABEL: add_shl_moreOneUse_sh2add:
+; NO-ZBA: # %bb.0:
+; NO-ZBA-NEXT: ori a1, a0, 1
+; NO-ZBA-NEXT: slli a0, a0, 2
+; NO-ZBA-NEXT: ori a0, a0, 4
+; NO-ZBA-NEXT: add a0, a0, a1
+; NO-ZBA-NEXT: ret
+;
+; ZBA-LABEL: add_shl_moreOneUse_sh2add:
+; ZBA: # %bb.0:
+; ZBA-NEXT: ori a0, a0, 1
+; ZBA-NEXT: sh2add a0, a0, a0
+; ZBA-NEXT: ret
%or = or i32 %x, 1
%mul = shl i32 %or, 2
%add = add i32 %mul, %or
@@ -210,13 +222,19 @@ define i32 @add_shl_moreOneUse_sh2add(i32 %x) {
}
define i32 @add_shl_moreOneUse_sh3add(i32 %x) {
-; RV32-LABEL: add_shl_moreOneUse_sh3add:
-; RV32: # %bb.0:
-; RV32-NEXT: ori a1, a0, 1
-; RV32-NEXT: slli a0, a0, 3
-; RV32-NEXT: ori a0, a0, 8
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: ret
+; NO-ZBA-LABEL: add_shl_moreOneUse_sh3add:
+; NO-ZBA: # %bb.0:
+; NO-ZBA-NEXT: ori a1, a0, 1
+; NO-ZBA-NEXT: slli a0, a0, 3
+; NO-ZBA-NEXT: ori a0, a0, 8
+; NO-ZBA-NEXT: add a0, a0, a1
+; NO-ZBA-NEXT: ret
+;
+; ZBA-LABEL: add_shl_moreOneUse_sh3add:
+; ZBA: # %bb.0:
+; ZBA-NEXT: ori a0, a0, 1
+; ZBA-NEXT: sh3add a0, a0, a0
+; ZBA-NEXT: ret
%or = or i32 %x, 1
%mul = shl i32 %or, 3
%add = add i32 %mul, %or
>From b1dcde487920fa3d0aff4534f7641b13f2b5c40d Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Mon, 6 Jan 2025 13:12:57 +0800
Subject: [PATCH 3/4] Add test for constant RHS in add
---
.../CodeGen/RISCV/add_sext_shl_constant.ll | 21 +++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll b/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll
index 2f329fb9d83bfd..3ee429f426b6d3 100644
--- a/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll
+++ b/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll
@@ -333,3 +333,24 @@ define i64 @add_shl_moreOneUse_sh4add(i64 %x) {
%add = add i64 %mul, %or
ret i64 %add
}
+
+define i64 @add_shl_rhs_constant(i64 %x, i64 %y) {
+; NO-ZBA-LABEL: add_shl_rhs_constant:
+; NO-ZBA: # %bb.0:
+; NO-ZBA-NEXT: add a0, a0, a1
+; NO-ZBA-NEXT: slli a0, a0, 3
+; NO-ZBA-NEXT: ret
+;
+; ZBA-LABEL: add_shl_rhs_constant:
+; ZBA: # %bb.0:
+; ZBA-NEXT: add a0, a0, a1
+; ZBA-NEXT: addi a0, a0, 1
+; ZBA-NEXT: slli a0, a0, 3
+; ZBA-NEXT: addi a0, a0, -8
+; ZBA-NEXT: ret
+ %a = add i64 %x, 1
+ %b = add i64 %y, %a
+ %c = shl i64 %b, 3
+ %d = add i64 %c, -8
+ ret i64 %d
+}
>From 3e04d32d9f4d602ba6ec3b5e243847d8415699e9 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Mon, 6 Jan 2025 13:14:01 +0800
Subject: [PATCH 4/4] Only bail if add RHS isn't constant
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 3 ++-
.../CodeGen/RISCV/add_sext_shl_constant.ll | 18 +++++-------------
2 files changed, 7 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5b94ae087f11ae..24891cf32e89d4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -18260,7 +18260,8 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
if (Subtarget.hasStdExtZba() && C2->getZExtValue() >= 1 &&
C2->getZExtValue() <= 3 && N->hasOneUse() &&
N->use_begin()->getOpcode() == ISD::ADD &&
- !isUsedByLdSt(*N->use_begin(), nullptr))
+ !isUsedByLdSt(*N->use_begin(), nullptr) &&
+ !isa<ConstantSDNode>(N->use_begin()->getOperand(1)))
return false;
if (C1 && C2) {
diff --git a/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll b/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll
index 3ee429f426b6d3..fe89b4aa24171c 100644
--- a/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll
+++ b/llvm/test/CodeGen/RISCV/add_sext_shl_constant.ll
@@ -335,19 +335,11 @@ define i64 @add_shl_moreOneUse_sh4add(i64 %x) {
}
define i64 @add_shl_rhs_constant(i64 %x, i64 %y) {
-; NO-ZBA-LABEL: add_shl_rhs_constant:
-; NO-ZBA: # %bb.0:
-; NO-ZBA-NEXT: add a0, a0, a1
-; NO-ZBA-NEXT: slli a0, a0, 3
-; NO-ZBA-NEXT: ret
-;
-; ZBA-LABEL: add_shl_rhs_constant:
-; ZBA: # %bb.0:
-; ZBA-NEXT: add a0, a0, a1
-; ZBA-NEXT: addi a0, a0, 1
-; ZBA-NEXT: slli a0, a0, 3
-; ZBA-NEXT: addi a0, a0, -8
-; ZBA-NEXT: ret
+; RV64-LABEL: add_shl_rhs_constant:
+; RV64: # %bb.0:
+; RV64-NEXT: add a0, a0, a1
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: ret
%a = add i64 %x, 1
%b = add i64 %y, %a
%c = shl i64 %b, 3
More information about the llvm-commits
mailing list