[llvm] 9ca02d6 - [RISCV] Add an ANDI to shift amount of FSL/FSR instructions

Craig Topper via llvm-commits <llvm-commits at lists.llvm.org>
Thu Nov 12 07:36:26 PST 2020


Author: Craig Topper
Date: 2020-11-12T07:33:40-08:00
New Revision: 9ca02d6fe166e4dcc8e651592883171af47257c9

URL: https://github.com/llvm/llvm-project/commit/9ca02d6fe166e4dcc8e651592883171af47257c9
DIFF: https://github.com/llvm/llvm-project/commit/9ca02d6fe166e4dcc8e651592883171af47257c9.diff

LOG: [RISCV] Add an ANDI to shift amount of FSL/FSR instructions

The fshl and fshr intrinsics are defined to take their shift amount modulo the bit width of one of their inputs. The FSL/FSR instructions read one extra bit of the shift amount; if that bit is set, the inputs are swapped. To preserve the semantics of the LLVM intrinsics, we need to make sure that the extra bit isn't set. DAG combine or InstCombine may have removed any mask that was originally present.
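
For illustration, here is a small reference model in C++ of the two behaviors
described above, following the Zbt draft spec for XLEN=32. The function names
are made up for this sketch and are not part of the patch:

    #include <cstdint>

    // llvm.fshl.i32: only the low log2(32) = 5 bits of the shift amount
    // matter; a shift amount of zero returns the first operand.
    uint32_t fshl32(uint32_t a, uint32_t b, uint32_t c) {
      unsigned s = c & 31;
      return s ? (a << s) | (b >> (32 - s)) : a;
    }

    // fsl rd, rs1, rs2, rs3 reads six bits of the shift amount (rs2); if
    // the extra bit (bit 5) is set, the two data inputs are swapped first.
    uint32_t fsl32(uint32_t rs1, uint32_t rs2, uint32_t rs3) {
      unsigned s = rs2 & 63;
      uint32_t a = rs1, b = rs3;
      if (s >= 32) { a = rs3; b = rs1; s -= 32; }
      return s ? (a << s) | (b >> (32 - s)) : a;
    }

With these models, fshl32(a, b, c) equals fsl32(a, c & 31, b) for every c,
which is exactly the equivalence the added ANDI establishes.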

We could be smarter here and use computeKnownBits to check whether the bit is known to be zero, but I wanted to start with correctness.
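
A rough sketch of what that follow-up might look like (a hypothetical helper,
assuming the usual SelectionDAG known-bits interface; not part of this patch):

    #include "llvm/CodeGen/SelectionDAG.h"
    #include "llvm/Support/KnownBits.h"
    #include "llvm/Support/MathExtras.h"
    using namespace llvm;

    // Return true if the masking ANDI is still needed, i.e. the extra
    // shift-amount bit (bit log2(XLen)) is not provably zero.
    static bool needsShamtMask(SDValue Shamt, unsigned XLen,
                               SelectionDAG &DAG) {
      KnownBits Known = DAG.computeKnownBits(Shamt);
      return !Known.Zero[Log2_32(XLen)];
    }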

Differential Revision: https://reviews.llvm.org/D90905

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVInstrInfoB.td
    llvm/test/CodeGen/RISCV/rv32Zbt.ll
    llvm/test/CodeGen/RISCV/rv64Zbt.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
index d327781277ce..d1b06e9136bb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
@@ -802,15 +802,32 @@ def : Pat<(or (and (not GPR:$rs2), GPR:$rs3), (and GPR:$rs2, GPR:$rs1)),
           (CMIX GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
 def : Pat<(riscv_selectcc GPR:$rs2, (XLenVT 0), (XLenVT 17), GPR:$rs3, GPR:$rs1),
           (CMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+} // Predicates = [HasStdExtZbt]
 
 // fshl and fshr concatenate their operands in the same order. fsr and fsl
 // instruction use different orders. fshl will return its first operand for
 // shift of zero, fshr will return its second operand. fsl and fsr both return
 // $rs1 so the patterns need to have different operand orders.
+//
+// fshl and fshr only read the lower log2(xlen) bits of the shift amount, but
+// fsl/fsr instructions read log2(xlen)+1 bits. DAG combine may have removed
+// an AND mask on the shift amount that we need to add back to avoid a one in
+// the extra bit.
+// FIXME: If we can prove that the extra bit in the shift amount is zero, we
+// don't need this mask.
+let Predicates = [HasStdExtZbt, IsRV32] in {
+def : Pat<(fshl GPR:$rs1, GPR:$rs3, GPR:$rs2),
+          (FSL GPR:$rs1, (ANDI GPR:$rs2, 31), GPR:$rs3)>;
+def : Pat<(fshr GPR:$rs3, GPR:$rs1, GPR:$rs2),
+          (FSR GPR:$rs1, (ANDI GPR:$rs2, 31), GPR:$rs3)>;
+}
+let Predicates = [HasStdExtZbt, IsRV64] in {
 def : Pat<(fshl GPR:$rs1, GPR:$rs3, GPR:$rs2),
-          (FSL GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+          (FSL GPR:$rs1, (ANDI GPR:$rs2, 63), GPR:$rs3)>;
 def : Pat<(fshr GPR:$rs3, GPR:$rs1, GPR:$rs2),
-          (FSR GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+          (FSR GPR:$rs1, (ANDI GPR:$rs2, 63), GPR:$rs3)>;
+}
+let Predicates = [HasStdExtZbt] in {
 def : Pat<(fshr GPR:$rs3, GPR:$rs1, uimmlog2xlen:$shamt),
           (FSRI GPR:$rs1, GPR:$rs3, uimmlog2xlen:$shamt)>;
 // We can use FSRI for fshl by immediate if we subtract the immediate from
@@ -1019,11 +1036,11 @@ let Predicates = [HasStdExtZbt, IsRV64] in {
 def : Pat<(sext_inreg (fshl GPR:$rs1, (shl GPR:$rs3, (i64 32)),
                             (and GPR:$rs2, (i64 31))),
                       i32),
-          (FSLW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+          (FSLW GPR:$rs1, (ANDI GPR:$rs2, 31), GPR:$rs3)>;
 def : Pat<(sext_inreg (fshr GPR:$rs3, (shl GPR:$rs1, (i64 32)),
                             (or GPR:$rs2, (i64 32))),
                       i32),
-          (FSRW GPR:$rs1, GPR:$rs2, GPR:$rs3)>;
+          (FSRW GPR:$rs1, (ANDI GPR:$rs2, 31), GPR:$rs3)>;
 def : Pat<(sext_inreg (fshr GPR:$rs3, (shl GPR:$rs1, (i64 32)),
                             uimm6gt32:$shamt),
                       i32),

diff --git a/llvm/test/CodeGen/RISCV/rv32Zbt.ll b/llvm/test/CodeGen/RISCV/rv32Zbt.ll
index 2f8d9c6c0678..7093bb3aebae 100644
--- a/llvm/test/CodeGen/RISCV/rv32Zbt.ll
+++ b/llvm/test/CodeGen/RISCV/rv32Zbt.ll
@@ -131,11 +131,13 @@ define i32 @fshl_i32(i32 %a, i32 %b, i32 %c) nounwind {
 ;
 ; RV32IB-LABEL: fshl_i32:
 ; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    andi a2, a2, 31
 ; RV32IB-NEXT:    fsl a0, a0, a1, a2
 ; RV32IB-NEXT:    ret
 ;
 ; RV32IBT-LABEL: fshl_i32:
 ; RV32IBT:       # %bb.0:
+; RV32IBT-NEXT:    andi a2, a2, 31
 ; RV32IBT-NEXT:    fsl a0, a0, a1, a2
 ; RV32IBT-NEXT:    ret
   %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
@@ -315,11 +317,13 @@ define i32 @fshr_i32(i32 %a, i32 %b, i32 %c) nounwind {
 ;
 ; RV32IB-LABEL: fshr_i32:
 ; RV32IB:       # %bb.0:
+; RV32IB-NEXT:    andi a2, a2, 31
 ; RV32IB-NEXT:    fsr a0, a1, a0, a2
 ; RV32IB-NEXT:    ret
 ;
 ; RV32IBT-LABEL: fshr_i32:
 ; RV32IBT:       # %bb.0:
+; RV32IBT-NEXT:    andi a2, a2, 31
 ; RV32IBT-NEXT:    fsr a0, a1, a0, a2
 ; RV32IBT-NEXT:    ret
   %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)

diff --git a/llvm/test/CodeGen/RISCV/rv64Zbt.ll b/llvm/test/CodeGen/RISCV/rv64Zbt.ll
index 71b6fa25becb..8b7b938ec91f 100644
--- a/llvm/test/CodeGen/RISCV/rv64Zbt.ll
+++ b/llvm/test/CodeGen/RISCV/rv64Zbt.ll
@@ -120,11 +120,13 @@ define signext i32 @fshl_i32(i32 signext %a, i32 signext %b, i32 signext %c) nou
 ;
 ; RV64IB-LABEL: fshl_i32:
 ; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    andi a2, a2, 31
 ; RV64IB-NEXT:    fslw a0, a0, a1, a2
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBT-LABEL: fshl_i32:
 ; RV64IBT:       # %bb.0:
+; RV64IBT-NEXT:    andi a2, a2, 31
 ; RV64IBT-NEXT:    fslw a0, a0, a1, a2
 ; RV64IBT-NEXT:    ret
   %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
@@ -145,11 +147,13 @@ define i64 @fshl_i64(i64 %a, i64 %b, i64 %c) nounwind {
 ;
 ; RV64IB-LABEL: fshl_i64:
 ; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    andi a2, a2, 63
 ; RV64IB-NEXT:    fsl a0, a0, a1, a2
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBT-LABEL: fshl_i64:
 ; RV64IBT:       # %bb.0:
+; RV64IBT-NEXT:    andi a2, a2, 63
 ; RV64IBT-NEXT:    fsl a0, a0, a1, a2
 ; RV64IBT-NEXT:    ret
   %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c)
@@ -172,11 +176,13 @@ define signext i32 @fshr_i32(i32 signext %a, i32 signext %b, i32 signext %c) nou
 ;
 ; RV64IB-LABEL: fshr_i32:
 ; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    andi a2, a2, 31
 ; RV64IB-NEXT:    fsrw a0, a1, a0, a2
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBT-LABEL: fshr_i32:
 ; RV64IBT:       # %bb.0:
+; RV64IBT-NEXT:    andi a2, a2, 31
 ; RV64IBT-NEXT:    fsrw a0, a1, a0, a2
 ; RV64IBT-NEXT:    ret
   %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
@@ -197,11 +203,13 @@ define i64 @fshr_i64(i64 %a, i64 %b, i64 %c) nounwind {
 ;
 ; RV64IB-LABEL: fshr_i64:
 ; RV64IB:       # %bb.0:
+; RV64IB-NEXT:    andi a2, a2, 63
 ; RV64IB-NEXT:    fsr a0, a1, a0, a2
 ; RV64IB-NEXT:    ret
 ;
 ; RV64IBT-LABEL: fshr_i64:
 ; RV64IBT:       # %bb.0:
+; RV64IBT-NEXT:    andi a2, a2, 63
 ; RV64IBT-NEXT:    fsr a0, a1, a0, a2
 ; RV64IBT-NEXT:    ret
   %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)



