[llvm] r352169 - [RISCV] Custom-legalise 32-bit variable shifts on RV64

Alex Bradbury via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 24 21:04:00 PST 2019


Author: asb
Date: Thu Jan 24 21:04:00 2019
New Revision: 352169

URL: http://llvm.org/viewvc/llvm-project?rev=352169&view=rev
Log:
[RISCV] Custom-legalise 32-bit variable shifts on RV64

The previous DAG combiner-based approach had an issue with infinite loops
between the target-dependent and target-independent combiner logic (see
PR40333). Although this was worked around in rL351806, the combiner-based
approach is still potentially brittle and can fail to select the 32-bit shift
variant when profitable to do so, as demonstrated in the pr40333.ll test case.

This patch instead introduces target-specific SelectionDAG nodes for
SLLW/SRLW/SRAW and custom-lowers variable i32 shifts to them. pr40333.ll is a
good example of how this approach can improve codegen.
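
As a rough illustration, consider a minimal sketch (hypothetical function
name, not taken from the test suite) of the kind of function affected:

  define signext i32 @srlw_sketch(i32 signext %a, i32 signext %b) {
    %r = lshr i32 %a, %b   ; variable i32 shift; i32 is an illegal type on RV64
    ret i32 %r
  }

With this patch, type legalisation rewrites the i32 SRL as
(trunc (RISCVISD::SRLW (anyext %a), (anyext %b))), which the new PatGprGpr
pattern selects as a single srlw, instead of zero-extending both operands
with slli/srli pairs and using a 64-bit srl.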

This patch also adds a DAG combine that performs SimplifyDemandedBits on the
operands (only the lower 32 bits of the first operand and the lower 5 bits of
the second operand are read). This seems better than implementing
SimplifyDemandedBitsForTargetNode, as there is no guarantee that hook would be
called (it isn't for e.g. the anyext return test cases).
ComputeNumSignBitsForTargetNode is also implemented.
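
To spell out the ComputeNumSignBitsForTargetNode result: SLLW/SRLW/SRAW
produce a 32-bit result sign-extended to 64 bits, so bits 63 down to 31 are
all copies of bit 31, giving 64 - 31 = 33 known sign bits. As the in-code
TODO notes, a more precise answer could be computed for SRAW when bits of the
shift amount are known.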

There are codegen changes in atomic-rmw.ll and atomic-cmpxchg.ll but the new
instruction sequences are semantically equivalent.
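
To see the equivalence, take the recurring change in those tests:
"andi a3, a0, 3; slli a3, a3, 3" becomes "slli a3, a0, 3; andi a3, a3, 24".
Both compute the bit offset of the narrow value within its aligned 32-bit
word, since (a0 & 3) << 3 == (a0 << 3) & (3 << 3) == (a0 << 3) & 24. The
accompanying sll -> sllw changes are likewise benign here, as only the lower
32 bits of the shifted masks are consumed by the word-sized atomic
operations.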

Differential Revision: https://reviews.llvm.org/D57085


Modified:
    llvm/trunk/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/trunk/lib/Target/RISCV/RISCVISelLowering.h
    llvm/trunk/lib/Target/RISCV/RISCVInstrInfo.td
    llvm/trunk/test/CodeGen/RISCV/atomic-cmpxchg.ll
    llvm/trunk/test/CodeGen/RISCV/atomic-rmw.ll
    llvm/trunk/test/CodeGen/RISCV/pr40333.ll

Modified: llvm/trunk/lib/Target/RISCV/RISCVISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/RISCV/RISCVISelLowering.cpp?rev=352169&r1=352168&r2=352169&view=diff
==============================================================================
--- llvm/trunk/lib/Target/RISCV/RISCVISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/RISCV/RISCVISelLowering.cpp Thu Jan 24 21:04:00 2019
@@ -80,10 +80,10 @@ RISCVTargetLowering::RISCVTargetLowering
     setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
 
   if (Subtarget.is64Bit()) {
-    setTargetDAGCombine(ISD::SHL);
-    setTargetDAGCombine(ISD::SRL);
-    setTargetDAGCombine(ISD::SRA);
     setTargetDAGCombine(ISD::ANY_EXTEND);
+    setOperationAction(ISD::SHL, MVT::i32, Custom);
+    setOperationAction(ISD::SRA, MVT::i32, Custom);
+    setOperationAction(ISD::SRL, MVT::i32, Custom);
   }
 
   if (!Subtarget.hasStdExtM()) {
@@ -512,15 +512,52 @@ SDValue RISCVTargetLowering::lowerRETURN
   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
 }
 
-// Return true if the given node is a shift with a non-constant shift amount.
-static bool isVariableShift(SDValue Val) {
-  switch (Val.getOpcode()) {
+// Returns the opcode of the target-specific SDNode that implements the 32-bit
+// form of the given Opcode.
+static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
+  switch (Opcode) {
   default:
-    return false;
+    llvm_unreachable("Unexpected opcode");
   case ISD::SHL:
+    return RISCVISD::SLLW;
   case ISD::SRA:
+    return RISCVISD::SRAW;
   case ISD::SRL:
-    return Val.getOperand(1).getOpcode() != ISD::Constant;
+    return RISCVISD::SRLW;
+  }
+}
+
+// Converts the given 32-bit operation to a target-specific SelectionDAG node.
+// Because i32 isn't a legal type for RV64, these operations would otherwise
+// be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
+// later on because the fact that the operation was originally of type i32
+// lost.
+static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) {
+  SDLoc DL(N);
+  RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
+  SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
+  SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
+  SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
+  // ReplaceNodeResults requires we maintain the same type for the return value.
+  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
+}
+
+void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
+                                             SmallVectorImpl<SDValue> &Results,
+                                             SelectionDAG &DAG) const {
+  SDLoc DL(N);
+  switch (N->getOpcode()) {
+  default:
+    llvm_unreachable("Don't know how to custom type legalize this operation!");
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+           "Unexpected custom legalisation");
+    if (N->getOperand(1).getOpcode() == ISD::Constant)
+      return;
+    Results.push_back(customLegalizeToWOp(N, DAG));
+    break;
   }
 }
 
@@ -545,34 +582,14 @@ SDValue RISCVTargetLowering::PerformDAGC
   switch (N->getOpcode()) {
   default:
     break;
-  case ISD::SHL:
-  case ISD::SRL:
-  case ISD::SRA: {
-    assert(Subtarget.getXLen() == 64 && "Combine should be 64-bit only");
-    if (!DCI.isBeforeLegalize())
-      break;
-    SDValue RHS = N->getOperand(1);
-    if (N->getValueType(0) != MVT::i32 || RHS->getOpcode() == ISD::Constant ||
-        (RHS->getOpcode() == ISD::AssertZext &&
-         cast<VTSDNode>(RHS->getOperand(1))->getVT().getSizeInBits() <= 5))
-      break;
-    SDValue LHS = N->getOperand(0);
-    SDLoc DL(N);
-    SDValue NewRHS =
-        DAG.getNode(ISD::AssertZext, DL, RHS.getValueType(), RHS,
-                    DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), 5)));
-    return DCI.CombineTo(
-        N, DAG.getNode(N->getOpcode(), DL, LHS.getValueType(), LHS, NewRHS));
-  }
   case ISD::ANY_EXTEND: {
-    // If any-extending an i32 variable-length shift or sdiv/udiv/urem to i64,
-    // then instead sign-extend in order to increase the chance of being able
-    // to select the sllw/srlw/sraw/divw/divuw/remuw instructions.
+    // If any-extending an i32 sdiv/udiv/urem to i64, then instead sign-extend
+    // in order to increase the chance of being able to select the
+    // divw/divuw/remuw instructions.
     SDValue Src = N->getOperand(0);
     if (N->getValueType(0) != MVT::i64 || Src.getValueType() != MVT::i32)
       break;
-    if (!isVariableShift(Src) &&
-        !(Subtarget.hasStdExtM() && isVariableSDivUDivURem(Src)))
+    if (!(Subtarget.hasStdExtM() && isVariableSDivUDivURem(Src)))
       break;
     SDLoc DL(N);
     // Don't add the new node to the DAGCombiner worklist, in order to avoid
@@ -589,11 +606,42 @@ SDValue RISCVTargetLowering::PerformDAGC
       break;
     return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
   }
+  case RISCVISD::SLLW:
+  case RISCVISD::SRAW:
+  case RISCVISD::SRLW: {
+    // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
+    SDValue LHS = N->getOperand(0);
+    SDValue RHS = N->getOperand(1);
+    APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
+    APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
+    if ((SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI)) ||
+        (SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)))
+      return SDValue();
+    break;
+  }
   }
 
   return SDValue();
 }
 
+unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
+    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
+    unsigned Depth) const {
+  switch (Op.getOpcode()) {
+  default:
+    break;
+  case RISCVISD::SLLW:
+  case RISCVISD::SRAW:
+  case RISCVISD::SRLW:
+    // TODO: As the result is sign-extended, this is conservatively correct. A
+    // more precise answer could be calculated for SRAW depending on known
+    // bits in the shift amount.
+    return 33;
+  }
+
+  return 1;
+}
+
 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
                                              MachineBasicBlock *BB) {
   assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
@@ -1682,6 +1730,12 @@ const char *RISCVTargetLowering::getTarg
     return "RISCVISD::SplitF64";
   case RISCVISD::TAIL:
     return "RISCVISD::TAIL";
+  case RISCVISD::SLLW:
+    return "RISCVISD::SLLW";
+  case RISCVISD::SRAW:
+    return "RISCVISD::SRAW";
+  case RISCVISD::SRLW:
+    return "RISCVISD::SRLW";
   }
   return nullptr;
 }

Modified: llvm/trunk/lib/Target/RISCV/RISCVISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/RISCV/RISCVISelLowering.h?rev=352169&r1=352168&r2=352169&view=diff
==============================================================================
--- llvm/trunk/lib/Target/RISCV/RISCVISelLowering.h (original)
+++ llvm/trunk/lib/Target/RISCV/RISCVISelLowering.h Thu Jan 24 21:04:00 2019
@@ -31,7 +31,12 @@ enum NodeType : unsigned {
   SELECT_CC,
   BuildPairF64,
   SplitF64,
-  TAIL
+  TAIL,
+  // RV64I shifts, directly matching the semantics of the named RISC-V
+  // instructions.
+  SLLW,
+  SRAW,
+  SRLW
 };
 }
 
@@ -57,9 +62,16 @@ public:
 
   // Provide custom lowering hooks for some operations.
   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                          SelectionDAG &DAG) const override;
 
   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 
+  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+                                           const APInt &DemandedElts,
+                                           const SelectionDAG &DAG,
+                                           unsigned Depth) const override;
+
   // This method returns the name of a target specific DAG node.
   const char *getTargetNodeName(unsigned Opcode) const override;
 

Modified: llvm/trunk/lib/Target/RISCV/RISCVInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/RISCV/RISCVInstrInfo.td?rev=352169&r1=352168&r2=352169&view=diff
==============================================================================
--- llvm/trunk/lib/Target/RISCV/RISCVInstrInfo.td (original)
+++ llvm/trunk/lib/Target/RISCV/RISCVInstrInfo.td Thu Jan 24 21:04:00 2019
@@ -51,6 +51,9 @@ def riscv_selectcc  : SDNode<"RISCVISD::
 def riscv_tail      : SDNode<"RISCVISD::TAIL", SDT_RISCVCall,
                              [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                               SDNPVariadic]>;
+def riscv_sllw      : SDNode<"RISCVISD::SLLW", SDTIntShiftOp>;
+def riscv_sraw      : SDNode<"RISCVISD::SRAW", SDTIntShiftOp>;
+def riscv_srlw      : SDNode<"RISCVISD::SRLW", SDTIntShiftOp>;
 
 //===----------------------------------------------------------------------===//
 // Operand and SDNode transformation definitions.
@@ -672,21 +675,9 @@ def sexti32 : PatFrags<(ops node:$src),
 def assertzexti32 : PatFrag<(ops node:$src), (assertzext node:$src), [{
   return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32;
 }]>;
-def assertzexti5 : PatFrag<(ops node:$src), (assertzext node:$src), [{
-  return cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits() <= 5;
-}]>;
 def zexti32 : PatFrags<(ops node:$src),
                        [(and node:$src, 0xffffffff),
                         (assertzexti32 node:$src)]>;
-// Defines a legal mask for (assertzexti5 (and src, mask)) to be combinable
-// with a shiftw operation. The mask mustn't modify the lower 5 bits or the
-// upper 32 bits.
-def shiftwamt_mask : ImmLeaf<XLenVT, [{
-  return countTrailingOnes<uint64_t>(Imm) >= 5 && isUInt<32>(Imm);
-}]>;
-def shiftwamt : PatFrags<(ops node:$src),
-                         [(assertzexti5 (and node:$src, shiftwamt_mask)),
-                          (assertzexti5 node:$src)]>;
 
 /// Immediates
 
@@ -946,28 +937,9 @@ def : Pat<(sext_inreg (shl GPR:$rs1, uim
 def : Pat<(sra (sext_inreg GPR:$rs1, i32), uimm5:$shamt),
           (SRAIW GPR:$rs1, uimm5:$shamt)>;
 
-// For variable-length shifts, we rely on assertzexti5 being inserted during
-// lowering (see RISCVTargetLowering::PerformDAGCombine). This enables us to
-// guarantee that selecting a 32-bit variable shift is legal (as the variable
-// shift is known to be <= 32). We must also be careful not to create
-// semantically incorrect patterns. For instance, selecting SRLW for
-// (srl (zexti32 GPR:$rs1), (shiftwamt GPR:$rs2)),
-// is not guaranteed to be safe, as we don't know whether the upper 32-bits of
-// the result are used or not (in the case where rs2=0, this is a
-// sign-extension operation).
-
-def : Pat<(sext_inreg (shl GPR:$rs1, (shiftwamt GPR:$rs2)), i32),
-          (SLLW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(zexti32 (shl GPR:$rs1, (shiftwamt GPR:$rs2))),
-          (SRLI (SLLI (SLLW GPR:$rs1, GPR:$rs2), 32), 32)>;
-
-def : Pat<(sext_inreg (srl (zexti32 GPR:$rs1), (shiftwamt GPR:$rs2)), i32),
-          (SRLW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(zexti32 (srl (zexti32 GPR:$rs1), (shiftwamt GPR:$rs2))),
-          (SRLI (SLLI (SRLW GPR:$rs1, GPR:$rs2), 32), 32)>;
-
-def : Pat<(sra (sexti32 GPR:$rs1), (shiftwamt GPR:$rs2)),
-          (SRAW GPR:$rs1, GPR:$rs2)>;
+def : PatGprGpr<riscv_sllw, SLLW>;
+def : PatGprGpr<riscv_srlw, SRLW>;
+def : PatGprGpr<riscv_sraw, SRAW>;
 
 /// Loads
 

Modified: llvm/trunk/test/CodeGen/RISCV/atomic-cmpxchg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/RISCV/atomic-cmpxchg.ll?rev=352169&r1=352168&r2=352169&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/RISCV/atomic-cmpxchg.ll (original)
+++ llvm/trunk/test/CodeGen/RISCV/atomic-cmpxchg.ll Thu Jan 24 21:04:00 2019
@@ -61,8 +61,8 @@ define void @cmpxchg_i8_monotonic_monoto
 ;
 ; RV64IA-LABEL: cmpxchg_i8_monotonic_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    addi a4, zero, 255
 ; RV64IA-NEXT:    sllw a4, a4, a3
 ; RV64IA-NEXT:    andi a2, a2, 255
@@ -139,8 +139,8 @@ define void @cmpxchg_i8_acquire_monotoni
 ;
 ; RV64IA-LABEL: cmpxchg_i8_acquire_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    addi a4, zero, 255
 ; RV64IA-NEXT:    sllw a4, a4, a3
 ; RV64IA-NEXT:    andi a2, a2, 255
@@ -217,8 +217,8 @@ define void @cmpxchg_i8_acquire_acquire(
 ;
 ; RV64IA-LABEL: cmpxchg_i8_acquire_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    addi a4, zero, 255
 ; RV64IA-NEXT:    sllw a4, a4, a3
 ; RV64IA-NEXT:    andi a2, a2, 255
@@ -295,8 +295,8 @@ define void @cmpxchg_i8_release_monotoni
 ;
 ; RV64IA-LABEL: cmpxchg_i8_release_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    addi a4, zero, 255
 ; RV64IA-NEXT:    sllw a4, a4, a3
 ; RV64IA-NEXT:    andi a2, a2, 255
@@ -373,8 +373,8 @@ define void @cmpxchg_i8_release_acquire(
 ;
 ; RV64IA-LABEL: cmpxchg_i8_release_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    addi a4, zero, 255
 ; RV64IA-NEXT:    sllw a4, a4, a3
 ; RV64IA-NEXT:    andi a2, a2, 255
@@ -451,8 +451,8 @@ define void @cmpxchg_i8_acq_rel_monotoni
 ;
 ; RV64IA-LABEL: cmpxchg_i8_acq_rel_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    addi a4, zero, 255
 ; RV64IA-NEXT:    sllw a4, a4, a3
 ; RV64IA-NEXT:    andi a2, a2, 255
@@ -529,8 +529,8 @@ define void @cmpxchg_i8_acq_rel_acquire(
 ;
 ; RV64IA-LABEL: cmpxchg_i8_acq_rel_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    addi a4, zero, 255
 ; RV64IA-NEXT:    sllw a4, a4, a3
 ; RV64IA-NEXT:    andi a2, a2, 255
@@ -607,8 +607,8 @@ define void @cmpxchg_i8_seq_cst_monotoni
 ;
 ; RV64IA-LABEL: cmpxchg_i8_seq_cst_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    addi a4, zero, 255
 ; RV64IA-NEXT:    sllw a4, a4, a3
 ; RV64IA-NEXT:    andi a2, a2, 255
@@ -685,8 +685,8 @@ define void @cmpxchg_i8_seq_cst_acquire(
 ;
 ; RV64IA-LABEL: cmpxchg_i8_seq_cst_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    addi a4, zero, 255
 ; RV64IA-NEXT:    sllw a4, a4, a3
 ; RV64IA-NEXT:    andi a2, a2, 255
@@ -763,8 +763,8 @@ define void @cmpxchg_i8_seq_cst_seq_cst(
 ;
 ; RV64IA-LABEL: cmpxchg_i8_seq_cst_seq_cst:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    addi a4, zero, 255
 ; RV64IA-NEXT:    sllw a4, a4, a3
 ; RV64IA-NEXT:    andi a2, a2, 255
@@ -846,8 +846,8 @@ define void @cmpxchg_i16_monotonic_monot
 ; RV64IA-NEXT:    addiw a3, a3, -1
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    and a2, a2, a3
-; RV64IA-NEXT:    andi a4, a0, 3
-; RV64IA-NEXT:    slli a4, a4, 3
+; RV64IA-NEXT:    slli a4, a0, 3
+; RV64IA-NEXT:    andi a4, a4, 24
 ; RV64IA-NEXT:    sllw a3, a3, a4
 ; RV64IA-NEXT:    sllw a2, a2, a4
 ; RV64IA-NEXT:    sllw a1, a1, a4
@@ -926,8 +926,8 @@ define void @cmpxchg_i16_acquire_monoton
 ; RV64IA-NEXT:    addiw a3, a3, -1
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    and a2, a2, a3
-; RV64IA-NEXT:    andi a4, a0, 3
-; RV64IA-NEXT:    slli a4, a4, 3
+; RV64IA-NEXT:    slli a4, a0, 3
+; RV64IA-NEXT:    andi a4, a4, 24
 ; RV64IA-NEXT:    sllw a3, a3, a4
 ; RV64IA-NEXT:    sllw a2, a2, a4
 ; RV64IA-NEXT:    sllw a1, a1, a4
@@ -1006,8 +1006,8 @@ define void @cmpxchg_i16_acquire_acquire
 ; RV64IA-NEXT:    addiw a3, a3, -1
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    and a2, a2, a3
-; RV64IA-NEXT:    andi a4, a0, 3
-; RV64IA-NEXT:    slli a4, a4, 3
+; RV64IA-NEXT:    slli a4, a0, 3
+; RV64IA-NEXT:    andi a4, a4, 24
 ; RV64IA-NEXT:    sllw a3, a3, a4
 ; RV64IA-NEXT:    sllw a2, a2, a4
 ; RV64IA-NEXT:    sllw a1, a1, a4
@@ -1086,8 +1086,8 @@ define void @cmpxchg_i16_release_monoton
 ; RV64IA-NEXT:    addiw a3, a3, -1
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    and a2, a2, a3
-; RV64IA-NEXT:    andi a4, a0, 3
-; RV64IA-NEXT:    slli a4, a4, 3
+; RV64IA-NEXT:    slli a4, a0, 3
+; RV64IA-NEXT:    andi a4, a4, 24
 ; RV64IA-NEXT:    sllw a3, a3, a4
 ; RV64IA-NEXT:    sllw a2, a2, a4
 ; RV64IA-NEXT:    sllw a1, a1, a4
@@ -1166,8 +1166,8 @@ define void @cmpxchg_i16_release_acquire
 ; RV64IA-NEXT:    addiw a3, a3, -1
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    and a2, a2, a3
-; RV64IA-NEXT:    andi a4, a0, 3
-; RV64IA-NEXT:    slli a4, a4, 3
+; RV64IA-NEXT:    slli a4, a0, 3
+; RV64IA-NEXT:    andi a4, a4, 24
 ; RV64IA-NEXT:    sllw a3, a3, a4
 ; RV64IA-NEXT:    sllw a2, a2, a4
 ; RV64IA-NEXT:    sllw a1, a1, a4
@@ -1246,8 +1246,8 @@ define void @cmpxchg_i16_acq_rel_monoton
 ; RV64IA-NEXT:    addiw a3, a3, -1
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    and a2, a2, a3
-; RV64IA-NEXT:    andi a4, a0, 3
-; RV64IA-NEXT:    slli a4, a4, 3
+; RV64IA-NEXT:    slli a4, a0, 3
+; RV64IA-NEXT:    andi a4, a4, 24
 ; RV64IA-NEXT:    sllw a3, a3, a4
 ; RV64IA-NEXT:    sllw a2, a2, a4
 ; RV64IA-NEXT:    sllw a1, a1, a4
@@ -1326,8 +1326,8 @@ define void @cmpxchg_i16_acq_rel_acquire
 ; RV64IA-NEXT:    addiw a3, a3, -1
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    and a2, a2, a3
-; RV64IA-NEXT:    andi a4, a0, 3
-; RV64IA-NEXT:    slli a4, a4, 3
+; RV64IA-NEXT:    slli a4, a0, 3
+; RV64IA-NEXT:    andi a4, a4, 24
 ; RV64IA-NEXT:    sllw a3, a3, a4
 ; RV64IA-NEXT:    sllw a2, a2, a4
 ; RV64IA-NEXT:    sllw a1, a1, a4
@@ -1406,8 +1406,8 @@ define void @cmpxchg_i16_seq_cst_monoton
 ; RV64IA-NEXT:    addiw a3, a3, -1
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    and a2, a2, a3
-; RV64IA-NEXT:    andi a4, a0, 3
-; RV64IA-NEXT:    slli a4, a4, 3
+; RV64IA-NEXT:    slli a4, a0, 3
+; RV64IA-NEXT:    andi a4, a4, 24
 ; RV64IA-NEXT:    sllw a3, a3, a4
 ; RV64IA-NEXT:    sllw a2, a2, a4
 ; RV64IA-NEXT:    sllw a1, a1, a4
@@ -1486,8 +1486,8 @@ define void @cmpxchg_i16_seq_cst_acquire
 ; RV64IA-NEXT:    addiw a3, a3, -1
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    and a2, a2, a3
-; RV64IA-NEXT:    andi a4, a0, 3
-; RV64IA-NEXT:    slli a4, a4, 3
+; RV64IA-NEXT:    slli a4, a0, 3
+; RV64IA-NEXT:    andi a4, a4, 24
 ; RV64IA-NEXT:    sllw a3, a3, a4
 ; RV64IA-NEXT:    sllw a2, a2, a4
 ; RV64IA-NEXT:    sllw a1, a1, a4
@@ -1566,8 +1566,8 @@ define void @cmpxchg_i16_seq_cst_seq_cst
 ; RV64IA-NEXT:    addiw a3, a3, -1
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    and a2, a2, a3
-; RV64IA-NEXT:    andi a4, a0, 3
-; RV64IA-NEXT:    slli a4, a4, 3
+; RV64IA-NEXT:    slli a4, a0, 3
+; RV64IA-NEXT:    andi a4, a4, 24
 ; RV64IA-NEXT:    sllw a3, a3, a4
 ; RV64IA-NEXT:    sllw a2, a2, a4
 ; RV64IA-NEXT:    sllw a1, a1, a4

Modified: llvm/trunk/test/CodeGen/RISCV/atomic-rmw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/RISCV/atomic-rmw.ll?rev=352169&r1=352168&r2=352169&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/RISCV/atomic-rmw.ll (original)
+++ llvm/trunk/test/CodeGen/RISCV/atomic-rmw.ll Thu Jan 24 21:04:00 2019
@@ -52,8 +52,8 @@ define i8 @atomicrmw_xchg_i8_monotonic(i
 ;
 ; RV64IA-LABEL: atomicrmw_xchg_i8_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -118,8 +118,8 @@ define i8 @atomicrmw_xchg_i8_acquire(i8*
 ;
 ; RV64IA-LABEL: atomicrmw_xchg_i8_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -184,8 +184,8 @@ define i8 @atomicrmw_xchg_i8_release(i8*
 ;
 ; RV64IA-LABEL: atomicrmw_xchg_i8_release:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -250,8 +250,8 @@ define i8 @atomicrmw_xchg_i8_acq_rel(i8*
 ;
 ; RV64IA-LABEL: atomicrmw_xchg_i8_acq_rel:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -316,8 +316,8 @@ define i8 @atomicrmw_xchg_i8_seq_cst(i8*
 ;
 ; RV64IA-LABEL: atomicrmw_xchg_i8_seq_cst:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -382,8 +382,8 @@ define i8 @atomicrmw_add_i8_monotonic(i8
 ;
 ; RV64IA-LABEL: atomicrmw_add_i8_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -448,8 +448,8 @@ define i8 @atomicrmw_add_i8_acquire(i8 *
 ;
 ; RV64IA-LABEL: atomicrmw_add_i8_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -514,8 +514,8 @@ define i8 @atomicrmw_add_i8_release(i8 *
 ;
 ; RV64IA-LABEL: atomicrmw_add_i8_release:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -580,8 +580,8 @@ define i8 @atomicrmw_add_i8_acq_rel(i8 *
 ;
 ; RV64IA-LABEL: atomicrmw_add_i8_acq_rel:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -646,8 +646,8 @@ define i8 @atomicrmw_add_i8_seq_cst(i8 *
 ;
 ; RV64IA-LABEL: atomicrmw_add_i8_seq_cst:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -712,8 +712,8 @@ define i8 @atomicrmw_sub_i8_monotonic(i8
 ;
 ; RV64IA-LABEL: atomicrmw_sub_i8_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -778,8 +778,8 @@ define i8 @atomicrmw_sub_i8_acquire(i8*
 ;
 ; RV64IA-LABEL: atomicrmw_sub_i8_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -844,8 +844,8 @@ define i8 @atomicrmw_sub_i8_release(i8*
 ;
 ; RV64IA-LABEL: atomicrmw_sub_i8_release:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -910,8 +910,8 @@ define i8 @atomicrmw_sub_i8_acq_rel(i8*
 ;
 ; RV64IA-LABEL: atomicrmw_sub_i8_acq_rel:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -976,8 +976,8 @@ define i8 @atomicrmw_sub_i8_seq_cst(i8*
 ;
 ; RV64IA-LABEL: atomicrmw_sub_i8_seq_cst:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -1037,11 +1037,11 @@ define i8 @atomicrmw_and_i8_monotonic(i8
 ; RV64IA-LABEL: atomicrmw_and_i8_monotonic:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    addi a3, zero, 255
-; RV64IA-NEXT:    sll a3, a3, a2
+; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    not a3, a3
 ; RV64IA-NEXT:    or a1, a3, a1
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -1091,11 +1091,11 @@ define i8 @atomicrmw_and_i8_acquire(i8 *
 ; RV64IA-LABEL: atomicrmw_and_i8_acquire:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    addi a3, zero, 255
-; RV64IA-NEXT:    sll a3, a3, a2
+; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    not a3, a3
 ; RV64IA-NEXT:    or a1, a3, a1
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -1145,11 +1145,11 @@ define i8 @atomicrmw_and_i8_release(i8 *
 ; RV64IA-LABEL: atomicrmw_and_i8_release:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    addi a3, zero, 255
-; RV64IA-NEXT:    sll a3, a3, a2
+; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    not a3, a3
 ; RV64IA-NEXT:    or a1, a3, a1
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -1199,11 +1199,11 @@ define i8 @atomicrmw_and_i8_acq_rel(i8 *
 ; RV64IA-LABEL: atomicrmw_and_i8_acq_rel:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    addi a3, zero, 255
-; RV64IA-NEXT:    sll a3, a3, a2
+; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    not a3, a3
 ; RV64IA-NEXT:    or a1, a3, a1
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -1253,11 +1253,11 @@ define i8 @atomicrmw_and_i8_seq_cst(i8 *
 ; RV64IA-LABEL: atomicrmw_and_i8_seq_cst:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    addi a3, zero, 255
-; RV64IA-NEXT:    sll a3, a3, a2
+; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    not a3, a3
 ; RV64IA-NEXT:    or a1, a3, a1
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -1313,8 +1313,8 @@ define i8 @atomicrmw_nand_i8_monotonic(i
 ;
 ; RV64IA-LABEL: atomicrmw_nand_i8_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -1381,8 +1381,8 @@ define i8 @atomicrmw_nand_i8_acquire(i8*
 ;
 ; RV64IA-LABEL: atomicrmw_nand_i8_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -1449,8 +1449,8 @@ define i8 @atomicrmw_nand_i8_release(i8*
 ;
 ; RV64IA-LABEL: atomicrmw_nand_i8_release:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -1517,8 +1517,8 @@ define i8 @atomicrmw_nand_i8_acq_rel(i8*
 ;
 ; RV64IA-LABEL: atomicrmw_nand_i8_acq_rel:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -1585,8 +1585,8 @@ define i8 @atomicrmw_nand_i8_seq_cst(i8*
 ;
 ; RV64IA-LABEL: atomicrmw_nand_i8_seq_cst:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a3, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -1643,9 +1643,9 @@ define i8 @atomicrmw_or_i8_monotonic(i8
 ; RV64IA-LABEL: atomicrmw_or_i8_monotonic:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoor.w a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -1689,9 +1689,9 @@ define i8 @atomicrmw_or_i8_acquire(i8 *%
 ; RV64IA-LABEL: atomicrmw_or_i8_acquire:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoor.w.aq a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -1735,9 +1735,9 @@ define i8 @atomicrmw_or_i8_release(i8 *%
 ; RV64IA-LABEL: atomicrmw_or_i8_release:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoor.w.rl a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -1781,9 +1781,9 @@ define i8 @atomicrmw_or_i8_acq_rel(i8 *%
 ; RV64IA-LABEL: atomicrmw_or_i8_acq_rel:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoor.w.aqrl a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -1827,9 +1827,9 @@ define i8 @atomicrmw_or_i8_seq_cst(i8 *%
 ; RV64IA-LABEL: atomicrmw_or_i8_seq_cst:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoor.w.aqrl a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -1873,9 +1873,9 @@ define i8 @atomicrmw_xor_i8_monotonic(i8
 ; RV64IA-LABEL: atomicrmw_xor_i8_monotonic:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoxor.w a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -1919,9 +1919,9 @@ define i8 @atomicrmw_xor_i8_acquire(i8 *
 ; RV64IA-LABEL: atomicrmw_xor_i8_acquire:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoxor.w.aq a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -1965,9 +1965,9 @@ define i8 @atomicrmw_xor_i8_release(i8 *
 ; RV64IA-LABEL: atomicrmw_xor_i8_release:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoxor.w.rl a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -2011,9 +2011,9 @@ define i8 @atomicrmw_xor_i8_acq_rel(i8 *
 ; RV64IA-LABEL: atomicrmw_xor_i8_acq_rel:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoxor.w.aqrl a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -2057,9 +2057,9 @@ define i8 @atomicrmw_xor_i8_seq_cst(i8 *
 ; RV64IA-LABEL: atomicrmw_xor_i8_seq_cst:
 ; RV64IA:       # %bb.0:
 ; RV64IA-NEXT:    andi a1, a1, 255
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoxor.w.aqrl a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -3738,8 +3738,8 @@ define i8 @atomicrmw_umax_i8_monotonic(i
 ;
 ; RV64IA-LABEL: atomicrmw_umax_i8_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a6, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -3880,8 +3880,8 @@ define i8 @atomicrmw_umax_i8_acquire(i8
 ;
 ; RV64IA-LABEL: atomicrmw_umax_i8_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a6, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -4022,8 +4022,8 @@ define i8 @atomicrmw_umax_i8_release(i8
 ;
 ; RV64IA-LABEL: atomicrmw_umax_i8_release:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a6, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -4170,8 +4170,8 @@ define i8 @atomicrmw_umax_i8_acq_rel(i8
 ;
 ; RV64IA-LABEL: atomicrmw_umax_i8_acq_rel:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a6, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -4312,8 +4312,8 @@ define i8 @atomicrmw_umax_i8_seq_cst(i8
 ;
 ; RV64IA-LABEL: atomicrmw_umax_i8_seq_cst:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a6, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -4448,8 +4448,8 @@ define i8 @atomicrmw_umin_i8_monotonic(i
 ;
 ; RV64IA-LABEL: atomicrmw_umin_i8_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a6, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -4590,8 +4590,8 @@ define i8 @atomicrmw_umin_i8_acquire(i8
 ;
 ; RV64IA-LABEL: atomicrmw_umin_i8_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a6, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -4732,8 +4732,8 @@ define i8 @atomicrmw_umin_i8_release(i8
 ;
 ; RV64IA-LABEL: atomicrmw_umin_i8_release:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a6, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -4880,8 +4880,8 @@ define i8 @atomicrmw_umin_i8_acq_rel(i8
 ;
 ; RV64IA-LABEL: atomicrmw_umin_i8_acq_rel:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a6, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -5022,8 +5022,8 @@ define i8 @atomicrmw_umin_i8_seq_cst(i8
 ;
 ; RV64IA-LABEL: atomicrmw_umin_i8_seq_cst:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
 ; RV64IA-NEXT:    addi a3, zero, 255
 ; RV64IA-NEXT:    sllw a6, a3, a2
 ; RV64IA-NEXT:    andi a1, a1, 255
@@ -5096,8 +5096,8 @@ define i16 @atomicrmw_xchg_i16_monotonic
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -5164,8 +5164,8 @@ define i16 @atomicrmw_xchg_i16_acquire(i
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -5232,8 +5232,8 @@ define i16 @atomicrmw_xchg_i16_release(i
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -5300,8 +5300,8 @@ define i16 @atomicrmw_xchg_i16_acq_rel(i
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -5368,8 +5368,8 @@ define i16 @atomicrmw_xchg_i16_seq_cst(i
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -5436,8 +5436,8 @@ define i16 @atomicrmw_add_i16_monotonic(
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -5504,8 +5504,8 @@ define i16 @atomicrmw_add_i16_acquire(i1
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -5572,8 +5572,8 @@ define i16 @atomicrmw_add_i16_release(i1
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -5640,8 +5640,8 @@ define i16 @atomicrmw_add_i16_acq_rel(i1
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -5708,8 +5708,8 @@ define i16 @atomicrmw_add_i16_seq_cst(i1
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -5776,8 +5776,8 @@ define i16 @atomicrmw_sub_i16_monotonic(
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -5844,8 +5844,8 @@ define i16 @atomicrmw_sub_i16_acquire(i1
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -5912,8 +5912,8 @@ define i16 @atomicrmw_sub_i16_release(i1
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -5980,8 +5980,8 @@ define i16 @atomicrmw_sub_i16_acq_rel(i1
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -6048,8 +6048,8 @@ define i16 @atomicrmw_sub_i16_seq_cst(i1
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -6110,10 +6110,10 @@ define i16 @atomicrmw_and_i16_monotonic(
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
-; RV64IA-NEXT:    sll a1, a1, a3
-; RV64IA-NEXT:    sll a2, a2, a3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
+; RV64IA-NEXT:    sllw a1, a1, a3
+; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    not a2, a2
 ; RV64IA-NEXT:    or a1, a2, a1
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -6166,10 +6166,10 @@ define i16 @atomicrmw_and_i16_acquire(i1
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
-; RV64IA-NEXT:    sll a1, a1, a3
-; RV64IA-NEXT:    sll a2, a2, a3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
+; RV64IA-NEXT:    sllw a1, a1, a3
+; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    not a2, a2
 ; RV64IA-NEXT:    or a1, a2, a1
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -6222,10 +6222,10 @@ define i16 @atomicrmw_and_i16_release(i1
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
-; RV64IA-NEXT:    sll a1, a1, a3
-; RV64IA-NEXT:    sll a2, a2, a3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
+; RV64IA-NEXT:    sllw a1, a1, a3
+; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    not a2, a2
 ; RV64IA-NEXT:    or a1, a2, a1
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -6278,10 +6278,10 @@ define i16 @atomicrmw_and_i16_acq_rel(i1
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
-; RV64IA-NEXT:    sll a1, a1, a3
-; RV64IA-NEXT:    sll a2, a2, a3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
+; RV64IA-NEXT:    sllw a1, a1, a3
+; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    not a2, a2
 ; RV64IA-NEXT:    or a1, a2, a1
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -6334,10 +6334,10 @@ define i16 @atomicrmw_and_i16_seq_cst(i1
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
-; RV64IA-NEXT:    sll a1, a1, a3
-; RV64IA-NEXT:    sll a2, a2, a3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
+; RV64IA-NEXT:    sllw a1, a1, a3
+; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    not a2, a2
 ; RV64IA-NEXT:    or a1, a2, a1
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -6397,8 +6397,8 @@ define i16 @atomicrmw_nand_i16_monotonic
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -6467,8 +6467,8 @@ define i16 @atomicrmw_nand_i16_acquire(i
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -6537,8 +6537,8 @@ define i16 @atomicrmw_nand_i16_release(i
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -6607,8 +6607,8 @@ define i16 @atomicrmw_nand_i16_acq_rel(i
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -6677,8 +6677,8 @@ define i16 @atomicrmw_nand_i16_seq_cst(i
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a2, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -6737,9 +6737,9 @@ define i16 @atomicrmw_or_i16_monotonic(i
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoor.w a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -6787,9 +6787,9 @@ define i16 @atomicrmw_or_i16_acquire(i16
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoor.w.aq a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -6837,9 +6837,9 @@ define i16 @atomicrmw_or_i16_release(i16
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoor.w.rl a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -6887,9 +6887,9 @@ define i16 @atomicrmw_or_i16_acq_rel(i16
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoor.w.aqrl a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -6937,9 +6937,9 @@ define i16 @atomicrmw_or_i16_seq_cst(i16
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoor.w.aqrl a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -6987,9 +6987,9 @@ define i16 @atomicrmw_xor_i16_monotonic(
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoxor.w a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -7037,9 +7037,9 @@ define i16 @atomicrmw_xor_i16_acquire(i1
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoxor.w.aq a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -7087,9 +7087,9 @@ define i16 @atomicrmw_xor_i16_release(i1
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoxor.w.rl a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -7137,9 +7137,9 @@ define i16 @atomicrmw_xor_i16_acq_rel(i1
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoxor.w.aqrl a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -7187,9 +7187,9 @@ define i16 @atomicrmw_xor_i16_seq_cst(i1
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a2, a0, 3
-; RV64IA-NEXT:    slli a2, a2, 3
-; RV64IA-NEXT:    sll a1, a1, a2
+; RV64IA-NEXT:    slli a2, a0, 3
+; RV64IA-NEXT:    andi a2, a2, 24
+; RV64IA-NEXT:    sllw a1, a1, a2
 ; RV64IA-NEXT:    andi a0, a0, -4
 ; RV64IA-NEXT:    amoxor.w.aqrl a0, a1, (a0)
 ; RV64IA-NEXT:    srlw a0, a0, a2
@@ -8900,8 +8900,8 @@ define i16 @atomicrmw_umax_i16_monotonic
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a6, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -9052,8 +9052,8 @@ define i16 @atomicrmw_umax_i16_acquire(i
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a6, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -9204,8 +9204,8 @@ define i16 @atomicrmw_umax_i16_release(i
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a6, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -9362,8 +9362,8 @@ define i16 @atomicrmw_umax_i16_acq_rel(i
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a6, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -9514,8 +9514,8 @@ define i16 @atomicrmw_umax_i16_seq_cst(i
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a6, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -9660,8 +9660,8 @@ define i16 @atomicrmw_umin_i16_monotonic
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a6, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -9812,8 +9812,8 @@ define i16 @atomicrmw_umin_i16_acquire(i
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a6, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -9964,8 +9964,8 @@ define i16 @atomicrmw_umin_i16_release(i
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a6, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -10122,8 +10122,8 @@ define i16 @atomicrmw_umin_i16_acq_rel(i
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a6, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4
@@ -10274,8 +10274,8 @@ define i16 @atomicrmw_umin_i16_seq_cst(i
 ; RV64IA-NEXT:    lui a2, 16
 ; RV64IA-NEXT:    addiw a2, a2, -1
 ; RV64IA-NEXT:    and a1, a1, a2
-; RV64IA-NEXT:    andi a3, a0, 3
-; RV64IA-NEXT:    slli a3, a3, 3
+; RV64IA-NEXT:    slli a3, a0, 3
+; RV64IA-NEXT:    andi a3, a3, 24
 ; RV64IA-NEXT:    sllw a6, a2, a3
 ; RV64IA-NEXT:    sllw a1, a1, a3
 ; RV64IA-NEXT:    andi a0, a0, -4

Modified: llvm/trunk/test/CodeGen/RISCV/pr40333.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/RISCV/pr40333.ll?rev=352169&r1=352168&r2=352169&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/RISCV/pr40333.ll (original)
+++ llvm/trunk/test/CodeGen/RISCV/pr40333.ll Thu Jan 24 21:04:00 2019
@@ -7,17 +7,10 @@
 ; loop would be created in DAGCombine, converting ANY_EXTEND to SIGN_EXTEND
 ; and back again.
 
-; TODO: This test case is also an example of where it would be cheaper to
-; select SRLW, but the current lowering strategy fails to do so.
-
 define signext i8 @foo(i32 %a, i32 %b) nounwind {
 ; RV64I-LABEL: foo:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a1, a1, 32
-; RV64I-NEXT:    srli a1, a1, 32
-; RV64I-NEXT:    slli a0, a0, 32
-; RV64I-NEXT:    srli a0, a0, 32
-; RV64I-NEXT:    srl a0, a0, a1
+; RV64I-NEXT:    srlw a0, a0, a1
 ; RV64I-NEXT:    slli a0, a0, 56
 ; RV64I-NEXT:    srai a0, a0, 56
 ; RV64I-NEXT:    ret
