[llvm] [Xtensa] Implement lowering Mul/Div/Shift operations. (PR #99981)
Andrei Safronov via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 26 08:06:39 PDT 2024
https://github.com/andreisfr updated https://github.com/llvm/llvm-project/pull/99981
>From 32e567066310dc93a83eb15245b4c351b9dbcd38 Mon Sep 17 00:00:00 2001
From: Andrei Safronov <safronov at espressif.com>
Date: Tue, 23 Jul 2024 01:31:41 +0300
Subject: [PATCH 1/2] [Xtensa] Implement lowering Mul/Div/Shift operations.
Implement lowering of the Mul/Div operations and of the shift-parts
operations (SHL_PARTS/SRA_PARTS/SRL_PARTS). Implement lowering of the bit
manipulation operations ROTL/ROTR, BSWAP, CTPOP, CTTZ and CTLZ.
---
llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 252 ++++++-
llvm/lib/Target/Xtensa/XtensaISelLowering.h | 14 +
llvm/lib/Target/Xtensa/XtensaInstrInfo.td | 24 +
.../Target/Xtensa/XtensaMachineFunctionInfo.h | 53 ++
llvm/lib/Target/Xtensa/XtensaOperators.td | 8 +
.../lib/Target/Xtensa/XtensaTargetMachine.cpp | 7 +
llvm/lib/Target/Xtensa/XtensaTargetMachine.h | 4 +
llvm/test/CodeGen/Xtensa/bswap.ll | 413 ++++++++++++
llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll | 531 +++++++++++++++
llvm/test/CodeGen/Xtensa/div.ll | 491 ++++++++++++++
llvm/test/CodeGen/Xtensa/mul.ll | 636 ++++++++++++++++++
llvm/test/CodeGen/Xtensa/rotl-rotr.ll | 500 ++++++++++++++
llvm/test/CodeGen/Xtensa/shift.ll | 72 ++
13 files changed, 3001 insertions(+), 4 deletions(-)
create mode 100644 llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h
create mode 100644 llvm/test/CodeGen/Xtensa/bswap.ll
create mode 100644 llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll
create mode 100644 llvm/test/CodeGen/Xtensa/div.ll
create mode 100644 llvm/test/CodeGen/Xtensa/mul.ll
create mode 100644 llvm/test/CodeGen/Xtensa/rotl-rotr.ll
create mode 100644 llvm/test/CodeGen/Xtensa/shift.ll
diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
index 80d01d662a221..8c30dbbad821e 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
@@ -13,6 +13,7 @@
#include "XtensaISelLowering.h"
#include "XtensaConstantPoolValue.h"
+#include "XtensaMachineFunctionInfo.h"
#include "XtensaSubtarget.h"
#include "XtensaTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
@@ -21,6 +22,7 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -98,6 +100,32 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM,
setCondCodeAction(ISD::SETUGT, MVT::i32, Expand);
setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
+ setOperationAction(ISD::MUL, MVT::i32, Custom);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
+
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTR, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+
// Implement custom stack allocations
setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
// Implement custom stack save and restore
@@ -665,12 +693,30 @@ SDValue XtensaTargetLowering::getAddrPCRel(SDValue Op,
SDValue XtensaTargetLowering::LowerConstantPool(ConstantPoolSDNode *CP,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ auto C = const_cast<Constant *>(CP->getConstVal());
+ auto T = const_cast<Type *>(CP->getType());
SDValue Result;
- if (!CP->isMachineConstantPoolEntry()) {
- Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
- CP->getOffset());
+
+ // Do not use the constant pool for aggregate or vector constant types;
+ // in such cases create a global variable instead, for example to store
+ // the table that is needed when we lower the CTTZ operation.
+ if (T->isAggregateType() || T->isVectorTy()) {
+ auto AFI = DAG.getMachineFunction().getInfo<XtensaFunctionInfo>();
+ auto M = const_cast<Module *>(
+ DAG.getMachineFunction().getFunction().getParent());
+ auto GV = new GlobalVariable(
+ *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
+ Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
+ Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
+ Twine(AFI->createLabelUId()));
+ Result = DAG.getTargetConstantPool(GV, PtrVT, Align(4));
} else {
- report_fatal_error("This constantpool type is not supported yet");
+ if (!CP->isMachineConstantPoolEntry()) {
+ Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+ CP->getAlign(), CP->getOffset());
+ } else {
+ report_fatal_error("This constantpool type is not supported yet");
+ }
}
return getAddrPCRel(Result, DAG);
@@ -713,6 +759,131 @@ SDValue XtensaTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
return DAG.getMergeValues(Ops, DL);
}
+SDValue XtensaTargetLowering::LowerShiftLeftParts(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MVT VT = MVT::i32;
+ SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1);
+ SDValue Shamt = Op.getOperand(2);
+ // Lower SHL_PARTS (Hi:Lo << Shamt); both cases are built, SELECT picks one:
+ //   if Shamt - register size < 0: // Shamt < register size
+ //     Lo = Lo << Shamt
+ //     Hi = (Hi << Shamt) | (Lo >>u (register size - Shamt))
+ //   else:
+ //     Lo = 0
+ //     Hi = Lo << (Shamt - register size)
+ // The register size is 32, so "Shamt - register size" is Shamt + (-32).
+ SDValue MinusRegisterSize = DAG.getConstant(-32, DL, VT);
+ SDValue ShamtMinusRegisterSize =
+ DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusRegisterSize);
+ // Values for the Shamt < 32 case; HiTrue is produced by the SRCL node.
+ SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
+
+ SDValue HiTrue = DAG.getNode(XtensaISD::SRCL, DL, VT, Hi, Lo, Shamt);
+
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ // High word for the Shamt >= 32 case (the low word is Zero there).
+ SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusRegisterSize);
+ // Cond is true when Shamt - 32 is negative, i.e. Shamt < 32.
+ SDValue Cond = DAG.getSetCC(DL, VT, ShamtMinusRegisterSize, Zero, ISD::SETLT);
+
+ Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, LoTrue, Zero);
+
+ Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, HiTrue, HiFalse);
+
+ return DAG.getMergeValues({Lo, Hi}, DL);
+}
+
+SDValue XtensaTargetLowering::LowerShiftRightParts(SDValue Op,
+ SelectionDAG &DAG,
+ bool IsSRA) const {
+ SDLoc DL(Op);
+ SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1);
+ SDValue Shamt = Op.getOperand(2);
+ MVT VT = MVT::i32;
+ // Lower SRA_PARTS (IsSRA) or SRL_PARTS (!IsSRA): shift Hi:Lo right by Shamt.
+ // SRA expansion:
+ //   if Shamt - register size < 0: // Shamt < register size
+ //     Lo = (Lo >>u Shamt) | (Hi << (register size - Shamt))
+ //     Hi = Hi >>s Shamt
+ //   else:
+ //     Lo = Hi >>s (Shamt - register size)
+ //     Hi = Hi >>s (register size - 1)
+ //
+ // SRL expansion:
+ //   if Shamt - register size < 0: // Shamt < register size
+ //     Lo = (Lo >>u Shamt) | (Hi << (register size - Shamt))
+ //     Hi = Hi >>u Shamt
+ //   else:
+ //     Lo = Hi >>u (Shamt - register size)
+ //     Hi = 0
+ // Both branches are materialized below and chosen with SELECT on Cond.
+ unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
+
+ SDValue MinusRegisterSize = DAG.getConstant(-32, DL, VT);
+ SDValue RegisterSizeMinus1 = DAG.getConstant(32 - 1, DL, VT);
+ SDValue ShamtMinusRegisterSize =
+ DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusRegisterSize);
+ // Values for the Shamt < 32 case; LoTrue is produced by the SRCR node.
+ SDValue LoTrue = DAG.getNode(XtensaISD::SRCR, DL, VT, Hi, Lo, Shamt);
+
+ SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
+
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ // Values for the Shamt >= 32 case.
+ SDValue LoFalse =
+ DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusRegisterSize);
+
+ SDValue HiFalse;
+ // SRA: high word is Hi shifted arithmetically by 31; SRL: high word is 0.
+ if (IsSRA) {
+ HiFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, RegisterSizeMinus1);
+ } else {
+ HiFalse = Zero;
+ }
+ // Cond is true when Shamt - 32 is negative, i.e. Shamt < 32.
+ SDValue Cond = DAG.getSetCC(DL, VT, ShamtMinusRegisterSize, Zero, ISD::SETLT);
+
+ Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, LoTrue, LoFalse);
+
+ Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, HiTrue, HiFalse);
+
+ SDValue Ops[2] = {Lo, Hi};
+ return DAG.getMergeValues(Ops, DL);
+}
+
+SDValue XtensaTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op->getValueType(0);
+ SDLoc DL(Op);
+ // Rewrites an i32 MUL by 2, 4 or 8 as SHL; otherwise returns an empty SDValue.
+ if (VT != MVT::i32)
+ return SDValue();
+ // Only a constant second operand (operand 1) is considered.
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op->getOperand(1));
+ if (!C)
+ return SDValue();
+
+ int64_t MulAmt = C->getSExtValue();
+ unsigned ShiftAmt = 0;
+ // Map the multiplier to the equivalent left-shift amount.
+ switch (MulAmt) {
+ case 2:
+ ShiftAmt = 1;
+ break;
+ case 4:
+ ShiftAmt = 2;
+ break;
+ case 8:
+ ShiftAmt = 3;
+ break;
+ default:
+ return SDValue();
+ }
+
+ return DAG.getNode(ISD::SHL, DL, VT, Op->getOperand(0),
+ DAG.getConstant(ShiftAmt, DL, VT));
+}
+
SDValue XtensaTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -728,6 +899,8 @@ SDValue XtensaTargetLowering::LowerOperation(SDValue Op,
return LowerJumpTable(Op, DAG);
case ISD::ConstantPool:
return LowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
+ case ISD::MUL:
+ return LowerMUL(Op, DAG);
case ISD::SELECT_CC:
return LowerSELECT_CC(Op, DAG);
case ISD::STACKSAVE:
@@ -736,6 +909,12 @@ SDValue XtensaTargetLowering::LowerOperation(SDValue Op,
return LowerSTACKRESTORE(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::SHL_PARTS:
+ return LowerShiftLeftParts(Op, DAG);
+ case ISD::SRA_PARTS:
+ return LowerShiftRightParts(Op, DAG, true);
+ case ISD::SRL_PARTS:
+ return LowerShiftRightParts(Op, DAG, false);
default:
report_fatal_error("Unexpected node to lower");
}
@@ -753,6 +932,10 @@ const char *XtensaTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "XtensaISD::RET";
case XtensaISD::SELECT_CC:
return "XtensaISD::SELECT_CC";
+ case XtensaISD::SRCL:
+ return "XtensaISD::SRCL";
+ case XtensaISD::SRCR:
+ return "XtensaISD::SRCR";
}
return nullptr;
}
@@ -827,9 +1010,70 @@ XtensaTargetLowering::emitSelectCC(MachineInstr &MI,
MachineBasicBlock *XtensaTargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *MBB) const {
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
switch (MI.getOpcode()) {
case Xtensa::SELECT:
return emitSelectCC(MI, MBB);
+ case Xtensa::SHL_P: {
+ MachineOperand &R = MI.getOperand(0);
+ MachineOperand &S = MI.getOperand(1);
+ MachineOperand &SA = MI.getOperand(2);
+
+ BuildMI(*MBB, MI, DL, TII.get(Xtensa::SSL)).addReg(SA.getReg());
+ BuildMI(*MBB, MI, DL, TII.get(Xtensa::SLL), R.getReg()).addReg(S.getReg());
+ MI.eraseFromParent();
+ return MBB;
+ }
+ case Xtensa::SRA_P: {
+ MachineOperand &R = MI.getOperand(0);
+ MachineOperand &T = MI.getOperand(1);
+ MachineOperand &SA = MI.getOperand(2);
+
+ BuildMI(*MBB, MI, DL, TII.get(Xtensa::SSR)).addReg(SA.getReg());
+ BuildMI(*MBB, MI, DL, TII.get(Xtensa::SRA), R.getReg()).addReg(T.getReg());
+ MI.eraseFromParent();
+ return MBB;
+ }
+ case Xtensa::SRL_P: {
+ MachineOperand &R = MI.getOperand(0);
+ MachineOperand &T = MI.getOperand(1);
+ MachineOperand &SA = MI.getOperand(2);
+
+ BuildMI(*MBB, MI, DL, TII.get(Xtensa::SSR)).addReg(SA.getReg());
+ BuildMI(*MBB, MI, DL, TII.get(Xtensa::SRL), R.getReg()).addReg(T.getReg());
+ MI.eraseFromParent();
+ return MBB;
+ }
+ case Xtensa::SRCL_P: {
+ MachineOperand &R = MI.getOperand(0);
+ MachineOperand &HI = MI.getOperand(1);
+ MachineOperand &LO = MI.getOperand(2);
+ MachineOperand &SA = MI.getOperand(3);
+
+ BuildMI(*MBB, MI, DL, TII.get(Xtensa::SSL)).addReg(SA.getReg());
+ BuildMI(*MBB, MI, DL, TII.get(Xtensa::SRC), R.getReg())
+ .addReg(HI.getReg())
+ .addReg(LO.getReg());
+ ;
+ MI.eraseFromParent();
+ return MBB;
+ }
+ case Xtensa::SRCR_P: {
+ MachineOperand &R = MI.getOperand(0);
+ MachineOperand &HI = MI.getOperand(1);
+ MachineOperand &LO = MI.getOperand(2);
+ MachineOperand &SA = MI.getOperand(3);
+
+ BuildMI(*MBB, MI, DL, TII.get(Xtensa::SSR)).addReg(SA.getReg());
+ BuildMI(*MBB, MI, DL, TII.get(Xtensa::SRC), R.getReg())
+ .addReg(HI.getReg())
+ .addReg(LO.getReg());
+ ;
+ MI.eraseFromParent();
+ return MBB;
+ }
default:
llvm_unreachable("Unexpected instr type to insert");
}
diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.h b/llvm/lib/Target/Xtensa/XtensaISelLowering.h
index dd811ae9f3a77..b4c4929922cbf 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.h
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.h
@@ -40,6 +40,10 @@ enum {
// the lhs and rhs (ops #0 and #1) of a conditional expression with the
// condition code in op #4
SELECT_CC,
+
+ // Shift
+ SRCL,
+ SRCR,
};
}
@@ -50,6 +54,10 @@ class XtensaTargetLowering : public TargetLowering {
explicit XtensaTargetLowering(const TargetMachine &TM,
const XtensaSubtarget &STI);
+ MVT getScalarShiftAmountTy(const DataLayout &, EVT LHSTy) const override {
+ return LHSTy.getSizeInBits() <= 32 ? MVT::i32 : MVT::i64;
+ }
+
EVT getSetCCResultType(const DataLayout &, LLVMContext &,
EVT VT) const override {
if (!VT.isVector())
@@ -103,6 +111,8 @@ class XtensaTargetLowering : public TargetLowering {
SDValue LowerConstantPool(ConstantPoolSDNode *CP, SelectionDAG &DAG) const;
+ SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
+
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
@@ -111,6 +121,10 @@ class XtensaTargetLowering : public TargetLowering {
SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
+
SDValue getAddrPCRel(SDValue Op, SelectionDAG &DAG) const;
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td
index fc134e794153b..18a31fef18446 100644
--- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td
+++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td
@@ -189,6 +189,30 @@ def SSAI : RRR_Inst<0x00, 0x00, 0x04, (outs), (ins uimm5:$imm),
let t{0} = imm{4};
}
+// Shift pseudo instructions. EmitInstrWithCustomInserter expands each one
+// into an SSL or SSR followed by the real shift; SHL_P becomes SSL + SLL.
+let usesCustomInserter = 1 in {
+ def SHL_P : Pseudo<(outs AR:$r), (ins AR:$s, AR:$sa),
+ "# SHL_P $r, $s, $sa",
+ [(set i32:$r, (shl i32:$s, i32:$sa))]>;
+ // SRA_P is expanded to SSR + SRA.
+ def SRA_P : Pseudo<(outs AR:$r), (ins AR:$t, AR:$sa),
+ "# SRA_P $r, $t, $sa",
+ [(set i32:$r, (sra i32:$t, i32:$sa))]>;
+ // SRL_P is expanded to SSR + SRL.
+ def SRL_P : Pseudo<(outs AR:$r), (ins AR:$t, AR:$sa),
+ "# SRL_P $r, $t, $sa",
+ [(set i32:$r, (srl i32:$t, i32:$sa))]>;
+ // SRCL_P is expanded to SSL + SRC on ($hi, $lo).
+ def SRCL_P : Pseudo<(outs AR:$r), (ins AR:$hi, AR:$lo, AR:$sa),
+ "# SRCL_P $r, $hi, $lo, $sa",
+ [(set i32:$r, (Xtensa_srcl i32:$hi, i32:$lo, i32:$sa))]>;
+ // SRCR_P is expanded to SSR + SRC on ($hi, $lo).
+ def SRCR_P : Pseudo<(outs AR:$r), (ins AR:$hi, AR:$lo, AR:$sa),
+ "# SRCR_P $r, $hi, $lo, $sa",
+ [(set i32:$r, (Xtensa_srcr i32:$hi, i32:$lo, i32:$sa))]>;
+}
+
//===----------------------------------------------------------------------===//
// Load and store instructions
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h
new file mode 100644
index 0000000000000..86ee81128c34c
--- /dev/null
+++ b/llvm/lib/Target/Xtensa/XtensaMachineFunctionInfo.h
@@ -0,0 +1,53 @@
+//==- XtensaMachineFunctionInfo.h - Xtensa machine function info --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares Xtensa-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_XTENSA_XTENSAMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_XTENSA_XTENSAMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+// Per-function Xtensa state: vararg info, frame-register flag, label ids.
+class XtensaFunctionInfo : public MachineFunctionInfo {
+ unsigned VarArgsFirstGPR;
+ int VarArgsStackOffset;
+ unsigned VarArgsFrameIndex;
+ bool SaveFrameRegister = false;
+ unsigned LabelUId = 0;
+
+public:
+ explicit XtensaFunctionInfo(const Function &F, const TargetSubtargetInfo *STI)
+ : VarArgsFirstGPR(0), VarArgsStackOffset(0), VarArgsFrameIndex(0) {}
+ // The constructor above zero-initializes the vararg fields; F and STI are unused.
+ unsigned getVarArgsFirstGPR() const { return VarArgsFirstGPR; }
+ void setVarArgsFirstGPR(unsigned GPR) { VarArgsFirstGPR = GPR; }
+
+ int getVarArgsStackOffset() const { return VarArgsStackOffset; }
+ void setVarArgsStackOffset(int Offset) { VarArgsStackOffset = Offset; }
+
+ // Get and set the frame index of the first stack vararg.
+ unsigned getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(unsigned FI) { VarArgsFrameIndex = FI; }
+ // The flag can only be set; nothing in this class clears it again.
+ bool isSaveFrameRegister() const { return SaveFrameRegister; }
+ void setSaveFrameRegister() { SaveFrameRegister = true; }
+ // Returns the current label id, then advances the counter.
+ unsigned createLabelUId() { return LabelUId++; }
+};
+
+} // namespace llvm
+
+#endif /* LLVM_LIB_TARGET_XTENSA_XTENSAMACHINEFUNCTIONINFO_H */
diff --git a/llvm/lib/Target/Xtensa/XtensaOperators.td b/llvm/lib/Target/Xtensa/XtensaOperators.td
index 93cd1c933dbde..c825359f3c5dd 100644
--- a/llvm/lib/Target/Xtensa/XtensaOperators.td
+++ b/llvm/lib/Target/Xtensa/XtensaOperators.td
@@ -24,6 +24,10 @@ def SDT_XtensaSelectCC : SDTypeProfile<1, 5,
[SDTCisSameAs<0, 1>,
SDTCisSameAs<2, 3>,
SDTCisVT<5, i32>]>;
+// Type profile used by SRCL and SRCR: an i32 result and three i32 operands.
+def SDT_XtensaSRC : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+
//===----------------------------------------------------------------------===//
// Node definitions
//===----------------------------------------------------------------------===//
@@ -46,3 +50,7 @@ def Xtensa_brjt: SDNode<"XtensaISD::BR_JT", SDT_XtensaBrJT, [SDNPHasChain]>;
def Xtensa_select_cc: SDNode<"XtensaISD::SELECT_CC", SDT_XtensaSelectCC,
[SDNPInGlue]>;
+// SRCL/SRCR take (hi, lo, shift amount); the *_PARTS shift lowering emits them.
+def Xtensa_srcl: SDNode<"XtensaISD::SRCL", SDT_XtensaSRC>;
+
+def Xtensa_srcr: SDNode<"XtensaISD::SRCR", SDT_XtensaSRC>;
diff --git a/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp b/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp
index 49c7faf84df1d..eba169a2fe7a9 100644
--- a/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp
@@ -14,6 +14,7 @@
#include "XtensaTargetMachine.h"
#include "TargetInfo/XtensaTargetInfo.h"
+#include "XtensaMachineFunctionInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -83,6 +84,12 @@ XtensaTargetMachine::getSubtargetImpl(const Function &F) const {
return I.get();
}
+MachineFunctionInfo *XtensaTargetMachine::createMachineFunctionInfo(
+ BumpPtrAllocator &Allocator, const Function &F,
+ const TargetSubtargetInfo *STI) const {
+ return XtensaFunctionInfo::create<XtensaFunctionInfo>(Allocator, F, STI);
+}
+
namespace {
/// Xtensa Code Generator Pass Configuration Options.
class XtensaPassConfig : public TargetPassConfig {
diff --git a/llvm/lib/Target/Xtensa/XtensaTargetMachine.h b/llvm/lib/Target/Xtensa/XtensaTargetMachine.h
index f371f22ed3d0e..6975076b5d699 100644
--- a/llvm/lib/Target/Xtensa/XtensaTargetMachine.h
+++ b/llvm/lib/Target/Xtensa/XtensaTargetMachine.h
@@ -45,6 +45,10 @@ class XtensaTargetMachine : public LLVMTargetMachine {
return TLOF.get();
}
+ MachineFunctionInfo *
+ createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F,
+ const TargetSubtargetInfo *STI) const override;
+
protected:
mutable StringMap<std::unique_ptr<XtensaSubtarget>> SubtargetMap;
};
diff --git a/llvm/test/CodeGen/Xtensa/bswap.ll b/llvm/test/CodeGen/Xtensa/bswap.ll
new file mode 100644
index 0000000000000..e4458c7cf81c3
--- /dev/null
+++ b/llvm/test/CodeGen/Xtensa/bswap.ll
@@ -0,0 +1,413 @@
+; RUN: llc -mtriple=xtensa -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=XTENSA %s
+
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+declare i64 @llvm.bswap.i64(i64)
+declare i8 @llvm.bitreverse.i8(i8)
+declare i16 @llvm.bitreverse.i16(i16)
+declare i32 @llvm.bitreverse.i32(i32)
+declare i64 @llvm.bitreverse.i64(i64)
+
+define i16 @test_bswap_i16(i16 %a) nounwind {
+; XTENSA-LABEL: test_bswap_i16:
+; XTENSA: l32r a8, .LCPI0_0
+; XTENSA-NEXT: and a8, a2, a8
+; XTENSA-NEXT: srli a8, a8, 8
+; XTENSA-NEXT: slli a9, a2, 8
+; XTENSA-NEXT: or a2, a9, a8
+; XTENSA-NEXT: ret
+ %tmp = call i16 @llvm.bswap.i16(i16 %a)
+ ret i16 %tmp
+}
+
+define i32 @test_bswap_i32(i32 %a) nounwind {
+; XTENSA-LABEL: test_bswap_i32:
+; XTENSA: movi a8, 24
+; XTENSA-NEXT: ssr a8
+; XTENSA-NEXT: srl a8, a2
+; XTENSA-NEXT: srli a9, a2, 8
+; XTENSA-NEXT: l32r a10, .LCPI1_0
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: or a8, a9, a8
+; XTENSA-NEXT: and a9, a2, a10
+; XTENSA-NEXT: slli a9, a9, 8
+; XTENSA-NEXT: slli a10, a2, 24
+; XTENSA-NEXT: or a9, a10, a9
+; XTENSA-NEXT: or a2, a9, a8
+; XTENSA-NEXT: ret
+ %tmp = call i32 @llvm.bswap.i32(i32 %a)
+ ret i32 %tmp
+}
+
+define i64 @test_bswap_i64(i64 %a) nounwind {
+; XTENSA-LABEL: test_bswap_i64:
+; XTENSA: movi a9, 24
+; XTENSA-NEXT: ssr a9
+; XTENSA-NEXT: srl a8, a3
+; XTENSA-NEXT: srli a10, a3, 8
+; XTENSA-NEXT: l32r a11, .LCPI2_0
+; XTENSA-NEXT: and a10, a10, a11
+; XTENSA-NEXT: or a8, a10, a8
+; XTENSA-NEXT: and a10, a3, a11
+; XTENSA-NEXT: slli a10, a10, 8
+; XTENSA-NEXT: slli a7, a3, 24
+; XTENSA-NEXT: or a10, a7, a10
+; XTENSA-NEXT: or a8, a10, a8
+; XTENSA-NEXT: ssr a9
+; XTENSA-NEXT: srl a9, a2
+; XTENSA-NEXT: srli a10, a2, 8
+; XTENSA-NEXT: and a10, a10, a11
+; XTENSA-NEXT: or a9, a10, a9
+; XTENSA-NEXT: and a10, a2, a11
+; XTENSA-NEXT: slli a10, a10, 8
+; XTENSA-NEXT: slli a11, a2, 24
+; XTENSA-NEXT: or a10, a11, a10
+; XTENSA-NEXT: or a3, a10, a9
+; XTENSA-NEXT: or a2, a8, a8
+; XTENSA-NEXT: ret
+ %tmp = call i64 @llvm.bswap.i64(i64 %a)
+ ret i64 %tmp
+}
+
+define i8 @test_bitreverse_i8(i8 %a) nounwind {
+; XTENSA-LABEL: test_bitreverse_i8:
+; XTENSA: movi a8, 15
+; XTENSA-NEXT: and a8, a2, a8
+; XTENSA-NEXT: slli a8, a8, 4
+; XTENSA-NEXT: movi a9, 240
+; XTENSA-NEXT: and a9, a2, a9
+; XTENSA-NEXT: srli a9, a9, 4
+; XTENSA-NEXT: or a8, a9, a8
+; XTENSA-NEXT: srli a9, a8, 2
+; XTENSA-NEXT: movi a10, 51
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: and a8, a8, a10
+; XTENSA-NEXT: slli a8, a8, 2
+; XTENSA-NEXT: or a8, a9, a8
+; XTENSA-NEXT: srli a9, a8, 1
+; XTENSA-NEXT: movi a10, 85
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: and a8, a8, a10
+; XTENSA-NEXT: slli a8, a8, 1
+; XTENSA-NEXT: or a2, a9, a8
+; XTENSA-NEXT: ret
+ %tmp = call i8 @llvm.bitreverse.i8(i8 %a)
+ ret i8 %tmp
+}
+
+define i16 @test_bitreverse_i16(i16 %a) nounwind {
+; XTENSA-LABEL: test_bitreverse_i16:
+; XTENSA: l32r a8, .LCPI4_0
+; XTENSA-NEXT: and a8, a2, a8
+; XTENSA-NEXT: srli a8, a8, 8
+; XTENSA-NEXT: slli a9, a2, 8
+; XTENSA-NEXT: or a8, a9, a8
+; XTENSA-NEXT: srli a9, a8, 4
+; XTENSA-NEXT: l32r a10, .LCPI4_1
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: and a8, a8, a10
+; XTENSA-NEXT: slli a8, a8, 4
+; XTENSA-NEXT: or a8, a9, a8
+; XTENSA-NEXT: srli a9, a8, 2
+; XTENSA-NEXT: l32r a10, .LCPI4_2
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: and a8, a8, a10
+; XTENSA-NEXT: slli a8, a8, 2
+; XTENSA-NEXT: or a8, a9, a8
+; XTENSA-NEXT: srli a9, a8, 1
+; XTENSA-NEXT: l32r a10, .LCPI4_3
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: and a8, a8, a10
+; XTENSA-NEXT: slli a8, a8, 1
+; XTENSA-NEXT: or a2, a9, a8
+; XTENSA-NEXT: ret
+ %tmp = call i16 @llvm.bitreverse.i16(i16 %a)
+ ret i16 %tmp
+}
+
+define i32 @test_bitreverse_i32(i32 %a) nounwind {
+; XTENSA-LABEL: test_bitreverse_i32:
+; XTENSA: movi a8, 24
+; XTENSA-NEXT: ssr a8
+; XTENSA-NEXT: srl a8, a2
+; XTENSA-NEXT: srli a9, a2, 8
+; XTENSA-NEXT: l32r a10, .LCPI5_0
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: or a8, a9, a8
+; XTENSA-NEXT: and a9, a2, a10
+; XTENSA-NEXT: slli a9, a9, 8
+; XTENSA-NEXT: slli a10, a2, 24
+; XTENSA-NEXT: or a9, a10, a9
+; XTENSA-NEXT: or a8, a9, a8
+; XTENSA-NEXT: srli a9, a8, 4
+; XTENSA-NEXT: l32r a10, .LCPI5_1
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: and a8, a8, a10
+; XTENSA-NEXT: slli a8, a8, 4
+; XTENSA-NEXT: or a8, a9, a8
+; XTENSA-NEXT: srli a9, a8, 2
+; XTENSA-NEXT: l32r a10, .LCPI5_2
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: and a8, a8, a10
+; XTENSA-NEXT: slli a8, a8, 2
+; XTENSA-NEXT: or a8, a9, a8
+; XTENSA-NEXT: srli a9, a8, 1
+; XTENSA-NEXT: l32r a10, .LCPI5_3
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: and a8, a8, a10
+; XTENSA-NEXT: slli a8, a8, 1
+; XTENSA-NEXT: or a2, a9, a8
+; XTENSA-NEXT: ret
+ %tmp = call i32 @llvm.bitreverse.i32(i32 %a)
+ ret i32 %tmp
+}
+
+define i64 @test_bitreverse_i64(i64 %a) nounwind {
+; XTENSA-LABEL: test_bitreverse_i64:
+; XTENSA: movi a10, 24
+; XTENSA-NEXT: ssr a10
+; XTENSA-NEXT: srl a8, a3
+; XTENSA-NEXT: srli a11, a3, 8
+; XTENSA-NEXT: l32r a9, .LCPI6_0
+; XTENSA-NEXT: and a11, a11, a9
+; XTENSA-NEXT: or a8, a11, a8
+; XTENSA-NEXT: and a11, a3, a9
+; XTENSA-NEXT: slli a11, a11, 8
+; XTENSA-NEXT: slli a7, a3, 24
+; XTENSA-NEXT: or a11, a7, a11
+; XTENSA-NEXT: or a8, a11, a8
+; XTENSA-NEXT: srli a7, a8, 4
+; XTENSA-NEXT: l32r a11, .LCPI6_1
+; XTENSA-NEXT: and a7, a7, a11
+; XTENSA-NEXT: and a8, a8, a11
+; XTENSA-NEXT: slli a8, a8, 4
+; XTENSA-NEXT: or a8, a7, a8
+; XTENSA-NEXT: srli a7, a8, 2
+; XTENSA-NEXT: l32r a6, .LCPI6_2
+; XTENSA-NEXT: and a7, a7, a6
+; XTENSA-NEXT: and a8, a8, a6
+; XTENSA-NEXT: slli a8, a8, 2
+; XTENSA-NEXT: or a8, a7, a8
+; XTENSA-NEXT: srli a7, a8, 1
+; XTENSA-NEXT: l32r a5, .LCPI6_3
+; XTENSA-NEXT: and a7, a7, a5
+; XTENSA-NEXT: and a8, a8, a5
+; XTENSA-NEXT: slli a8, a8, 1
+; XTENSA-NEXT: or a8, a7, a8
+; XTENSA-NEXT: ssr a10
+; XTENSA-NEXT: srl a10, a2
+; XTENSA-NEXT: srli a7, a2, 8
+; XTENSA-NEXT: and a7, a7, a9
+; XTENSA-NEXT: or a10, a7, a10
+; XTENSA-NEXT: and a9, a2, a9
+; XTENSA-NEXT: slli a9, a9, 8
+; XTENSA-NEXT: slli a7, a2, 24
+; XTENSA-NEXT: or a9, a7, a9
+; XTENSA-NEXT: or a9, a9, a10
+; XTENSA-NEXT: srli a10, a9, 4
+; XTENSA-NEXT: and a10, a10, a11
+; XTENSA-NEXT: and a9, a9, a11
+; XTENSA-NEXT: slli a9, a9, 4
+; XTENSA-NEXT: or a9, a10, a9
+; XTENSA-NEXT: srli a10, a9, 2
+; XTENSA-NEXT: and a10, a10, a6
+; XTENSA-NEXT: and a9, a9, a6
+; XTENSA-NEXT: slli a9, a9, 2
+; XTENSA-NEXT: or a9, a10, a9
+; XTENSA-NEXT: srli a10, a9, 1
+; XTENSA-NEXT: and a10, a10, a5
+; XTENSA-NEXT: and a9, a9, a5
+; XTENSA-NEXT: slli a9, a9, 1
+; XTENSA-NEXT: or a3, a10, a9
+; XTENSA-NEXT: or a2, a8, a8
+; XTENSA-NEXT: ret
+ %tmp = call i64 @llvm.bitreverse.i64(i64 %a)
+ ret i64 %tmp
+}
+
+define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind {
+; XTENSA-LABEL: test_bswap_bitreverse_i16:
+; XTENSA: srli a8, a2, 4
+; XTENSA-NEXT: l32r a9, .LCPI7_0
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: and a9, a2, a9
+; XTENSA-NEXT: slli a9, a9, 4
+; XTENSA-NEXT: or a8, a8, a9
+; XTENSA-NEXT: srli a9, a8, 2
+; XTENSA-NEXT: l32r a10, .LCPI7_1
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: and a8, a8, a10
+; XTENSA-NEXT: slli a8, a8, 2
+; XTENSA-NEXT: or a8, a9, a8
+; XTENSA-NEXT: srli a9, a8, 1
+; XTENSA-NEXT: l32r a10, .LCPI7_2
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: and a8, a8, a10
+; XTENSA-NEXT: slli a8, a8, 1
+; XTENSA-NEXT: or a2, a9, a8
+; XTENSA-NEXT: ret
+ %tmp = call i16 @llvm.bswap.i16(i16 %a)
+ %tmp2 = call i16 @llvm.bitreverse.i16(i16 %tmp)
+ ret i16 %tmp2
+}
+
+define i32 @test_bswap_bitreverse_i32(i32 %a) nounwind {
+; XTENSA-LABEL: test_bswap_bitreverse_i32:
+; XTENSA: srli a8, a2, 4
+; XTENSA-NEXT: l32r a9, .LCPI8_0
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: and a9, a2, a9
+; XTENSA-NEXT: slli a9, a9, 4
+; XTENSA-NEXT: or a8, a8, a9
+; XTENSA-NEXT: srli a9, a8, 2
+; XTENSA-NEXT: l32r a10, .LCPI8_1
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: and a8, a8, a10
+; XTENSA-NEXT: slli a8, a8, 2
+; XTENSA-NEXT: or a8, a9, a8
+; XTENSA-NEXT: srli a9, a8, 1
+; XTENSA-NEXT: l32r a10, .LCPI8_2
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: and a8, a8, a10
+; XTENSA-NEXT: slli a8, a8, 1
+; XTENSA-NEXT: or a2, a9, a8
+; XTENSA-NEXT: ret
+ %tmp = call i32 @llvm.bswap.i32(i32 %a)
+ %tmp2 = call i32 @llvm.bitreverse.i32(i32 %tmp)
+ ret i32 %tmp2
+}
+
+define i64 @test_bswap_bitreverse_i64(i64 %a) nounwind {
+; XTENSA-LABEL: test_bswap_bitreverse_i64:
+; XTENSA: srli a8, a2, 4
+; XTENSA-NEXT: l32r a9, .LCPI9_0
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: and a10, a2, a9
+; XTENSA-NEXT: slli a10, a10, 4
+; XTENSA-NEXT: or a8, a8, a10
+; XTENSA-NEXT: srli a10, a8, 2
+; XTENSA-NEXT: l32r a11, .LCPI9_1
+; XTENSA-NEXT: and a10, a10, a11
+; XTENSA-NEXT: and a8, a8, a11
+; XTENSA-NEXT: slli a8, a8, 2
+; XTENSA-NEXT: or a8, a10, a8
+; XTENSA-NEXT: srli a10, a8, 1
+; XTENSA-NEXT: l32r a7, .LCPI9_2
+; XTENSA-NEXT: and a10, a10, a7
+; XTENSA-NEXT: and a8, a8, a7
+; XTENSA-NEXT: slli a8, a8, 1
+; XTENSA-NEXT: or a2, a10, a8
+; XTENSA-NEXT: srli a8, a3, 4
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: and a9, a3, a9
+; XTENSA-NEXT: slli a9, a9, 4
+; XTENSA-NEXT: or a8, a8, a9
+; XTENSA-NEXT: srli a9, a8, 2
+; XTENSA-NEXT: and a9, a9, a11
+; XTENSA-NEXT: and a8, a8, a11
+; XTENSA-NEXT: slli a8, a8, 2
+; XTENSA-NEXT: or a8, a9, a8
+; XTENSA-NEXT: srli a9, a8, 1
+; XTENSA-NEXT: and a9, a9, a7
+; XTENSA-NEXT: and a8, a8, a7
+; XTENSA-NEXT: slli a8, a8, 1
+; XTENSA-NEXT: or a3, a9, a8
+; XTENSA-NEXT: ret
+ %tmp = call i64 @llvm.bswap.i64(i64 %a)
+ %tmp2 = call i64 @llvm.bitreverse.i64(i64 %tmp)
+ ret i64 %tmp2
+}
+
+define i16 @test_bitreverse_bswap_i16(i16 %a) nounwind {
+; XTENSA-LABEL: test_bitreverse_bswap_i16:
+; XTENSA: srli a8, a2, 4
+; XTENSA-NEXT: l32r a9, .LCPI10_0
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: and a9, a2, a9
+; XTENSA-NEXT: slli a9, a9, 4
+; XTENSA-NEXT: or a8, a8, a9
+; XTENSA-NEXT: srli a9, a8, 2
+; XTENSA-NEXT: l32r a10, .LCPI10_1
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: and a8, a8, a10
+; XTENSA-NEXT: slli a8, a8, 2
+; XTENSA-NEXT: or a8, a9, a8
+; XTENSA-NEXT: srli a9, a8, 1
+; XTENSA-NEXT: l32r a10, .LCPI10_2
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: and a8, a8, a10
+; XTENSA-NEXT: slli a8, a8, 1
+; XTENSA-NEXT: or a2, a9, a8
+; XTENSA-NEXT: ret
+ %tmp = call i16 @llvm.bitreverse.i16(i16 %a)
+ %tmp2 = call i16 @llvm.bswap.i16(i16 %tmp)
+ ret i16 %tmp2
+}
+
+define i32 @test_bitreverse_bswap_i32(i32 %a) nounwind {
+; XTENSA-LABEL: test_bitreverse_bswap_i32:
+; XTENSA: srli a8, a2, 4
+; XTENSA-NEXT: l32r a9, .LCPI11_0
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: and a9, a2, a9
+; XTENSA-NEXT: slli a9, a9, 4
+; XTENSA-NEXT: or a8, a8, a9
+; XTENSA-NEXT: srli a9, a8, 2
+; XTENSA-NEXT: l32r a10, .LCPI11_1
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: and a8, a8, a10
+; XTENSA-NEXT: slli a8, a8, 2
+; XTENSA-NEXT: or a8, a9, a8
+; XTENSA-NEXT: srli a9, a8, 1
+; XTENSA-NEXT: l32r a10, .LCPI11_2
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: and a8, a8, a10
+; XTENSA-NEXT: slli a8, a8, 1
+; XTENSA-NEXT: or a2, a9, a8
+; XTENSA-NEXT: ret
+ %tmp = call i32 @llvm.bitreverse.i32(i32 %a)
+ %tmp2 = call i32 @llvm.bswap.i32(i32 %tmp)
+ ret i32 %tmp2
+}
+
+define i64 @test_bitreverse_bswap_i64(i64 %a) nounwind {
+; XTENSA-LABEL: test_bitreverse_bswap_i64:
+; XTENSA: srli a8, a2, 4
+; XTENSA-NEXT: l32r a9, .LCPI12_0
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: and a10, a2, a9
+; XTENSA-NEXT: slli a10, a10, 4
+; XTENSA-NEXT: or a8, a8, a10
+; XTENSA-NEXT: srli a10, a8, 2
+; XTENSA-NEXT: l32r a11, .LCPI12_1
+; XTENSA-NEXT: and a10, a10, a11
+; XTENSA-NEXT: and a8, a8, a11
+; XTENSA-NEXT: slli a8, a8, 2
+; XTENSA-NEXT: or a8, a10, a8
+; XTENSA-NEXT: srli a10, a8, 1
+; XTENSA-NEXT: l32r a7, .LCPI12_2
+; XTENSA-NEXT: and a10, a10, a7
+; XTENSA-NEXT: and a8, a8, a7
+; XTENSA-NEXT: slli a8, a8, 1
+; XTENSA-NEXT: or a2, a10, a8
+; XTENSA-NEXT: srli a8, a3, 4
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: and a9, a3, a9
+; XTENSA-NEXT: slli a9, a9, 4
+; XTENSA-NEXT: or a8, a8, a9
+; XTENSA-NEXT: srli a9, a8, 2
+; XTENSA-NEXT: and a9, a9, a11
+; XTENSA-NEXT: and a8, a8, a11
+; XTENSA-NEXT: slli a8, a8, 2
+; XTENSA-NEXT: or a8, a9, a8
+; XTENSA-NEXT: srli a9, a8, 1
+; XTENSA-NEXT: and a9, a9, a7
+; XTENSA-NEXT: and a8, a8, a7
+; XTENSA-NEXT: slli a8, a8, 1
+; XTENSA-NEXT: or a3, a9, a8
+; XTENSA-NEXT: ret
+ %tmp = call i64 @llvm.bitreverse.i64(i64 %a)
+ %tmp2 = call i64 @llvm.bswap.i64(i64 %tmp)
+ ret i64 %tmp2
+}
diff --git a/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll
new file mode 100644
index 0000000000000..030f2a0fbfdc7
--- /dev/null
+++ b/llvm/test/CodeGen/Xtensa/ctlz-cttz-ctpop.ll
@@ -0,0 +1,531 @@
+; RUN: llc -mtriple=xtensa -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=XTENSA %s
+
+declare i8 @llvm.cttz.i8(i8, i1)
+declare i16 @llvm.cttz.i16(i16, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i64 @llvm.cttz.i64(i64, i1)
+declare i8 @llvm.ctlz.i8(i8, i1)
+declare i16 @llvm.ctlz.i16(i16, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i64 @llvm.ctlz.i64(i64, i1)
+declare i8 @llvm.ctpop.i8(i8)
+declare i16 @llvm.ctpop.i16(i16)
+declare i32 @llvm.ctpop.i32(i32)
+declare i64 @llvm.ctpop.i64(i64)
+
+define i8 @test_cttz_i8(i8 %a) nounwind {
+; XTENSA-LABEL: test_cttz_i8:
+; XTENSA: movi a8, 255
+; XTENSA-NEXT: and a9, a2, a8
+; XTENSA-NEXT: movi a8, 8
+; XTENSA-NEXT: beqz a9, .LBB0_2
+; XTENSA-NEXT: j .LBB0_1
+; XTENSA-NEXT: .LBB0_1: # %cond.false
+; XTENSA-NEXT: movi a8, -1
+; XTENSA-NEXT: xor a8, a2, a8
+; XTENSA-NEXT: addi a9, a2, -1
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: srli a9, a8, 1
+; XTENSA-NEXT: movi a10, 85
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: sub a8, a8, a9
+; XTENSA-NEXT: movi a9, 51
+; XTENSA-NEXT: and a10, a8, a9
+; XTENSA-NEXT: srli a8, a8, 2
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: add a8, a10, a8
+; XTENSA-NEXT: srli a9, a8, 4
+; XTENSA-NEXT: add a8, a8, a9
+; XTENSA-NEXT: movi a9, 15
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: .LBB0_2: # %cond.end
+; XTENSA-NEXT: or a2, a8, a8
+; XTENSA-NEXT: ret
+ %tmp = call i8 @llvm.cttz.i8(i8 %a, i1 false)
+ ret i8 %tmp
+}
+
+define i16 @test_cttz_i16(i16 %a) nounwind {
+; XTENSA-LABEL: test_cttz_i16:
+; XTENSA: l32r a8, .LCPI1_0
+; XTENSA-NEXT: and a9, a2, a8
+; XTENSA-NEXT: movi a8, 16
+; XTENSA-NEXT: beqz a9, .LBB1_2
+; XTENSA-NEXT: j .LBB1_1
+; XTENSA-NEXT: .LBB1_1: # %cond.false
+; XTENSA-NEXT: movi a8, -1
+; XTENSA-NEXT: xor a8, a2, a8
+; XTENSA-NEXT: addi a9, a2, -1
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: srli a9, a8, 1
+; XTENSA-NEXT: l32r a10, .LCPI1_1
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: sub a8, a8, a9
+; XTENSA-NEXT: l32r a9, .LCPI1_2
+; XTENSA-NEXT: and a10, a8, a9
+; XTENSA-NEXT: srli a8, a8, 2
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: add a8, a10, a8
+; XTENSA-NEXT: srli a9, a8, 4
+; XTENSA-NEXT: add a8, a8, a9
+; XTENSA-NEXT: movi a9, 15
+; XTENSA-NEXT: and a9, a8, a9
+; XTENSA-NEXT: l32r a10, .LCPI1_3
+; XTENSA-NEXT: and a8, a8, a10
+; XTENSA-NEXT: srli a8, a8, 8
+; XTENSA-NEXT: add a8, a9, a8
+; XTENSA-NEXT: .LBB1_2: # %cond.end
+; XTENSA-NEXT: or a2, a8, a8
+; XTENSA-NEXT: ret
+ %tmp = call i16 @llvm.cttz.i16(i16 %a, i1 false)
+ ret i16 %tmp
+}
+
+define i32 @test_cttz_i32(i32 %a) nounwind {
+; XTENSA-LABEL: test_cttz_i32:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: or a8, a2, a2
+; XTENSA-NEXT: movi a2, 32
+; XTENSA-NEXT: beqz a8, .LBB2_2
+; XTENSA-NEXT: j .LBB2_1
+; XTENSA-NEXT: .LBB2_1: # %cond.false
+; XTENSA-NEXT: neg a9, a8
+; XTENSA-NEXT: and a2, a8, a9
+; XTENSA-NEXT: l32r a3, .LCPI2_0
+; XTENSA-NEXT: l32r a8, .LCPI2_1
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: movi a8, 27
+; XTENSA-NEXT: ssr a8
+; XTENSA-NEXT: srl a8, a2
+; XTENSA-NEXT: l32r a9, .LCPI2_2
+; XTENSA-NEXT: add a8, a9, a8
+; XTENSA-NEXT: l8ui a2, a8, 0
+; XTENSA-NEXT: .LBB2_2: # %cond.end
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+ ret i32 %tmp
+}
+
+define i8 @test_cttz_i8_zero_undef(i8 %a) nounwind {
+; XTENSA-LABEL: test_cttz_i8_zero_undef:
+; XTENSA: movi a8, -1
+; XTENSA-NEXT: xor a8, a2, a8
+; XTENSA-NEXT: addi a9, a2, -1
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: srli a9, a8, 1
+; XTENSA-NEXT: movi a10, 85
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: sub a8, a8, a9
+; XTENSA-NEXT: movi a9, 51
+; XTENSA-NEXT: and a10, a8, a9
+; XTENSA-NEXT: srli a8, a8, 2
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: add a8, a10, a8
+; XTENSA-NEXT: srli a9, a8, 4
+; XTENSA-NEXT: add a8, a8, a9
+; XTENSA-NEXT: movi a9, 15
+; XTENSA-NEXT: and a2, a8, a9
+; XTENSA-NEXT: ret
+ %tmp = call i8 @llvm.cttz.i8(i8 %a, i1 true)
+ ret i8 %tmp
+}
+
+define i16 @test_cttz_i16_zero_undef(i16 %a) nounwind {
+; XTENSA-LABEL: test_cttz_i16_zero_undef:
+; XTENSA: movi a8, -1
+; XTENSA-NEXT: xor a8, a2, a8
+; XTENSA-NEXT: addi a9, a2, -1
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: srli a9, a8, 1
+; XTENSA-NEXT: l32r a10, .LCPI4_0
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: sub a8, a8, a9
+; XTENSA-NEXT: l32r a9, .LCPI4_1
+; XTENSA-NEXT: and a10, a8, a9
+; XTENSA-NEXT: srli a8, a8, 2
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: add a8, a10, a8
+; XTENSA-NEXT: srli a9, a8, 4
+; XTENSA-NEXT: add a8, a8, a9
+; XTENSA-NEXT: movi a9, 15
+; XTENSA-NEXT: and a9, a8, a9
+; XTENSA-NEXT: l32r a10, .LCPI4_2
+; XTENSA-NEXT: and a8, a8, a10
+; XTENSA-NEXT: srli a8, a8, 8
+; XTENSA-NEXT: add a2, a9, a8
+; XTENSA-NEXT: ret
+ %tmp = call i16 @llvm.cttz.i16(i16 %a, i1 true)
+ ret i16 %tmp
+}
+
+define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind {
+; XTENSA-LABEL: test_cttz_i32_zero_undef:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: neg a8, a2
+; XTENSA-NEXT: and a2, a2, a8
+; XTENSA-NEXT: l32r a3, .LCPI5_0
+; XTENSA-NEXT: l32r a8, .LCPI5_1
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: movi a8, 27
+; XTENSA-NEXT: ssr a8
+; XTENSA-NEXT: srl a8, a2
+; XTENSA-NEXT: l32r a9, .LCPI5_2
+; XTENSA-NEXT: add a8, a9, a8
+; XTENSA-NEXT: l8ui a2, a8, 0
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+ ret i32 %tmp
+}
+
+define i8 @test_ctlz_i8(i8 %a) nounwind {
+; XTENSA-LABEL: test_ctlz_i8:
+; XTENSA: movi a8, 255
+; XTENSA-NEXT: and a9, a2, a8
+; XTENSA-NEXT: movi a8, 8
+; XTENSA-NEXT: beqz a9, .LBB6_2
+; XTENSA-NEXT: j .LBB6_1
+; XTENSA-NEXT: .LBB6_1: # %cond.false
+; XTENSA-NEXT: movi a8, 254
+; XTENSA-NEXT: and a8, a2, a8
+; XTENSA-NEXT: srli a8, a8, 1
+; XTENSA-NEXT: or a8, a2, a8
+; XTENSA-NEXT: movi a9, 252
+; XTENSA-NEXT: and a9, a8, a9
+; XTENSA-NEXT: srli a9, a9, 2
+; XTENSA-NEXT: or a8, a8, a9
+; XTENSA-NEXT: movi a9, 240
+; XTENSA-NEXT: and a9, a8, a9
+; XTENSA-NEXT: srli a9, a9, 4
+; XTENSA-NEXT: or a8, a8, a9
+; XTENSA-NEXT: movi a9, -1
+; XTENSA-NEXT: xor a8, a8, a9
+; XTENSA-NEXT: srli a9, a8, 1
+; XTENSA-NEXT: movi a10, 85
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: sub a8, a8, a9
+; XTENSA-NEXT: movi a9, 51
+; XTENSA-NEXT: and a10, a8, a9
+; XTENSA-NEXT: srli a8, a8, 2
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: add a8, a10, a8
+; XTENSA-NEXT: srli a9, a8, 4
+; XTENSA-NEXT: add a8, a8, a9
+; XTENSA-NEXT: movi a9, 15
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: .LBB6_2: # %cond.end
+; XTENSA-NEXT: or a2, a8, a8
+; XTENSA-NEXT: ret
+ %tmp = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
+ ret i8 %tmp
+}
+
+define i16 @test_ctlz_i16(i16 %a) nounwind {
+; XTENSA-LABEL: test_ctlz_i16:
+; XTENSA: l32r a8, .LCPI7_0
+; XTENSA-NEXT: and a9, a2, a8
+; XTENSA-NEXT: movi a8, 16
+; XTENSA-NEXT: beqz a9, .LBB7_2
+; XTENSA-NEXT: j .LBB7_1
+; XTENSA-NEXT: .LBB7_1: # %cond.false
+; XTENSA-NEXT: l32r a8, .LCPI7_1
+; XTENSA-NEXT: and a8, a2, a8
+; XTENSA-NEXT: srli a8, a8, 1
+; XTENSA-NEXT: or a8, a2, a8
+; XTENSA-NEXT: l32r a9, .LCPI7_2
+; XTENSA-NEXT: and a9, a8, a9
+; XTENSA-NEXT: srli a9, a9, 2
+; XTENSA-NEXT: or a8, a8, a9
+; XTENSA-NEXT: l32r a9, .LCPI7_3
+; XTENSA-NEXT: and a9, a8, a9
+; XTENSA-NEXT: srli a9, a9, 4
+; XTENSA-NEXT: or a8, a8, a9
+; XTENSA-NEXT: l32r a9, .LCPI7_4
+; XTENSA-NEXT: and a9, a8, a9
+; XTENSA-NEXT: srli a9, a9, 8
+; XTENSA-NEXT: or a8, a8, a9
+; XTENSA-NEXT: movi a9, -1
+; XTENSA-NEXT: xor a8, a8, a9
+; XTENSA-NEXT: srli a9, a8, 1
+; XTENSA-NEXT: l32r a10, .LCPI7_5
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: sub a8, a8, a9
+; XTENSA-NEXT: l32r a9, .LCPI7_6
+; XTENSA-NEXT: and a10, a8, a9
+; XTENSA-NEXT: srli a8, a8, 2
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: add a8, a10, a8
+; XTENSA-NEXT: srli a9, a8, 4
+; XTENSA-NEXT: add a8, a8, a9
+; XTENSA-NEXT: movi a9, 15
+; XTENSA-NEXT: and a9, a8, a9
+; XTENSA-NEXT: l32r a10, .LCPI7_7
+; XTENSA-NEXT: and a8, a8, a10
+; XTENSA-NEXT: srli a8, a8, 8
+; XTENSA-NEXT: add a8, a9, a8
+; XTENSA-NEXT: .LBB7_2: # %cond.end
+; XTENSA-NEXT: or a2, a8, a8
+; XTENSA-NEXT: ret
+ %tmp = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
+ ret i16 %tmp
+}
+
+define i32 @test_ctlz_i32(i32 %a) nounwind {
+; XTENSA-LABEL: test_ctlz_i32:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: or a8, a2, a2
+; XTENSA-NEXT: movi a2, 32
+; XTENSA-NEXT: beqz a8, .LBB8_2
+; XTENSA-NEXT: j .LBB8_1
+; XTENSA-NEXT: .LBB8_1: # %cond.false
+; XTENSA-NEXT: srli a9, a8, 1
+; XTENSA-NEXT: or a8, a8, a9
+; XTENSA-NEXT: srli a9, a8, 2
+; XTENSA-NEXT: or a8, a8, a9
+; XTENSA-NEXT: srli a9, a8, 4
+; XTENSA-NEXT: or a8, a8, a9
+; XTENSA-NEXT: srli a9, a8, 8
+; XTENSA-NEXT: or a8, a8, a9
+; XTENSA-NEXT: movi a9, 16
+; XTENSA-NEXT: ssr a9
+; XTENSA-NEXT: srl a9, a8
+; XTENSA-NEXT: or a8, a8, a9
+; XTENSA-NEXT: movi a9, -1
+; XTENSA-NEXT: xor a8, a8, a9
+; XTENSA-NEXT: srli a9, a8, 1
+; XTENSA-NEXT: l32r a10, .LCPI8_0
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: sub a8, a8, a9
+; XTENSA-NEXT: l32r a9, .LCPI8_1
+; XTENSA-NEXT: and a10, a8, a9
+; XTENSA-NEXT: srli a8, a8, 2
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: add a8, a10, a8
+; XTENSA-NEXT: srli a9, a8, 4
+; XTENSA-NEXT: add a8, a8, a9
+; XTENSA-NEXT: l32r a9, .LCPI8_2
+; XTENSA-NEXT: and a2, a8, a9
+; XTENSA-NEXT: l32r a3, .LCPI8_3
+; XTENSA-NEXT: l32r a8, .LCPI8_4
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: movi a8, 24
+; XTENSA-NEXT: ssr a8
+; XTENSA-NEXT: srl a2, a2
+; XTENSA-NEXT: .LBB8_2: # %cond.end
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
+ ret i32 %tmp
+}
+
+define i8 @test_ctlz_i8_zero_undef(i8 %a) nounwind {
+; XTENSA-LABEL: test_ctlz_i8_zero_undef:
+; XTENSA: movi a8, 254
+; XTENSA-NEXT: and a8, a2, a8
+; XTENSA-NEXT: srli a8, a8, 1
+; XTENSA-NEXT: or a8, a2, a8
+; XTENSA-NEXT: movi a9, 252
+; XTENSA-NEXT: and a9, a8, a9
+; XTENSA-NEXT: srli a9, a9, 2
+; XTENSA-NEXT: or a8, a8, a9
+; XTENSA-NEXT: movi a9, 240
+; XTENSA-NEXT: and a9, a8, a9
+; XTENSA-NEXT: srli a9, a9, 4
+; XTENSA-NEXT: or a8, a8, a9
+; XTENSA-NEXT: movi a9, -1
+; XTENSA-NEXT: xor a8, a8, a9
+; XTENSA-NEXT: srli a9, a8, 1
+; XTENSA-NEXT: movi a10, 85
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: sub a8, a8, a9
+; XTENSA-NEXT: movi a9, 51
+; XTENSA-NEXT: and a10, a8, a9
+; XTENSA-NEXT: srli a8, a8, 2
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: add a8, a10, a8
+; XTENSA-NEXT: srli a9, a8, 4
+; XTENSA-NEXT: add a8, a8, a9
+; XTENSA-NEXT: movi a9, 15
+; XTENSA-NEXT: and a2, a8, a9
+; XTENSA-NEXT: ret
+ %tmp = call i8 @llvm.ctlz.i8(i8 %a, i1 true)
+ ret i8 %tmp
+}
+
+define i16 @test_ctlz_i16_zero_undef(i16 %a) nounwind {
+; XTENSA-LABEL: test_ctlz_i16_zero_undef:
+; XTENSA: l32r a8, .LCPI10_0
+; XTENSA-NEXT: and a8, a2, a8
+; XTENSA-NEXT: srli a8, a8, 1
+; XTENSA-NEXT: or a8, a2, a8
+; XTENSA-NEXT: l32r a9, .LCPI10_1
+; XTENSA-NEXT: and a9, a8, a9
+; XTENSA-NEXT: srli a9, a9, 2
+; XTENSA-NEXT: or a8, a8, a9
+; XTENSA-NEXT: l32r a9, .LCPI10_2
+; XTENSA-NEXT: and a9, a8, a9
+; XTENSA-NEXT: srli a9, a9, 4
+; XTENSA-NEXT: or a8, a8, a9
+; XTENSA-NEXT: l32r a9, .LCPI10_3
+; XTENSA-NEXT: and a9, a8, a9
+; XTENSA-NEXT: srli a9, a9, 8
+; XTENSA-NEXT: or a8, a8, a9
+; XTENSA-NEXT: movi a9, -1
+; XTENSA-NEXT: xor a8, a8, a9
+; XTENSA-NEXT: srli a9, a8, 1
+; XTENSA-NEXT: l32r a10, .LCPI10_4
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: sub a8, a8, a9
+; XTENSA-NEXT: l32r a9, .LCPI10_5
+; XTENSA-NEXT: and a10, a8, a9
+; XTENSA-NEXT: srli a8, a8, 2
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: add a8, a10, a8
+; XTENSA-NEXT: srli a9, a8, 4
+; XTENSA-NEXT: add a8, a8, a9
+; XTENSA-NEXT: movi a9, 15
+; XTENSA-NEXT: and a9, a8, a9
+; XTENSA-NEXT: l32r a10, .LCPI10_6
+; XTENSA-NEXT: and a8, a8, a10
+; XTENSA-NEXT: srli a8, a8, 8
+; XTENSA-NEXT: add a2, a9, a8
+; XTENSA-NEXT: ret
+ %tmp = call i16 @llvm.ctlz.i16(i16 %a, i1 true)
+ ret i16 %tmp
+}
+
+define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind {
+; XTENSA-LABEL: test_ctlz_i32_zero_undef:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: srli a8, a2, 1
+; XTENSA-NEXT: or a8, a2, a8
+; XTENSA-NEXT: srli a9, a8, 2
+; XTENSA-NEXT: or a8, a8, a9
+; XTENSA-NEXT: srli a9, a8, 4
+; XTENSA-NEXT: or a8, a8, a9
+; XTENSA-NEXT: srli a9, a8, 8
+; XTENSA-NEXT: or a8, a8, a9
+; XTENSA-NEXT: movi a9, 16
+; XTENSA-NEXT: ssr a9
+; XTENSA-NEXT: srl a9, a8
+; XTENSA-NEXT: or a8, a8, a9
+; XTENSA-NEXT: movi a9, -1
+; XTENSA-NEXT: xor a8, a8, a9
+; XTENSA-NEXT: srli a9, a8, 1
+; XTENSA-NEXT: l32r a10, .LCPI11_0
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: sub a8, a8, a9
+; XTENSA-NEXT: l32r a9, .LCPI11_1
+; XTENSA-NEXT: and a10, a8, a9
+; XTENSA-NEXT: srli a8, a8, 2
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: add a8, a10, a8
+; XTENSA-NEXT: srli a9, a8, 4
+; XTENSA-NEXT: add a8, a8, a9
+; XTENSA-NEXT: l32r a9, .LCPI11_2
+; XTENSA-NEXT: and a2, a8, a9
+; XTENSA-NEXT: l32r a3, .LCPI11_3
+; XTENSA-NEXT: l32r a8, .LCPI11_4
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: movi a8, 24
+; XTENSA-NEXT: ssr a8
+; XTENSA-NEXT: srl a2, a2
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
+ ret i32 %tmp
+}
+
+define i8 @test_ctpop_i8(i8 %a) nounwind {
+; XTENSA-LABEL: test_ctpop_i8:
+; XTENSA: srli a8, a2, 1
+; XTENSA-NEXT: movi a9, 85
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: sub a8, a2, a8
+; XTENSA-NEXT: movi a9, 51
+; XTENSA-NEXT: and a10, a8, a9
+; XTENSA-NEXT: srli a8, a8, 2
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: add a8, a10, a8
+; XTENSA-NEXT: srli a9, a8, 4
+; XTENSA-NEXT: add a8, a8, a9
+; XTENSA-NEXT: movi a9, 15
+; XTENSA-NEXT: and a2, a8, a9
+; XTENSA-NEXT: ret
+ %1 = call i8 @llvm.ctpop.i8(i8 %a)
+ ret i8 %1
+}
+
+define i16 @test_ctpop_i16(i16 %a) nounwind {
+; XTENSA-LABEL: test_ctpop_i16:
+; XTENSA: srli a8, a2, 1
+; XTENSA-NEXT: l32r a9, .LCPI13_0
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: sub a8, a2, a8
+; XTENSA-NEXT: l32r a9, .LCPI13_1
+; XTENSA-NEXT: and a10, a8, a9
+; XTENSA-NEXT: srli a8, a8, 2
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: add a8, a10, a8
+; XTENSA-NEXT: srli a9, a8, 4
+; XTENSA-NEXT: add a8, a8, a9
+; XTENSA-NEXT: movi a9, 15
+; XTENSA-NEXT: and a9, a8, a9
+; XTENSA-NEXT: l32r a10, .LCPI13_2
+; XTENSA-NEXT: and a8, a8, a10
+; XTENSA-NEXT: srli a8, a8, 8
+; XTENSA-NEXT: add a2, a9, a8
+; XTENSA-NEXT: ret
+ %1 = call i16 @llvm.ctpop.i16(i16 %a)
+ ret i16 %1
+}
+
+define i32 @test_ctpop_i32(i32 %a) nounwind {
+; XTENSA-LABEL: test_ctpop_i32:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: srli a8, a2, 1
+; XTENSA-NEXT: l32r a9, .LCPI14_0
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: sub a8, a2, a8
+; XTENSA-NEXT: l32r a9, .LCPI14_1
+; XTENSA-NEXT: and a10, a8, a9
+; XTENSA-NEXT: srli a8, a8, 2
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: add a8, a10, a8
+; XTENSA-NEXT: srli a9, a8, 4
+; XTENSA-NEXT: add a8, a8, a9
+; XTENSA-NEXT: l32r a9, .LCPI14_2
+; XTENSA-NEXT: and a2, a8, a9
+; XTENSA-NEXT: l32r a3, .LCPI14_3
+; XTENSA-NEXT: l32r a8, .LCPI14_4
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: movi a8, 24
+; XTENSA-NEXT: ssr a8
+; XTENSA-NEXT: srl a2, a2
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = call i32 @llvm.ctpop.i32(i32 %a)
+ ret i32 %1
+}
diff --git a/llvm/test/CodeGen/Xtensa/div.ll b/llvm/test/CodeGen/Xtensa/div.ll
new file mode 100644
index 0000000000000..fcb58eb5bff53
--- /dev/null
+++ b/llvm/test/CodeGen/Xtensa/div.ll
@@ -0,0 +1,491 @@
+; RUN: llc -mtriple=xtensa -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=XTENSA %s
+
+define i32 @udiv(i32 %a, i32 %b) nounwind {
+; XTENSA-LABEL: udiv:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: l32r a8, .LCPI0_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = udiv i32 %a, %b
+ ret i32 %1
+}
+
+define i32 @udiv_constant(i32 %a) nounwind {
+; XTENSA-LABEL: udiv_constant:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: movi a3, 5
+; XTENSA-NEXT: l32r a8, .LCPI1_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = udiv i32 %a, 5
+ ret i32 %1
+}
+
+define i32 @udiv_pow2(i32 %a) nounwind {
+; XTENSA-LABEL: udiv_pow2:
+; XTENSA: srli a2, a2, 3
+; XTENSA-NEXT: ret
+ %1 = udiv i32 %a, 8
+ ret i32 %1
+}
+
+define i32 @udiv_constant_lhs(i32 %a) nounwind {
+; XTENSA-LABEL: udiv_constant_lhs:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: or a3, a2, a2
+; XTENSA-NEXT: movi a2, 10
+; XTENSA-NEXT: l32r a8, .LCPI3_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = udiv i32 10, %a
+ ret i32 %1
+}
+
+define i64 @udiv64(i64 %a, i64 %b) nounwind {
+; XTENSA-LABEL: udiv64:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: l32r a8, .LCPI4_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = udiv i64 %a, %b
+ ret i64 %1
+}
+
+define i64 @udiv64_constant(i64 %a) nounwind {
+; XTENSA-LABEL: udiv64_constant:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: movi a4, 5
+; XTENSA-NEXT: movi a5, 0
+; XTENSA-NEXT: l32r a8, .LCPI5_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = udiv i64 %a, 5
+ ret i64 %1
+}
+
+define i64 @udiv64_constant_lhs(i64 %a) nounwind {
+; XTENSA-LABEL: udiv64_constant_lhs:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: or a5, a3, a3
+; XTENSA-NEXT: or a4, a2, a2
+; XTENSA-NEXT: movi a2, 10
+; XTENSA-NEXT: movi a3, 0
+; XTENSA-NEXT: l32r a8, .LCPI6_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = udiv i64 10, %a
+ ret i64 %1
+}
+
+define i8 @udiv8(i8 %a, i8 %b) nounwind {
+; XTENSA-LABEL: udiv8:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: movi a8, 255
+; XTENSA-NEXT: and a2, a2, a8
+; XTENSA-NEXT: and a3, a3, a8
+; XTENSA-NEXT: l32r a8, .LCPI7_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = udiv i8 %a, %b
+ ret i8 %1
+}
+
+define i8 @udiv8_constant(i8 %a) nounwind {
+; XTENSA-LABEL: udiv8_constant:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: movi a8, 255
+; XTENSA-NEXT: and a2, a2, a8
+; XTENSA-NEXT: movi a3, 5
+; XTENSA-NEXT: l32r a8, .LCPI8_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = udiv i8 %a, 5
+ ret i8 %1
+}
+
+define i8 @udiv8_pow2(i8 %a) nounwind {
+; XTENSA-LABEL: udiv8_pow2:
+; XTENSA: movi a8, 248
+; XTENSA-NEXT: and a8, a2, a8
+; XTENSA-NEXT: srli a2, a8, 3
+; XTENSA-NEXT: ret
+ %1 = udiv i8 %a, 8
+ ret i8 %1
+}
+
+define i8 @udiv8_constant_lhs(i8 %a) nounwind {
+; XTENSA-LABEL: udiv8_constant_lhs:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: movi a8, 255
+; XTENSA-NEXT: and a3, a2, a8
+; XTENSA-NEXT: movi a2, 10
+; XTENSA-NEXT: l32r a8, .LCPI10_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = udiv i8 10, %a
+ ret i8 %1
+}
+
+define i16 @udiv16(i16 %a, i16 %b) nounwind {
+; XTENSA-LABEL: udiv16:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: l32r a8, .LCPI11_0
+; XTENSA-NEXT: and a2, a2, a8
+; XTENSA-NEXT: and a3, a3, a8
+; XTENSA-NEXT: l32r a8, .LCPI11_1
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = udiv i16 %a, %b
+ ret i16 %1
+}
+
+define i16 @udiv16_constant(i16 %a) nounwind {
+; XTENSA-LABEL: udiv16_constant:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: l32r a8, .LCPI12_0
+; XTENSA-NEXT: and a2, a2, a8
+; XTENSA-NEXT: movi a3, 5
+; XTENSA-NEXT: l32r a8, .LCPI12_1
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = udiv i16 %a, 5
+ ret i16 %1
+}
+
+define i16 @udiv16_pow2(i16 %a) nounwind {
+; XTENSA-LABEL: udiv16_pow2:
+; XTENSA: l32r a8, .LCPI13_0
+; XTENSA-NEXT: and a8, a2, a8
+; XTENSA-NEXT: srli a2, a8, 3
+; XTENSA-NEXT: ret
+ %1 = udiv i16 %a, 8
+ ret i16 %1
+}
+
+define i32 @sdiv(i32 %a, i32 %b) nounwind {
+; XTENSA-LABEL: sdiv:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: l32r a8, .LCPI14_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = sdiv i32 %a, %b
+ ret i32 %1
+}
+
+define i32 @sdiv_constant_lhs(i32 %a) nounwind {
+; XTENSA-LABEL: sdiv_constant_lhs:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: or a3, a2, a2
+; XTENSA-NEXT: movi a2, -10
+; XTENSA-NEXT: l32r a8, .LCPI15_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = sdiv i32 -10, %a
+ ret i32 %1
+}
+
+define i64 @sdiv64(i64 %a, i64 %b) nounwind {
+; XTENSA-LABEL: sdiv64:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: l32r a8, .LCPI16_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = sdiv i64 %a, %b
+ ret i64 %1
+}
+
+define i64 @sdiv64_constant(i64 %a) nounwind {
+; XTENSA-LABEL: sdiv64_constant:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: movi a4, 5
+; XTENSA-NEXT: movi a5, 0
+; XTENSA-NEXT: l32r a8, .LCPI17_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = sdiv i64 %a, 5
+ ret i64 %1
+}
+
+define i64 @sdiv64_constant_lhs(i64 %a) nounwind {
+; XTENSA-LABEL: sdiv64_constant_lhs:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: or a5, a3, a3
+; XTENSA-NEXT: or a4, a2, a2
+; XTENSA-NEXT: movi a2, 10
+; XTENSA-NEXT: movi a3, 0
+; XTENSA-NEXT: l32r a8, .LCPI18_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = sdiv i64 10, %a
+ ret i64 %1
+}
+
+
+define i64 @sdiv64_sext_operands(i32 %a, i32 %b) nounwind {
+; XTENSA-LABEL: sdiv64_sext_operands:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: or a4, a3, a3
+; XTENSA-NEXT: srai a3, a2, 31
+; XTENSA-NEXT: srai a5, a4, 31
+; XTENSA-NEXT: l32r a8, .LCPI19_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = sext i32 %a to i64
+ %2 = sext i32 %b to i64
+ %3 = sdiv i64 %1, %2
+ ret i64 %3
+}
+
+define i8 @sdiv8(i8 %a, i8 %b) nounwind {
+; XTENSA-LABEL: sdiv8:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: slli a8, a2, 24
+; XTENSA-NEXT: srai a2, a8, 24
+; XTENSA-NEXT: slli a8, a3, 24
+; XTENSA-NEXT: srai a3, a8, 24
+; XTENSA-NEXT: l32r a8, .LCPI20_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = sdiv i8 %a, %b
+ ret i8 %1
+}
+
+define i8 @sdiv8_constant(i8 %a) nounwind {
+; XTENSA-LABEL: sdiv8_constant:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: slli a8, a2, 24
+; XTENSA-NEXT: srai a2, a8, 24
+; XTENSA-NEXT: movi a3, 5
+; XTENSA-NEXT: l32r a8, .LCPI21_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = sdiv i8 %a, 5
+ ret i8 %1
+}
+
+define i8 @sdiv8_pow2(i8 %a) nounwind {
+; XTENSA-LABEL: sdiv8_pow2:
+; XTENSA: slli a8, a2, 24
+; XTENSA-NEXT: srai a8, a8, 24
+; XTENSA-NEXT: srli a8, a8, 12
+; XTENSA-NEXT: movi a9, 7
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: add a8, a2, a8
+; XTENSA-NEXT: slli a8, a8, 24
+; XTENSA-NEXT: srai a2, a8, 27
+; XTENSA-NEXT: ret
+ %1 = sdiv i8 %a, 8
+ ret i8 %1
+}
+
+define i8 @sdiv8_constant_lhs(i8 %a) nounwind {
+; XTENSA-LABEL: sdiv8_constant_lhs:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: slli a8, a2, 24
+; XTENSA-NEXT: srai a3, a8, 24
+; XTENSA-NEXT: movi a2, -10
+; XTENSA-NEXT: l32r a8, .LCPI23_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = sdiv i8 -10, %a
+ ret i8 %1
+}
+
+define i16 @sdiv16(i16 %a, i16 %b) nounwind {
+; XTENSA-LABEL: sdiv16:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: slli a8, a2, 16
+; XTENSA-NEXT: srai a2, a8, 16
+; XTENSA-NEXT: slli a8, a3, 16
+; XTENSA-NEXT: srai a3, a8, 16
+; XTENSA-NEXT: l32r a8, .LCPI24_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = sdiv i16 %a, %b
+ ret i16 %1
+}
+
+define i16 @sdiv16_constant(i16 %a) nounwind {
+; XTENSA-LABEL: sdiv16_constant:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: slli a8, a2, 16
+; XTENSA-NEXT: srai a2, a8, 16
+; XTENSA-NEXT: movi a3, 5
+; XTENSA-NEXT: l32r a8, .LCPI25_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = sdiv i16 %a, 5
+ ret i16 %1
+}
+
+define i16 @sdiv16_constant_lhs(i16 %a) nounwind {
+; XTENSA-LABEL: sdiv16_constant_lhs:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: slli a8, a2, 16
+; XTENSA-NEXT: srai a3, a8, 16
+; XTENSA-NEXT: movi a2, -10
+; XTENSA-NEXT: l32r a8, .LCPI26_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = sdiv i16 -10, %a
+ ret i16 %1
+}
+
+define i32 @sdiv_pow2(i32 %a) nounwind {
+; XTENSA-LABEL: sdiv_pow2:
+; XTENSA: srai a8, a2, 31
+; XTENSA-NEXT: movi a9, 29
+; XTENSA-NEXT: ssr a9
+; XTENSA-NEXT: srl a8, a8
+; XTENSA-NEXT: add a8, a2, a8
+; XTENSA-NEXT: srai a2, a8, 3
+; XTENSA-NEXT: ret
+ %1 = sdiv i32 %a, 8
+ ret i32 %1
+}
+
+define i32 @sdiv_pow2_2(i32 %a) nounwind {
+; XTENSA-LABEL: sdiv_pow2_2:
+; XTENSA: srai a8, a2, 31
+; XTENSA-NEXT: movi a9, 16
+; XTENSA-NEXT: ssr a9
+; XTENSA-NEXT: srl a8, a8
+; XTENSA-NEXT: add a8, a2, a8
+; XTENSA-NEXT: srai a2, a8, 16
+; XTENSA-NEXT: ret
+ %1 = sdiv i32 %a, 65536
+ ret i32 %1
+}
+
+define i16 @sdiv16_pow2(i16 %a) nounwind {
+; XTENSA-LABEL: sdiv16_pow2:
+; XTENSA: slli a8, a2, 16
+; XTENSA-NEXT: srai a8, a8, 16
+; XTENSA-NEXT: movi a9, 28
+; XTENSA-NEXT: ssr a9
+; XTENSA-NEXT: srl a8, a8
+; XTENSA-NEXT: movi a9, 7
+; XTENSA-NEXT: and a8, a8, a9
+; XTENSA-NEXT: add a8, a2, a8
+; XTENSA-NEXT: slli a8, a8, 16
+; XTENSA-NEXT: srai a2, a8, 19
+; XTENSA-NEXT: ret
+ %1 = sdiv i16 %a, 8
+ ret i16 %1
+}
diff --git a/llvm/test/CodeGen/Xtensa/mul.ll b/llvm/test/CodeGen/Xtensa/mul.ll
new file mode 100644
index 0000000000000..0be2885458163
--- /dev/null
+++ b/llvm/test/CodeGen/Xtensa/mul.ll
@@ -0,0 +1,636 @@
+; RUN: llc -mtriple=xtensa -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=XTENSA %s
+
+define signext i32 @square(i32 %a) nounwind { ; i32 multiply of the argument by itself.
+; XTENSA-LABEL: square:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: l32r a8, .LCPI0_0
+; XTENSA-NEXT: or a3, a2, a2
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = mul i32 %a, %a ; expected output copies a2 into a3 and calls through a8 (address loaded with l32r); a0 is spilled and reloaded around the call
+ ret i32 %1
+}
+
+define signext i32 @mul(i32 %a, i32 %b) nounwind { ; i32 multiply of two variable arguments.
+; XTENSA-LABEL: mul:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: l32r a8, .LCPI1_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = mul i32 %a, %b ; expected output is a call through a8 with no argument shuffling, wrapped in a 16-byte frame that saves a0
+ ret i32 %1
+}
+
+define signext i32 @mul_constant(i32 %a) nounwind { ; i32 multiply by the constant 5.
+; XTENSA-LABEL: mul_constant:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: movi a3, 5
+; XTENSA-NEXT: l32r a8, .LCPI2_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = mul i32 %a, 5 ; expected output materializes 5 in a3 with movi and calls through a8; no shift/add expansion appears
+ ret i32 %1
+}
+
+define i32 @mul_pow2(i32 %a) nounwind { ; i32 multiply by 8 (2^3).
+; XTENSA-LABEL: mul_pow2:
+; XTENSA: slli a2, a2, 3
+; XTENSA-NEXT: ret
+ %1 = mul i32 %a, 8 ; expected output is a single slli by 3, with no call and no stack frame
+ ret i32 %1
+}
+
+define i64 @mul64(i64 %a, i64 %b) nounwind { ; i64 multiply of two variable arguments.
+; XTENSA-LABEL: mul64:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: l32r a8, .LCPI4_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = mul i64 %a, %b ; expected output is a call through a8 with no argument shuffling, wrapped in a 16-byte frame that saves a0
+ ret i64 %1
+}
+
+define i64 @mul64_constant(i64 %a) nounwind { ; i64 multiply by the constant 5.
+; XTENSA-LABEL: mul64_constant:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: movi a4, 5
+; XTENSA-NEXT: movi a5, 0
+; XTENSA-NEXT: l32r a8, .LCPI5_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = mul i64 %a, 5 ; expected output materializes the constant as the register pair a4 = 5, a5 = 0 and calls through a8
+ ret i64 %1
+}
+
+define i32 @mulhs(i32 %a, i32 %b) nounwind { ; Upper 32 bits of the 64-bit product of two sign-extended i32 values.
+; XTENSA-LABEL: mulhs:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: or a4, a3, a3
+; XTENSA-NEXT: srai a3, a2, 31
+; XTENSA-NEXT: srai a5, a4, 31
+; XTENSA-NEXT: l32r a8, .LCPI6_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: or a2, a3, a3
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = sext i32 %a to i64 ; expected output builds the extension word with srai by 31
+ %2 = sext i32 %b to i64
+ %3 = mul i64 %1, %2 ; expected output performs the 64-bit multiply as a call through a8
+ %4 = lshr i64 %3, 32 ; bring the upper half of the product down to bits 31..0
+ %5 = trunc i64 %4 to i32 ; expected output returns a3 (copied into a2 after the call)
+ ret i32 %5
+}
+
+define i32 @mulhs_positive_constant(i32 %a) nounwind { ; Upper 32 bits of sext(%a) * 5 computed in i64.
+; XTENSA-LABEL: mulhs_positive_constant:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: srai a3, a2, 31
+; XTENSA-NEXT: movi a4, 5
+; XTENSA-NEXT: movi a5, 0
+; XTENSA-NEXT: l32r a8, .LCPI7_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: or a2, a3, a3
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = sext i32 %a to i64 ; expected output builds the extension word with srai by 31
+ %2 = mul i64 %1, 5 ; expected output passes the constant as a4 = 5, a5 = 0 and calls through a8
+ %3 = lshr i64 %2, 32 ; bring the upper half of the product down to bits 31..0
+ %4 = trunc i64 %3 to i32 ; expected output returns a3 (copied into a2 after the call)
+ ret i32 %4
+}
+
+define i32 @mulhs_negative_constant(i32 %a) nounwind { ; Upper 32 bits of sext(%a) * -5 computed in i64.
+; XTENSA-LABEL: mulhs_negative_constant:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: srai a3, a2, 31
+; XTENSA-NEXT: movi a4, -5
+; XTENSA-NEXT: movi a5, -1
+; XTENSA-NEXT: l32r a8, .LCPI8_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: or a2, a3, a3
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = sext i32 %a to i64 ; expected output builds the extension word with srai by 31
+ %2 = mul i64 %1, -5 ; expected output passes the constant as a4 = -5, a5 = -1 and calls through a8
+ %3 = lshr i64 %2, 32 ; bring the upper half of the product down to bits 31..0
+ %4 = trunc i64 %3 to i32 ; expected output returns a3 (copied into a2 after the call)
+ ret i32 %4
+}
+
+define zeroext i32 @mulhu(i32 zeroext %a, i32 zeroext %b) nounwind { ; Upper 32 bits of the 64-bit product of two zero-extended i32 values.
+; XTENSA-LABEL: mulhu:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: or a4, a3, a3
+; XTENSA-NEXT: movi a3, 0
+; XTENSA-NEXT: l32r a8, .LCPI9_0
+; XTENSA-NEXT: or a5, a3, a3
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: or a2, a3, a3
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = zext i32 %a to i64 ; expected output uses a zero (movi a3, 0, copied to a5) as the extension word of both operands
+ %2 = zext i32 %b to i64
+ %3 = mul i64 %1, %2 ; expected output performs the 64-bit multiply as a call through a8
+ %4 = lshr i64 %3, 32 ; bring the upper half of the product down to bits 31..0
+ %5 = trunc i64 %4 to i32 ; expected output returns a3 (copied into a2 after the call)
+ ret i32 %5
+}
+
+define i32 @mulhsu(i32 %a, i32 %b) nounwind { ; Upper 32 bits of the 64-bit product of zero-extended %a and sign-extended %b.
+; XTENSA-LABEL: mulhsu:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: or a4, a3, a3
+; XTENSA-NEXT: srai a5, a4, 31
+; XTENSA-NEXT: movi a3, 0
+; XTENSA-NEXT: l32r a8, .LCPI10_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: or a2, a3, a3
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = zext i32 %a to i64 ; expected output uses movi a3, 0 as the extension word
+ %2 = sext i32 %b to i64 ; expected output uses srai a5, a4, 31 as the extension word
+ %3 = mul i64 %1, %2 ; expected output performs the 64-bit multiply as a call through a8
+ %4 = lshr i64 %3, 32 ; bring the upper half of the product down to bits 31..0
+ %5 = trunc i64 %4 to i32 ; expected output returns a3 (copied into a2 after the call)
+ ret i32 %5
+}
+
+define i32 @mulhu_constant(i32 %a) nounwind { ; Upper 32 bits of zext(%a) * 5 computed in i64.
+; XTENSA-LABEL: mulhu_constant:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: movi a4, 5
+; XTENSA-NEXT: movi a3, 0
+; XTENSA-NEXT: l32r a8, .LCPI11_0
+; XTENSA-NEXT: or a5, a3, a3
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: or a2, a3, a3
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = zext i32 %a to i64 ; expected output uses movi a3, 0 as the extension word
+ %2 = mul i64 %1, 5 ; expected output passes the constant as a4 = 5 with a5 copied from the zero in a3, then calls through a8
+ %3 = lshr i64 %2, 32 ; bring the upper half of the product down to bits 31..0
+ %4 = trunc i64 %3 to i32 ; expected output returns a3 (copied into a2 after the call)
+ ret i32 %4
+}
+
+define i32 @muli32_p65(i32 %a) nounwind { ; i32 multiply by 65 (2^6 + 1).
+; XTENSA-LABEL: muli32_p65:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: movi a3, 65
+; XTENSA-NEXT: l32r a8, .LCPI12_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = mul i32 %a, 65 ; expected output loads 65 into a3 with movi and calls through a8; no shift/add expansion appears
+ ret i32 %1
+}
+
+define i32 @muli32_p63(i32 %a) nounwind { ; i32 multiply by 63 (2^6 - 1).
+; XTENSA-LABEL: muli32_p63:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: movi a3, 63
+; XTENSA-NEXT: l32r a8, .LCPI13_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = mul i32 %a, 63 ; expected output loads 63 into a3 with movi and calls through a8; no shift/sub expansion appears
+ ret i32 %1
+}
+
+define i64 @muli64_p65(i64 %a) nounwind { ; i64 multiply by 65 (2^6 + 1).
+; XTENSA-LABEL: muli64_p65:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: movi a4, 65
+; XTENSA-NEXT: movi a5, 0
+; XTENSA-NEXT: l32r a8, .LCPI14_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = mul i64 %a, 65 ; expected output passes the constant as a4 = 65, a5 = 0 and calls through a8
+ ret i64 %1
+}
+
+define i64 @muli64_p63(i64 %a) nounwind { ; i64 multiply by 63 (2^6 - 1).
+; XTENSA-LABEL: muli64_p63:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: movi a4, 63
+; XTENSA-NEXT: movi a5, 0
+; XTENSA-NEXT: l32r a8, .LCPI15_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = mul i64 %a, 63 ; expected output passes the constant as a4 = 63, a5 = 0 and calls through a8
+ ret i64 %1
+}
+
+define i32 @muli32_m63(i32 %a) nounwind { ; i32 multiply by -63.
+; XTENSA-LABEL: muli32_m63:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: movi a3, -63
+; XTENSA-NEXT: l32r a8, .LCPI16_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = mul i32 %a, -63 ; expected output loads -63 into a3 with movi and calls through a8
+ ret i32 %1
+}
+
+define i32 @muli32_m65(i32 %a) nounwind { ; i32 multiply by -65.
+; XTENSA-LABEL: muli32_m65:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: movi a3, -65
+; XTENSA-NEXT: l32r a8, .LCPI17_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = mul i32 %a, -65 ; expected output loads -65 into a3 with movi and calls through a8
+ ret i32 %1
+}
+
+define i64 @muli64_m63(i64 %a) nounwind { ; i64 multiply by -63.
+; XTENSA-LABEL: muli64_m63:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: movi a4, -63
+; XTENSA-NEXT: movi a5, -1
+; XTENSA-NEXT: l32r a8, .LCPI18_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = mul i64 %a, -63 ; expected output passes the constant as a4 = -63, a5 = -1 and calls through a8
+ ret i64 %1
+}
+
+define i64 @muli64_m65(i64 %a) nounwind { ; i64 multiply by -65.
+; XTENSA-LABEL: muli64_m65:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: movi a4, -65
+; XTENSA-NEXT: movi a5, -1
+; XTENSA-NEXT: l32r a8, .LCPI19_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = mul i64 %a, -65 ; expected output passes the constant as a4 = -65, a5 = -1 and calls through a8
+ ret i64 %1
+}
+
+define i32 @muli32_p384(i32 %a) nounwind { ; i32 multiply by 384 (2^8 + 2^7).
+; XTENSA-LABEL: muli32_p384:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: movi a3, 384
+; XTENSA-NEXT: l32r a8, .LCPI20_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = mul i32 %a, 384 ; expected output loads 384 into a3 with movi and calls through a8
+ ret i32 %1
+}
+
+define i32 @muli32_p12288(i32 %a) nounwind { ; i32 multiply by 12288 (2^13 + 2^12).
+; XTENSA-LABEL: muli32_p12288:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: l32r a3, .LCPI21_0
+; XTENSA-NEXT: l32r a8, .LCPI21_1
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = mul i32 %a, 12288 ; expected output loads the constant into a3 with l32r (not movi) and calls through a8
+ ret i32 %1
+}
+
+define i32 @muli32_p4352(i32 %a) nounwind { ; i32 multiply by 4352 (2^12 + 2^8).
+; XTENSA-LABEL: muli32_p4352:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: l32r a3, .LCPI22_0
+; XTENSA-NEXT: l32r a8, .LCPI22_1
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = mul i32 %a, 4352 ; expected output loads the constant into a3 with l32r (not movi) and calls through a8
+ ret i32 %1
+}
+
+define i32 @muli32_p3840(i32 %a) nounwind { ; i32 multiply by 3840 (2^12 - 2^8).
+; XTENSA-LABEL: muli32_p3840:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: l32r a3, .LCPI23_0
+; XTENSA-NEXT: l32r a8, .LCPI23_1
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = mul i32 %a, 3840 ; expected output loads the constant into a3 with l32r (not movi) and calls through a8
+ ret i32 %1
+}
+
+define i32 @muli32_m3840(i32 %a) nounwind { ; i32 multiply by -3840.
+; XTENSA-LABEL: muli32_m3840:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: l32r a3, .LCPI24_0
+; XTENSA-NEXT: l32r a8, .LCPI24_1
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = mul i32 %a, -3840 ; expected output loads the constant into a3 with l32r (not movi) and calls through a8
+ ret i32 %1
+}
+
+define i32 @muli32_m4352(i32 %a) nounwind { ; i32 multiply by -4352.
+; XTENSA-LABEL: muli32_m4352:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: l32r a3, .LCPI25_0
+; XTENSA-NEXT: l32r a8, .LCPI25_1
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = mul i32 %a, -4352 ; expected output loads the constant into a3 with l32r (not movi) and calls through a8
+ ret i32 %1
+}
+
+define i64 @muli64_p4352(i64 %a) nounwind { ; i64 multiply by 4352 (2^12 + 2^8).
+; XTENSA-LABEL: muli64_p4352:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: l32r a4, .LCPI26_0
+; XTENSA-NEXT: movi a5, 0
+; XTENSA-NEXT: l32r a8, .LCPI26_1
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = mul i64 %a, 4352 ; expected output loads a4 with l32r, sets a5 = 0, and calls through a8
+ ret i64 %1
+}
+
+define i64 @muli64_p3840(i64 %a) nounwind { ; i64 multiply by 3840 (2^12 - 2^8).
+; XTENSA-LABEL: muli64_p3840:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: l32r a4, .LCPI27_0
+; XTENSA-NEXT: movi a5, 0
+; XTENSA-NEXT: l32r a8, .LCPI27_1
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = mul i64 %a, 3840 ; expected output loads a4 with l32r, sets a5 = 0, and calls through a8
+ ret i64 %1
+}
+
+define i64 @muli64_m4352(i64 %a) nounwind { ; i64 multiply by -4352.
+; XTENSA-LABEL: muli64_m4352:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: l32r a4, .LCPI28_0
+; XTENSA-NEXT: movi a5, -1
+; XTENSA-NEXT: l32r a8, .LCPI28_1
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = mul i64 %a, -4352 ; expected output loads a4 with l32r, sets a5 = -1, and calls through a8
+ ret i64 %1
+}
+
+define i64 @muli64_m3840(i64 %a) nounwind { ; i64 multiply by -3840.
+; XTENSA-LABEL: muli64_m3840:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; XTENSA-NEXT: l32r a4, .LCPI29_0
+; XTENSA-NEXT: movi a5, -1
+; XTENSA-NEXT: l32r a8, .LCPI29_1
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = mul i64 %a, -3840 ; expected output loads a4 with l32r, sets a5 = -1, and calls through a8
+ ret i64 %1
+}
+
+define i128 @muli128_m3840(i128 %a) nounwind { ; i128 multiply by -3840.
+; XTENSA-LABEL: muli128_m3840:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill
+; XTENSA-NEXT: movi a7, -1
+; XTENSA-NEXT: s32i a7, a1, 4
+; XTENSA-NEXT: s32i a7, a1, 0
+; XTENSA-NEXT: l32r a6, .LCPI30_0
+; XTENSA-NEXT: l32r a8, .LCPI30_1
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = mul i128 %a, -3840 ; expected output loads a6 with l32r, sets a7 = -1, stores -1 to a1+4 and a1+0, spills a0 at a1+8, and calls through a8
+ ret i128 %1
+}
+
+define i128 @muli128_m63(i128 %a) nounwind { ; i128 multiply by -63.
+; XTENSA-LABEL: muli128_m63:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill
+; XTENSA-NEXT: movi a7, -1
+; XTENSA-NEXT: s32i a7, a1, 4
+; XTENSA-NEXT: s32i a7, a1, 0
+; XTENSA-NEXT: movi a6, -63
+; XTENSA-NEXT: l32r a8, .LCPI31_0
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = mul i128 %a, -63 ; expected output sets a6 = -63 and a7 = -1 with movi, stores -1 to a1+4 and a1+0, spills a0 at a1+8, and calls through a8
+ ret i128 %1
+}
+
+define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind { ; Upper 64 bits of the 128-bit product of zero-extended %a and sign-extended %b.
+; XTENSA-LABEL: mulhsu_i64:
+; XTENSA: addi a8, a1, -16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: s32i a0, a1, 8 # 4-byte Folded Spill
+; XTENSA-NEXT: or a7, a5, a5
+; XTENSA-NEXT: or a6, a4, a4
+; XTENSA-NEXT: srai a8, a7, 31
+; XTENSA-NEXT: s32i a8, a1, 4
+; XTENSA-NEXT: s32i a8, a1, 0
+; XTENSA-NEXT: movi a4, 0
+; XTENSA-NEXT: l32r a8, .LCPI32_0
+; XTENSA-NEXT: or a5, a4, a4
+; XTENSA-NEXT: callx0 a8
+; XTENSA-NEXT: or a2, a4, a4
+; XTENSA-NEXT: or a3, a5, a5
+; XTENSA-NEXT: l32i a0, a1, 8 # 4-byte Folded Reload
+; XTENSA-NEXT: addi a8, a1, 16
+; XTENSA-NEXT: or a1, a8, a8
+; XTENSA-NEXT: ret
+ %1 = zext i64 %a to i128 ; expected output uses zeros (movi a4, 0, copied to a5) as the extension words
+ %2 = sext i64 %b to i128 ; expected output derives the extension word with srai by 31 and stores it to a1+4 and a1+0
+ %3 = mul i128 %1, %2 ; expected output performs the 128-bit multiply as a call through a8
+ %4 = lshr i128 %3, 64 ; bring the upper half of the product down to bits 63..0
+ %5 = trunc i128 %4 to i64 ; expected output returns a4/a5 (copied into a2/a3 after the call)
+ ret i64 %5
+}
+
+define i8 @muladd_demand(i8 %x, i8 %y) nounwind { ; (y + 14*x) & 15: only the low 4 bits of the multiply-add are used.
+; XTENSA-LABEL: muladd_demand:
+; XTENSA: slli a8, a2, 1
+; XTENSA-NEXT: sub a8, a3, a8
+; XTENSA-NEXT: movi a9, 15
+; XTENSA-NEXT: and a2, a8, a9
+; XTENSA-NEXT: ret
+ %m = mul i8 %x, 14 ; 14 is congruent to -2 modulo 16, so in the low 4 bits this equals -(x << 1)
+ %a = add i8 %y, %m ; expected output computes y - (x << 1) with slli/sub and no multiply call
+ %r = and i8 %a, 15 ; keeps bits 3..0 only
+ ret i8 %r
+}
+
+define i8 @mulsub_demand(i8 %x, i8 %y) nounwind { ; (y - 14*x) & 15: only the low 4 bits of the multiply-subtract are used.
+; XTENSA-LABEL: mulsub_demand:
+; XTENSA: addx2 a8, a2, a3
+; XTENSA-NEXT: movi a9, 15
+; XTENSA-NEXT: and a2, a8, a9
+; XTENSA-NEXT: ret
+ %m = mul i8 %x, 14 ; 14 is congruent to -2 modulo 16, so in the low 4 bits subtracting it equals adding (x << 1)
+ %a = sub i8 %y, %m ; expected output is a single addx2 of a2 and a3 with no multiply call
+ %r = and i8 %a, 15 ; keeps bits 3..0 only
+ ret i8 %r
+}
+
+define i8 @muladd_demand_2(i8 %x, i8 %y) nounwind { ; (y + 14*x) | 240: the upper 4 bits are forced to 1, so only the low 4 bits of the sum matter.
+; XTENSA-LABEL: muladd_demand_2:
+; XTENSA: slli a8, a2, 1
+; XTENSA-NEXT: sub a8, a3, a8
+; XTENSA-NEXT: movi a9, -16
+; XTENSA-NEXT: or a2, a8, a9
+; XTENSA-NEXT: ret
+ %m = mul i8 %x, 14 ; 14 is congruent to -2 modulo 16, so in the low 4 bits this equals -(x << 1)
+ %a = add i8 %y, %m ; expected output computes y - (x << 1) with slli/sub and no multiply call
+ %r = or i8 %a, 240 ; expected output uses -16 as the or mask
+ ret i8 %r
+}
+
+define i8 @mulsub_demand_2(i8 %x, i8 %y) nounwind { ; (y - 14*x) | 240: the upper 4 bits are forced to 1, so only the low 4 bits of the difference matter.
+; XTENSA-LABEL: mulsub_demand_2:
+; XTENSA: addx2 a8, a2, a3
+; XTENSA-NEXT: movi a9, -16
+; XTENSA-NEXT: or a2, a8, a9
+; XTENSA-NEXT: ret
+ %m = mul i8 %x, 14 ; 14 is congruent to -2 modulo 16, so in the low 4 bits subtracting it equals adding (x << 1)
+ %a = sub i8 %y, %m ; expected output is a single addx2 of a2 and a3 with no multiply call
+ %r = or i8 %a, 240 ; expected output uses -16 as the or mask
+ ret i8 %r
+}
diff --git a/llvm/test/CodeGen/Xtensa/rotl-rotr.ll b/llvm/test/CodeGen/Xtensa/rotl-rotr.ll
new file mode 100644
index 0000000000000..1dc52fbc94b41
--- /dev/null
+++ b/llvm/test/CodeGen/Xtensa/rotl-rotr.ll
@@ -0,0 +1,500 @@
+; RUN: llc -mtriple=xtensa -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=XTENSA %s
+
+define i32 @rotl_32(i32 %x, i32 %y) nounwind { ; 32-bit rotate-left idiom: (x << y) | (x >> (32 - y)).
+; XTENSA-LABEL: rotl_32:
+; XTENSA: ssl a3
+; XTENSA-NEXT: sll a8, a2
+; XTENSA-NEXT: movi a9, 32
+; XTENSA-NEXT: sub a9, a9, a3
+; XTENSA-NEXT: ssr a9
+; XTENSA-NEXT: srl a9, a2
+; XTENSA-NEXT: or a2, a8, a9
+; XTENSA-NEXT: ret
+ %z = sub i32 32, %y ; complementary shift amount
+ %b = shl i32 %x, %y
+ %c = lshr i32 %x, %z
+ %d = or i32 %b, %c ; expected output keeps the two shifts separate (sll and srl) and combines them with or
+ ret i32 %d
+}
+
+define i32 @rotr_32(i32 %x, i32 %y) nounwind { ; 32-bit rotate-right idiom: (x >> y) | (x << (32 - y)).
+; XTENSA-LABEL: rotr_32:
+; XTENSA: ssr a3
+; XTENSA-NEXT: srl a8, a2
+; XTENSA-NEXT: movi a9, 32
+; XTENSA-NEXT: sub a9, a9, a3
+; XTENSA-NEXT: ssl a9
+; XTENSA-NEXT: sll a9, a2
+; XTENSA-NEXT: or a2, a8, a9
+; XTENSA-NEXT: ret
+ %z = sub i32 32, %y ; complementary shift amount
+ %b = lshr i32 %x, %y
+ %c = shl i32 %x, %z
+ %d = or i32 %b, %c ; expected output keeps the two shifts separate (srl and sll) and combines them with or
+ ret i32 %d
+}
+
+define i64 @rotl_64(i64 %x, i64 %y) nounwind { ; 64-bit rotate-left idiom: (x << y) | (x >> (64 - y)).
+; XTENSA-LABEL: rotl_64:
+; XTENSA: movi a8, 64
+; XTENSA-NEXT: sub a8, a8, a4
+; XTENSA-NEXT: ssr a8
+; XTENSA-NEXT: src a11, a3, a2
+; XTENSA-NEXT: movi a9, 32
+; XTENSA-NEXT: sub a9, a9, a4
+; XTENSA-NEXT: ssr a9
+; XTENSA-NEXT: srl a7, a3
+; XTENSA-NEXT: movi a10, 0
+; XTENSA-NEXT: blt a9, a10, .LBB2_2
+; XTENSA-NEXT: # %bb.1:
+; XTENSA-NEXT: or a11, a7, a7
+; XTENSA-NEXT: .LBB2_2:
+; XTENSA-NEXT: ssl a4
+; XTENSA-NEXT: sll a7, a2
+; XTENSA-NEXT: addi a5, a4, -32
+; XTENSA-NEXT: blt a5, a10, .LBB2_4
+; XTENSA-NEXT: # %bb.3:
+; XTENSA-NEXT: or a7, a10, a10
+; XTENSA-NEXT: .LBB2_4:
+; XTENSA-NEXT: ssl a4
+; XTENSA-NEXT: src a6, a3, a2
+; XTENSA-NEXT: ssl a5
+; XTENSA-NEXT: sll a4, a2
+; XTENSA-NEXT: blt a5, a10, .LBB2_6
+; XTENSA-NEXT: # %bb.5:
+; XTENSA-NEXT: or a6, a4, a4
+; XTENSA-NEXT: .LBB2_6:
+; XTENSA-NEXT: or a2, a7, a11
+; XTENSA-NEXT: ssr a8
+; XTENSA-NEXT: srl a8, a3
+; XTENSA-NEXT: blt a9, a10, .LBB2_8
+; XTENSA-NEXT: # %bb.7:
+; XTENSA-NEXT: or a8, a10, a10
+; XTENSA-NEXT: .LBB2_8:
+; XTENSA-NEXT: or a3, a6, a8
+; XTENSA-NEXT: ret
+ %z = sub i64 64, %y ; complementary shift amount
+ %b = shl i64 %x, %y ; expected output expands each i64 shift over 32-bit registers with src/sll/srl and blt-guarded selects
+ %c = lshr i64 %x, %z
+ %d = or i64 %b, %c ; expected output ors the two result words separately into a2 and a3
+ ret i64 %d
+}
+
+define i64 @rotr_64(i64 %x, i64 %y) nounwind { ; 64-bit rotate-right idiom: (x >> y) | (x << (64 - y)).
+; XTENSA-LABEL: rotr_64:
+; XTENSA: ssr a4
+; XTENSA-NEXT: src a10, a3, a2
+; XTENSA-NEXT: addi a8, a4, -32
+; XTENSA-NEXT: ssr a8
+; XTENSA-NEXT: srl a11, a3
+; XTENSA-NEXT: movi a9, 0
+; XTENSA-NEXT: blt a8, a9, .LBB3_2
+; XTENSA-NEXT: # %bb.1:
+; XTENSA-NEXT: or a10, a11, a11
+; XTENSA-NEXT: .LBB3_2:
+; XTENSA-NEXT: movi a11, 32
+; XTENSA-NEXT: sub a7, a11, a4
+; XTENSA-NEXT: movi a11, 64
+; XTENSA-NEXT: sub a11, a11, a4
+; XTENSA-NEXT: ssl a11
+; XTENSA-NEXT: sll a6, a2
+; XTENSA-NEXT: blt a7, a9, .LBB3_4
+; XTENSA-NEXT: # %bb.3:
+; XTENSA-NEXT: or a6, a9, a9
+; XTENSA-NEXT: .LBB3_4:
+; XTENSA-NEXT: ssl a11
+; XTENSA-NEXT: src a11, a3, a2
+; XTENSA-NEXT: ssl a7
+; XTENSA-NEXT: sll a5, a2
+; XTENSA-NEXT: blt a7, a9, .LBB3_6
+; XTENSA-NEXT: # %bb.5:
+; XTENSA-NEXT: or a11, a5, a5
+; XTENSA-NEXT: .LBB3_6:
+; XTENSA-NEXT: or a2, a10, a6
+; XTENSA-NEXT: ssr a4
+; XTENSA-NEXT: srl a10, a3
+; XTENSA-NEXT: blt a8, a9, .LBB3_8
+; XTENSA-NEXT: # %bb.7:
+; XTENSA-NEXT: or a10, a9, a9
+; XTENSA-NEXT: .LBB3_8:
+; XTENSA-NEXT: or a3, a10, a11
+; XTENSA-NEXT: ret
+ %z = sub i64 64, %y ; complementary shift amount
+ %b = lshr i64 %x, %y ; expected output expands each i64 shift over 32-bit registers with src/sll/srl and blt-guarded selects
+ %c = shl i64 %x, %z
+ %d = or i64 %b, %c ; expected output ors the two result words separately into a2 and a3
+ ret i64 %d
+}
+
+define i32 @rotl_32_mask(i32 %x, i32 %y) nounwind { ; 32-bit rotate-left idiom with the right-shift amount written as (0 - y) & 31.
+; XTENSA-LABEL: rotl_32_mask:
+; XTENSA: ssl a3
+; XTENSA-NEXT: sll a8, a2
+; XTENSA-NEXT: neg a9, a3
+; XTENSA-NEXT: movi a10, 31
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: ssr a9
+; XTENSA-NEXT: srl a9, a2
+; XTENSA-NEXT: or a2, a8, a9
+; XTENSA-NEXT: ret
+ %z = sub i32 0, %y ; expected output uses neg
+ %and = and i32 %z, 31 ; expected output keeps the explicit mask with 31
+ %b = shl i32 %x, %y
+ %c = lshr i32 %x, %and
+ %d = or i32 %b, %c
+ ret i32 %d
+}
+
+define i32 @rotl_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind { ; Rotate-left-like pattern whose left-shift amount is masked with 63 and right-shift amount with 31.
+; XTENSA-LABEL: rotl_32_mask_and_63_and_31:
+; XTENSA: movi a8, 63
+; XTENSA-NEXT: and a8, a3, a8
+; XTENSA-NEXT: ssl a8
+; XTENSA-NEXT: sll a8, a2
+; XTENSA-NEXT: neg a9, a3
+; XTENSA-NEXT: movi a10, 31
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: ssr a9
+; XTENSA-NEXT: srl a9, a2
+; XTENSA-NEXT: or a2, a8, a9
+; XTENSA-NEXT: ret
+ %a = and i32 %y, 63 ; wider-than-needed mask on the left-shift amount; expected output keeps it
+ %b = shl i32 %x, %a
+ %c = sub i32 0, %y
+ %d = and i32 %c, 31 ; right-shift amount is (0 - y) & 31
+ %e = lshr i32 %x, %d
+ %f = or i32 %b, %e
+ ret i32 %f
+}
+
+define i32 @rotr_32_mask(i32 %x, i32 %y) nounwind { ; 32-bit rotate-right idiom with the left-shift amount written as (0 - y) & 31.
+; XTENSA-LABEL: rotr_32_mask:
+; XTENSA: ssr a3
+; XTENSA-NEXT: srl a8, a2
+; XTENSA-NEXT: neg a9, a3
+; XTENSA-NEXT: movi a10, 31
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: ssl a9
+; XTENSA-NEXT: sll a9, a2
+; XTENSA-NEXT: or a2, a8, a9
+; XTENSA-NEXT: ret
+ %z = sub i32 0, %y ; expected output uses neg
+ %and = and i32 %z, 31 ; expected output keeps the explicit mask with 31
+ %b = lshr i32 %x, %y
+ %c = shl i32 %x, %and
+ %d = or i32 %b, %c
+ ret i32 %d
+}
+
+define i32 @rotr_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind { ; Rotate-right-like pattern whose right-shift amount is masked with 63 and left-shift amount with 31.
+; XTENSA-LABEL: rotr_32_mask_and_63_and_31:
+; XTENSA: movi a8, 63
+; XTENSA-NEXT: and a8, a3, a8
+; XTENSA-NEXT: ssr a8
+; XTENSA-NEXT: srl a8, a2
+; XTENSA-NEXT: neg a9, a3
+; XTENSA-NEXT: movi a10, 31
+; XTENSA-NEXT: and a9, a9, a10
+; XTENSA-NEXT: ssl a9
+; XTENSA-NEXT: sll a9, a2
+; XTENSA-NEXT: or a2, a8, a9
+; XTENSA-NEXT: ret
+ %a = and i32 %y, 63 ; wider-than-needed mask on the right-shift amount; expected output keeps it
+ %b = lshr i32 %x, %a
+ %c = sub i32 0, %y
+ %d = and i32 %c, 31 ; left-shift amount is (0 - y) & 31
+ %e = shl i32 %x, %d
+ %f = or i32 %b, %e
+ ret i32 %f
+}
+
+define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind { ; 64-bit rotate-left idiom with the right-shift amount written as (0 - y) & 63.
+; XTENSA-LABEL: rotl_64_mask:
+; XTENSA: ssl a4
+; XTENSA-NEXT: src a10, a3, a2
+; XTENSA-NEXT: addi a8, a4, -32
+; XTENSA-NEXT: ssl a8
+; XTENSA-NEXT: sll a11, a2
+; XTENSA-NEXT: movi a9, 0
+; XTENSA-NEXT: blt a8, a9, .LBB8_2
+; XTENSA-NEXT: # %bb.1:
+; XTENSA-NEXT: or a10, a11, a11
+; XTENSA-NEXT: .LBB8_2:
+; XTENSA-NEXT: neg a11, a4
+; XTENSA-NEXT: movi a7, 63
+; XTENSA-NEXT: and a7, a11, a7
+; XTENSA-NEXT: ssr a7
+; XTENSA-NEXT: srl a11, a3
+; XTENSA-NEXT: addi a6, a7, -32
+; XTENSA-NEXT: blt a6, a9, .LBB8_4
+; XTENSA-NEXT: # %bb.3:
+; XTENSA-NEXT: or a11, a9, a9
+; XTENSA-NEXT: .LBB8_4:
+; XTENSA-NEXT: ssr a7
+; XTENSA-NEXT: src a7, a3, a2
+; XTENSA-NEXT: ssr a6
+; XTENSA-NEXT: srl a5, a3
+; XTENSA-NEXT: blt a6, a9, .LBB8_6
+; XTENSA-NEXT: # %bb.5:
+; XTENSA-NEXT: or a7, a5, a5
+; XTENSA-NEXT: .LBB8_6:
+; XTENSA-NEXT: or a3, a10, a11
+; XTENSA-NEXT: ssl a4
+; XTENSA-NEXT: sll a10, a2
+; XTENSA-NEXT: blt a8, a9, .LBB8_8
+; XTENSA-NEXT: # %bb.7:
+; XTENSA-NEXT: or a10, a9, a9
+; XTENSA-NEXT: .LBB8_8:
+; XTENSA-NEXT: or a2, a10, a7
+; XTENSA-NEXT: ret
+ %z = sub i64 0, %y ; expected output uses neg on a4
+ %and = and i64 %z, 63 ; expected output keeps the explicit mask with 63
+ %b = shl i64 %x, %y ; expected output expands each i64 shift over 32-bit registers with src/sll/srl and blt-guarded selects
+ %c = lshr i64 %x, %and
+ %d = or i64 %b, %c ; expected output ors the two result words separately into a3 and a2
+ ret i64 %d
+}
+
+define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind { ; 64-bit rotate-left-like pattern: left-shift amount masked with 127, right-shift amount with 63.
+; XTENSA-LABEL: rotl_64_mask_and_127_and_63:
+; XTENSA: movi a8, 127
+; XTENSA-NEXT: and a8, a4, a8
+; XTENSA-NEXT: ssl a8
+; XTENSA-NEXT: src a11, a3, a2
+; XTENSA-NEXT: addi a9, a8, -32
+; XTENSA-NEXT: ssl a9
+; XTENSA-NEXT: sll a7, a2
+; XTENSA-NEXT: movi a10, 0
+; XTENSA-NEXT: blt a9, a10, .LBB9_2
+; XTENSA-NEXT: # %bb.1:
+; XTENSA-NEXT: or a11, a7, a7
+; XTENSA-NEXT: .LBB9_2:
+; XTENSA-NEXT: neg a7, a4
+; XTENSA-NEXT: movi a6, 63
+; XTENSA-NEXT: and a6, a7, a6
+; XTENSA-NEXT: ssr a6
+; XTENSA-NEXT: srl a7, a3
+; XTENSA-NEXT: addi a5, a6, -32
+; XTENSA-NEXT: blt a5, a10, .LBB9_4
+; XTENSA-NEXT: # %bb.3:
+; XTENSA-NEXT: or a7, a10, a10
+; XTENSA-NEXT: .LBB9_4:
+; XTENSA-NEXT: ssr a6
+; XTENSA-NEXT: src a6, a3, a2
+; XTENSA-NEXT: ssr a5
+; XTENSA-NEXT: srl a4, a3
+; XTENSA-NEXT: blt a5, a10, .LBB9_6
+; XTENSA-NEXT: # %bb.5:
+; XTENSA-NEXT: or a6, a4, a4
+; XTENSA-NEXT: .LBB9_6:
+; XTENSA-NEXT: or a3, a11, a7
+; XTENSA-NEXT: ssl a8
+; XTENSA-NEXT: sll a8, a2
+; XTENSA-NEXT: blt a9, a10, .LBB9_8
+; XTENSA-NEXT: # %bb.7:
+; XTENSA-NEXT: or a8, a10, a10
+; XTENSA-NEXT: .LBB9_8:
+; XTENSA-NEXT: or a2, a8, a6
+; XTENSA-NEXT: ret
+ %a = and i64 %y, 127 ; wider-than-needed mask on the left-shift amount; expected output keeps it
+ %b = shl i64 %x, %a ; expected output expands each i64 shift over 32-bit registers with src/sll/srl and blt-guarded selects
+ %c = sub i64 0, %y
+ %d = and i64 %c, 63 ; right-shift amount is (0 - y) & 63
+ %e = lshr i64 %x, %d
+ %f = or i64 %b, %e
+ ret i64 %f
+}
+
+define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind { ; 64-bit rotate-right idiom with the left-shift amount written as (0 - y) & 63.
+; XTENSA-LABEL: rotr_64_mask:
+; XTENSA: ssr a4
+; XTENSA-NEXT: src a10, a3, a2
+; XTENSA-NEXT: addi a8, a4, -32
+; XTENSA-NEXT: ssr a8
+; XTENSA-NEXT: srl a11, a3
+; XTENSA-NEXT: movi a9, 0
+; XTENSA-NEXT: blt a8, a9, .LBB10_2
+; XTENSA-NEXT: # %bb.1:
+; XTENSA-NEXT: or a10, a11, a11
+; XTENSA-NEXT: .LBB10_2:
+; XTENSA-NEXT: neg a11, a4
+; XTENSA-NEXT: movi a7, 63
+; XTENSA-NEXT: and a7, a11, a7
+; XTENSA-NEXT: ssl a7
+; XTENSA-NEXT: sll a11, a2
+; XTENSA-NEXT: addi a6, a7, -32
+; XTENSA-NEXT: blt a6, a9, .LBB10_4
+; XTENSA-NEXT: # %bb.3:
+; XTENSA-NEXT: or a11, a9, a9
+; XTENSA-NEXT: .LBB10_4:
+; XTENSA-NEXT: ssl a7
+; XTENSA-NEXT: src a7, a3, a2
+; XTENSA-NEXT: ssl a6
+; XTENSA-NEXT: sll a5, a2
+; XTENSA-NEXT: blt a6, a9, .LBB10_6
+; XTENSA-NEXT: # %bb.5:
+; XTENSA-NEXT: or a7, a5, a5
+; XTENSA-NEXT: .LBB10_6:
+; XTENSA-NEXT: or a2, a10, a11
+; XTENSA-NEXT: ssr a4
+; XTENSA-NEXT: srl a10, a3
+; XTENSA-NEXT: blt a8, a9, .LBB10_8
+; XTENSA-NEXT: # %bb.7:
+; XTENSA-NEXT: or a10, a9, a9
+; XTENSA-NEXT: .LBB10_8:
+; XTENSA-NEXT: or a3, a10, a7
+; XTENSA-NEXT: ret
+ %z = sub i64 0, %y ; expected output uses neg on a4
+ %and = and i64 %z, 63 ; expected output keeps the explicit mask with 63
+ %b = lshr i64 %x, %y ; expected output expands each i64 shift over 32-bit registers with src/sll/srl and blt-guarded selects
+ %c = shl i64 %x, %and
+ %d = or i64 %b, %c ; expected output ors the two result words separately into a2 and a3
+ ret i64 %d
+}
+
+define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind { ; 64-bit rotate-right-like pattern: right-shift amount masked with 127, left-shift amount with 63.
+; XTENSA-LABEL: rotr_64_mask_and_127_and_63:
+; XTENSA: movi a8, 127
+; XTENSA-NEXT: and a8, a4, a8
+; XTENSA-NEXT: ssr a8
+; XTENSA-NEXT: src a11, a3, a2
+; XTENSA-NEXT: addi a9, a8, -32
+; XTENSA-NEXT: ssr a9
+; XTENSA-NEXT: srl a7, a3
+; XTENSA-NEXT: movi a10, 0
+; XTENSA-NEXT: blt a9, a10, .LBB11_2
+; XTENSA-NEXT: # %bb.1:
+; XTENSA-NEXT: or a11, a7, a7
+; XTENSA-NEXT: .LBB11_2:
+; XTENSA-NEXT: neg a7, a4
+; XTENSA-NEXT: movi a6, 63
+; XTENSA-NEXT: and a6, a7, a6
+; XTENSA-NEXT: ssl a6
+; XTENSA-NEXT: sll a7, a2
+; XTENSA-NEXT: addi a5, a6, -32
+; XTENSA-NEXT: blt a5, a10, .LBB11_4
+; XTENSA-NEXT: # %bb.3:
+; XTENSA-NEXT: or a7, a10, a10
+; XTENSA-NEXT: .LBB11_4:
+; XTENSA-NEXT: ssl a6
+; XTENSA-NEXT: src a6, a3, a2
+; XTENSA-NEXT: ssl a5
+; XTENSA-NEXT: sll a4, a2
+; XTENSA-NEXT: blt a5, a10, .LBB11_6
+; XTENSA-NEXT: # %bb.5:
+; XTENSA-NEXT: or a6, a4, a4
+; XTENSA-NEXT: .LBB11_6:
+; XTENSA-NEXT: or a2, a11, a7
+; XTENSA-NEXT: ssr a8
+; XTENSA-NEXT: srl a8, a3
+; XTENSA-NEXT: blt a9, a10, .LBB11_8
+; XTENSA-NEXT: # %bb.7:
+; XTENSA-NEXT: or a8, a10, a10
+; XTENSA-NEXT: .LBB11_8:
+; XTENSA-NEXT: or a3, a8, a6
+; XTENSA-NEXT: ret
+ %a = and i64 %y, 127 ; wider-than-needed mask on the right-shift amount; expected output keeps it
+ %b = lshr i64 %x, %a ; expected output expands each i64 shift over 32-bit registers with src/sll/srl and blt-guarded selects
+ %c = sub i64 0, %y
+ %d = and i64 %c, 63 ; left-shift amount is (0 - y) & 63
+ %e = shl i64 %x, %d
+ %f = or i64 %b, %e
+ ret i64 %f
+}
+
+define signext i32 @rotl_32_mask_shared(i32 signext %a, i32 signext %b, i32 signext %amt) nounwind { ; One masked amount (amt & 31) feeds both a rotate-left of %a and a plain shl of %b.
+; XTENSA-LABEL: rotl_32_mask_shared:
+; XTENSA: movi a8, 31
+; XTENSA-NEXT: and a9, a4, a8
+; XTENSA-NEXT: ssl a9
+; XTENSA-NEXT: sll a10, a2
+; XTENSA-NEXT: neg a11, a4
+; XTENSA-NEXT: and a8, a11, a8
+; XTENSA-NEXT: ssr a8
+; XTENSA-NEXT: srl a8, a2
+; XTENSA-NEXT: or a8, a10, a8
+; XTENSA-NEXT: ssl a9
+; XTENSA-NEXT: sll a9, a3
+; XTENSA-NEXT: add a2, a8, a9
+; XTENSA-NEXT: ret
+ %maskedamt = and i32 %amt, 31 ; expected output computes this mask once (a9) and uses it for both left shifts
+ %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %maskedamt) ; fshl with both data operands equal is a rotate left; expected output expands it to sll/srl/or
+ %2 = shl i32 %b, %maskedamt
+ %3 = add i32 %1, %2
+ ret i32 %3
+}
+declare i32 @llvm.fshl.i32(i32, i32, i32) ; funnel-shift-left intrinsic called by the rotl_* tests in this file
+
+define signext i32 @rotr_32_mask_shared(i32 signext %a, i32 signext %b, i32 signext %amt) nounwind { ; One masked amount (amt & 31) feeds both a rotate-right of %a and a plain shl of %b.
+; XTENSA-LABEL: rotr_32_mask_shared:
+; XTENSA: movi a8, 31
+; XTENSA-NEXT: and a9, a4, a8
+; XTENSA-NEXT: ssr a9
+; XTENSA-NEXT: srl a10, a2
+; XTENSA-NEXT: neg a11, a4
+; XTENSA-NEXT: and a8, a11, a8
+; XTENSA-NEXT: ssl a8
+; XTENSA-NEXT: sll a8, a2
+; XTENSA-NEXT: or a8, a10, a8
+; XTENSA-NEXT: ssl a9
+; XTENSA-NEXT: sll a9, a3
+; XTENSA-NEXT: add a2, a8, a9
+; XTENSA-NEXT: ret
+ %maskedamt = and i32 %amt, 31 ; expected output computes this mask once (a9) and uses it for the rotate's srl and the separate sll
+ %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %maskedamt) ; fshr with both data operands equal is a rotate right; expected output expands it to srl/sll/or
+ %2 = shl i32 %b, %maskedamt
+ %3 = add i32 %1, %2
+ ret i32 %3
+}
+declare i32 @llvm.fshr.i32(i32, i32, i32) ; funnel-shift-right intrinsic called by the rotr_* tests in this file
+
+define signext i32 @rotl_32_mask_multiple(i32 signext %a, i32 signext %b, i32 signext %amt) nounwind { ; Two rotate-lefts (of %a and of %b) share one masked amount (amt & 31).
+; XTENSA-LABEL: rotl_32_mask_multiple:
+; XTENSA: movi a8, 31
+; XTENSA-NEXT: and a9, a4, a8
+; XTENSA-NEXT: ssl a9
+; XTENSA-NEXT: sll a10, a3
+; XTENSA-NEXT: neg a11, a4
+; XTENSA-NEXT: and a8, a11, a8
+; XTENSA-NEXT: ssr a8
+; XTENSA-NEXT: srl a11, a3
+; XTENSA-NEXT: or a10, a10, a11
+; XTENSA-NEXT: ssl a9
+; XTENSA-NEXT: sll a9, a2
+; XTENSA-NEXT: ssr a8
+; XTENSA-NEXT: srl a8, a2
+; XTENSA-NEXT: or a8, a9, a8
+; XTENSA-NEXT: add a2, a8, a10
+; XTENSA-NEXT: ret
+ %maskedamt = and i32 %amt, 31 ; expected output computes the two shift amounts (a9 and a8) once and reuses them for both rotates
+ %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %maskedamt) ; fshl with both data operands equal is a rotate left
+ %2 = tail call i32 @llvm.fshl.i32(i32 %b, i32 %b, i32 %maskedamt)
+ %3 = add i32 %1, %2
+ ret i32 %3
+}
+
+define signext i32 @rotr_32_mask_multiple(i32 signext %a, i32 signext %b, i32 signext %amt) nounwind { ; Two rotate-rights (of %a and of %b) share one masked amount (amt & 31).
+; XTENSA-LABEL: rotr_32_mask_multiple:
+; XTENSA: movi a8, 31
+; XTENSA-NEXT: and a9, a4, a8
+; XTENSA-NEXT: ssr a9
+; XTENSA-NEXT: srl a10, a3
+; XTENSA-NEXT: neg a11, a4
+; XTENSA-NEXT: and a8, a11, a8
+; XTENSA-NEXT: ssl a8
+; XTENSA-NEXT: sll a11, a3
+; XTENSA-NEXT: or a10, a10, a11
+; XTENSA-NEXT: ssr a9
+; XTENSA-NEXT: srl a9, a2
+; XTENSA-NEXT: ssl a8
+; XTENSA-NEXT: sll a8, a2
+; XTENSA-NEXT: or a8, a9, a8
+; XTENSA-NEXT: add a2, a8, a10
+; XTENSA-NEXT: ret
+ %maskedamt = and i32 %amt, 31 ; expected output computes the two shift amounts (a9 and a8) once and reuses them for both rotates
+ %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %maskedamt) ; fshr with both data operands equal is a rotate right
+ %2 = tail call i32 @llvm.fshr.i32(i32 %b, i32 %b, i32 %maskedamt)
+ %3 = add i32 %1, %2
+ ret i32 %3
+}
diff --git a/llvm/test/CodeGen/Xtensa/shift.ll b/llvm/test/CodeGen/Xtensa/shift.ll
new file mode 100644
index 0000000000000..acca8551fa621
--- /dev/null
+++ b/llvm/test/CodeGen/Xtensa/shift.ll
@@ -0,0 +1,72 @@
+; RUN: llc -mtriple=xtensa -verify-machineinstrs < %s \
+; RUN: | FileCheck %s
+
+define i64 @lshl_64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: lshl_64:
+; CHECK: ssl a4
+; CHECK-NEXT: src a3, a3, a2
+; CHECK-NEXT: addi a8, a4, -32
+; CHECK-NEXT: ssl a8
+; CHECK-NEXT: sll a10, a2
+; CHECK-NEXT: movi a9, 0
+; CHECK-NEXT: blt a8, a9, .LBB0_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: or a3, a10, a10
+; CHECK-NEXT: .LBB0_2:
+; CHECK-NEXT: ssl a4
+; CHECK-NEXT: sll a2, a2
+; CHECK-NEXT: blt a8, a9, .LBB0_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: or a2, a9, a9
+; CHECK-NEXT: .LBB0_4:
+; CHECK-NEXT: ret
+ %c = shl i64 %x, %y
+ ret i64 %c
+}
+
+define i64 @lshr_64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: lshr_64:
+; CHECK: ssr a4
+; CHECK-NEXT: src a2, a3, a2
+; CHECK-NEXT: addi a8, a4, -32
+; CHECK-NEXT: ssr a8
+; CHECK-NEXT: srl a10, a3
+; CHECK-NEXT: movi a9, 0
+; CHECK-NEXT: blt a8, a9, .LBB1_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: or a2, a10, a10
+; CHECK-NEXT: .LBB1_2:
+; CHECK-NEXT: ssr a4
+; CHECK-NEXT: srl a3, a3
+; CHECK-NEXT: blt a8, a9, .LBB1_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: or a3, a9, a9
+; CHECK-NEXT: .LBB1_4:
+; CHECK-NEXT: ret
+ %c = lshr i64 %x, %y
+ ret i64 %c
+}
+
+define i64 @ashr_64(i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: ashr_64:
+; CHECK: ssr a4
+; CHECK-NEXT: src a2, a3, a2
+; CHECK-NEXT: addi a9, a4, -32
+; CHECK-NEXT: ssr a9
+; CHECK-NEXT: sra a8, a3
+; CHECK-NEXT: movi a10, 0
+; CHECK-NEXT: blt a9, a10, .LBB2_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: or a2, a8, a8
+; CHECK-NEXT: .LBB2_2:
+; CHECK-NEXT: ssr a4
+; CHECK-NEXT: sra a8, a3
+; CHECK-NEXT: blt a9, a10, .LBB2_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: srai a8, a3, 31
+; CHECK-NEXT: .LBB2_4:
+; CHECK-NEXT: or a3, a8, a8
+; CHECK-NEXT: ret
+ %c = ashr i64 %x, %y
+ ret i64 %c
+}
>From 55713dda7edad1fe2fc8b0b3db684add00f17360 Mon Sep 17 00:00:00 2001
From: Andrei Safronov <safronov at espressif.com>
Date: Fri, 26 Jul 2024 18:03:37 +0300
Subject: [PATCH 2/2] [Xtensa] Minor fixes in constant pool lowering.
---
llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 22 ++++-----
llvm/lib/Target/Xtensa/XtensaISelLowering.h | 2 +-
.../CodeGen/Xtensa/constantpool-aggregate.ll | 45 +++++++++++++++++++
3 files changed, 57 insertions(+), 12 deletions(-)
create mode 100644 llvm/test/CodeGen/Xtensa/constantpool-aggregate.ll
diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
index 8c30dbbad821e..e1a6dcc8bc7ae 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp
@@ -690,25 +690,25 @@ SDValue XtensaTargetLowering::getAddrPCRel(SDValue Op,
return DAG.getNode(XtensaISD::PCREL_WRAPPER, DL, Ty, Op);
}
-SDValue XtensaTargetLowering::LowerConstantPool(ConstantPoolSDNode *CP,
+SDValue XtensaTargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
- auto C = const_cast<Constant *>(CP->getConstVal());
- auto T = const_cast<Type *>(CP->getType());
+ EVT PtrVT = Op.getValueType();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ Constant *C = CP->getConstVal();
+ Type *T = CP->getType();
SDValue Result;
// Do not use constant pool for aggregate or vector constant types,
// in such cases create global variable, for example to store tabel
// when we lower CTTZ operation.
- if (T->isAggregateType() || T->isVectorTy()) {
- auto AFI = DAG.getMachineFunction().getInfo<XtensaFunctionInfo>();
- auto M = const_cast<Module *>(
- DAG.getMachineFunction().getFunction().getParent());
+ if (T->isAggregateType()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ auto AFI = MF.getInfo<XtensaFunctionInfo>();
+ auto M = const_cast<Module *>(MF.getFunction().getParent());
auto GV = new GlobalVariable(
*M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
- Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
- Twine(AFI->createLabelUId()));
+ Twine(MF.getFunctionNumber()) + "_" + Twine(AFI->createLabelUId()));
Result = DAG.getTargetConstantPool(GV, PtrVT, Align(4));
} else {
if (!CP->isMachineConstantPoolEntry()) {
@@ -898,7 +898,7 @@ SDValue XtensaTargetLowering::LowerOperation(SDValue Op,
case ISD::JumpTable:
return LowerJumpTable(Op, DAG);
case ISD::ConstantPool:
- return LowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
+ return LowerConstantPool(Op, DAG);
case ISD::MUL:
return LowerMUL(Op, DAG);
case ISD::SELECT_CC:
diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.h b/llvm/lib/Target/Xtensa/XtensaISelLowering.h
index b4c4929922cbf..8e18b50f211da 100644
--- a/llvm/lib/Target/Xtensa/XtensaISelLowering.h
+++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.h
@@ -109,7 +109,7 @@ class XtensaTargetLowering : public TargetLowering {
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerConstantPool(ConstantPoolSDNode *CP, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/Xtensa/constantpool-aggregate.ll b/llvm/test/CodeGen/Xtensa/constantpool-aggregate.ll
new file mode 100644
index 0000000000000..3ace3a6d604b3
--- /dev/null
+++ b/llvm/test/CodeGen/Xtensa/constantpool-aggregate.ll
@@ -0,0 +1,45 @@
+; RUN: llc -mtriple=xtensa -verify-machineinstrs < %s \
+; RUN: | FileCheck %s
+
+; Check that a constant array (the CTTZ lookup table) is emitted as a global
+; variable, and that only the label of that table is placed in the constant pool.
+
+; CHECK: .literal_position
+; CHECK-NEXT: .literal .LCPI0_0, 125613361
+; CHECK-NEXT: .literal .LCPI0_1, __mulsi3
+; CHECK-NEXT: .literal .LCPI0_2, .LCP0_0
+; CHECK-NEXT: .global test_cttz_i32
+
+define i32 @test_cttz_i32(i32 %a) nounwind {
+; CHECK-LABEL: test_cttz_i32:
+; CHECK: addi a8, a1, -16
+; CHECK-NEXT: or a1, a8, a8
+; CHECK-NEXT: s32i a0, a1, 0 # 4-byte Folded Spill
+; CHECK-NEXT: or a8, a2, a2
+; CHECK-NEXT: movi a2, 32
+; CHECK-NEXT: beqz a8, .LBB0_2
+; CHECK-NEXT: j .LBB0_1
+; CHECK-NEXT: .LBB0_1: # %cond.false
+; CHECK-NEXT: neg a9, a8
+; CHECK-NEXT: and a2, a8, a9
+; CHECK-NEXT: l32r a3, .LCPI0_0
+; CHECK-NEXT: l32r a8, .LCPI0_1
+; CHECK-NEXT: callx0 a8
+; CHECK-NEXT: movi a8, 27
+; CHECK-NEXT: ssr a8
+; CHECK-NEXT: srl a8, a2
+; CHECK-NEXT: l32r a9, .LCPI0_2
+; CHECK-NEXT: add a8, a9, a8
+; CHECK-NEXT: l8ui a2, a8, 0
+; CHECK-NEXT: .LBB0_2: # %cond.end
+; CHECK-NEXT: l32i a0, a1, 0 # 4-byte Folded Reload
+; CHECK-NEXT: addi a8, a1, 16
+; CHECK-NEXT: or a1, a8, a8
+; CHECK-NEXT: ret
+ %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+ ret i32 %tmp
+}
+
+; CHECK: .LCP0_0:
+; CHECK-NEXT: .ascii "\000\001\034\002\035\016\030\003\036\026\024\017\031\021\004\b\037\033\r\027\025\023\020\007\032\f\022\006\013\005\n\t"
+; CHECK-NEXT: .size .LCP0_0, 32
More information about the llvm-commits
mailing list