[llvm] r209566 - ARM64: extract a 32-bit subreg when selecting an inreg extend
Tim Northover
tnorthover at apple.com
Sat May 24 00:05:44 PDT 2014
Author: tnorthover
Date: Sat May 24 02:05:42 2014
New Revision: 209566
URL: http://llvm.org/viewvc/llvm-project?rev=209566&view=rev
Log:
ARM64: extract a 32-bit subreg when selecting an inreg extend
After the load/store refactoring, we were sometimes trying to feed a
GPR64 into a 32-bit register offset operand. This failed in
copyPhysReg.
Modified:
llvm/trunk/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp
llvm/trunk/test/CodeGen/ARM64/register-offset-addressing.ll
Modified: llvm/trunk/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp?rev=209566&r1=209565&r2=209566&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp Sat May 24 02:05:42 2014
@@ -517,6 +517,21 @@ SDNode *ARM64DAGToDAGISel::SelectMULLV64
return CurDAG->getMachineNode(SMULLOpc, SDLoc(N), N->getValueType(0), Ops);
}
+/// Instructions that accept extend modifiers like UXTW expect the register
+/// being extended to be a GPR32, but the incoming DAG might be acting on a
+/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
+/// this is the case.
+static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
+ if (N.getValueType() == MVT::i32)
+ return N;
+
+ SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32);
+ MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ SDLoc(N), MVT::i32, N, SubReg);
+ return SDValue(Node, 0);
+}
+
+
/// SelectArithExtendedRegister - Select a "extended register" operand. This
/// operand folds in an extend followed by an optional left shift.
bool ARM64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
@@ -551,13 +566,7 @@ bool ARM64DAGToDAGISel::SelectArithExten
// there might not be an actual 32-bit value in the program. We can
// (harmlessly) synthesize one by injected an EXTRACT_SUBREG here.
assert(Ext != ARM64_AM::UXTX && Ext != ARM64_AM::SXTX);
- if (Reg.getValueType() == MVT::i64) {
- SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32);
- MachineSDNode *Node = CurDAG->getMachineNode(
- TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32, Reg, SubReg);
- Reg = SDValue(Node, 0);
- }
-
+ Reg = narrowIfNeeded(CurDAG, Reg);
Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), MVT::i32);
return isWorthFolding(N);
}
@@ -677,7 +686,7 @@ bool ARM64DAGToDAGISel::SelectExtendedSH
if (Ext == ARM64_AM::InvalidShiftExtend)
return false;
- Offset = N.getOperand(0).getOperand(0);
+ Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
SignExtend = CurDAG->getTargetConstant(Ext == ARM64_AM::SXTW, MVT::i32);
} else {
Offset = N.getOperand(0);
@@ -746,7 +755,7 @@ bool ARM64DAGToDAGISel::SelectAddrModeWR
if (IsExtendedRegisterWorthFolding &&
(Ext = getExtendTypeForNode(LHS, true)) != ARM64_AM::InvalidShiftExtend) {
Base = RHS;
- Offset = LHS.getOperand(0);
+ Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
SignExtend = CurDAG->getTargetConstant(Ext == ARM64_AM::SXTW, MVT::i32);
if (isWorthFolding(LHS))
return true;
@@ -756,7 +765,7 @@ bool ARM64DAGToDAGISel::SelectAddrModeWR
if (IsExtendedRegisterWorthFolding &&
(Ext = getExtendTypeForNode(RHS, true)) != ARM64_AM::InvalidShiftExtend) {
Base = LHS;
- Offset = RHS.getOperand(0);
+ Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
SignExtend = CurDAG->getTargetConstant(Ext == ARM64_AM::SXTW, MVT::i32);
if (isWorthFolding(RHS))
return true;
Modified: llvm/trunk/test/CodeGen/ARM64/register-offset-addressing.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/register-offset-addressing.ll?rev=209566&r1=209565&r2=209566&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM64/register-offset-addressing.ll (original)
+++ llvm/trunk/test/CodeGen/ARM64/register-offset-addressing.ll Sat May 24 02:05:42 2014
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s
-define i8 @t1(i16* %a, i64 %b) {
-; CHECK: t1
+define i8 @test_64bit_add(i16* %a, i64 %b) {
+; CHECK-LABEL: test_64bit_add:
; CHECK: lsl [[REG:x[0-9]+]], x1, #1
; CHECK: ldrb w0, [x0, [[REG]]]
; CHECK: ret
@@ -10,3 +10,136 @@ define i8 @t1(i16* %a, i64 %b) {
%tmp3 = trunc i16 %tmp2 to i8
ret i8 %tmp3
}
+
+; These tests are trying to form SEXT and ZEXT operations that never leave i64
+; space, to make sure LLVM can adapt the offset register correctly.
+define void @ldst_8bit(i8* %base, i64 %offset) minsize {
+; CHECK-LABEL: ldst_8bit:
+
+ %off32.sext.tmp = shl i64 %offset, 32
+ %off32.sext = ashr i64 %off32.sext.tmp, 32
+ %addr8_sxtw = getelementptr i8* %base, i64 %off32.sext
+ %val8_sxtw = load volatile i8* %addr8_sxtw
+ %val32_signed = sext i8 %val8_sxtw to i32
+ store volatile i32 %val32_signed, i32* @var_32bit
+; CHECK: ldrsb {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+ %addrint_uxtw = ptrtoint i8* %base to i64
+ %offset_uxtw = and i64 %offset, 4294967295
+ %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+ %addr_uxtw = inttoptr i64 %addrint1_uxtw to i8*
+ %val8_uxtw = load volatile i8* %addr_uxtw
+ %newval8 = add i8 %val8_uxtw, 1
+ store volatile i8 %newval8, i8* @var_8bit
+; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+ ret void
+}
+
+
+define void @ldst_16bit(i16* %base, i64 %offset) minsize {
+; CHECK-LABEL: ldst_16bit:
+
+ %addrint_uxtw = ptrtoint i16* %base to i64
+ %offset_uxtw = and i64 %offset, 4294967295
+ %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+ %addr_uxtw = inttoptr i64 %addrint1_uxtw to i16*
+ %val8_uxtw = load volatile i16* %addr_uxtw
+ %newval8 = add i16 %val8_uxtw, 1
+ store volatile i16 %newval8, i16* @var_16bit
+; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+ %base_sxtw = ptrtoint i16* %base to i64
+ %offset_sxtw.tmp = shl i64 %offset, 32
+ %offset_sxtw = ashr i64 %offset_sxtw.tmp, 32
+ %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
+ %addr_sxtw = inttoptr i64 %addrint_sxtw to i16*
+ %val16_sxtw = load volatile i16* %addr_sxtw
+ %val64_signed = sext i16 %val16_sxtw to i64
+ store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldrsh {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+
+ %base_uxtwN = ptrtoint i16* %base to i64
+ %offset_uxtwN = and i64 %offset, 4294967295
+ %offset2_uxtwN = shl i64 %offset_uxtwN, 1
+ %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
+ %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i16*
+ %val32 = load volatile i32* @var_32bit
+ %val16_trunc32 = trunc i32 %val32 to i16
+ store volatile i16 %val16_trunc32, i16* %addr_uxtwN
+; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #1]
+ ret void
+}
+
+define void @ldst_32bit(i32* %base, i64 %offset) minsize {
+; CHECK-LABEL: ldst_32bit:
+
+ %addrint_uxtw = ptrtoint i32* %base to i64
+ %offset_uxtw = and i64 %offset, 4294967295
+ %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+ %addr_uxtw = inttoptr i64 %addrint1_uxtw to i32*
+ %val32_uxtw = load volatile i32* %addr_uxtw
+ %newval32 = add i32 %val32_uxtw, 1
+ store volatile i32 %newval32, i32* @var_32bit
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+ %base_sxtw = ptrtoint i32* %base to i64
+ %offset_sxtw.tmp = shl i64 %offset, 32
+ %offset_sxtw = ashr i64 %offset_sxtw.tmp, 32
+ %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
+ %addr_sxtw = inttoptr i64 %addrint_sxtw to i32*
+ %val32_sxtw = load volatile i32* %addr_sxtw
+ %val64_signed = sext i32 %val32_sxtw to i64
+ store volatile i64 %val64_signed, i64* @var_64bit
+; CHECK: ldrsw {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+
+ %base_uxtwN = ptrtoint i32* %base to i64
+ %offset_uxtwN = and i64 %offset, 4294967295
+ %offset2_uxtwN = shl i64 %offset_uxtwN, 2
+ %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
+ %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i32*
+ %val32 = load volatile i32* @var_32bit
+ store volatile i32 %val32, i32* %addr_uxtwN
+; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #2]
+ ret void
+}
+
+define void @ldst_64bit(i64* %base, i64 %offset) minsize {
+; CHECK-LABEL: ldst_64bit:
+
+ %addrint_uxtw = ptrtoint i64* %base to i64
+ %offset_uxtw = and i64 %offset, 4294967295
+ %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
+ %addr_uxtw = inttoptr i64 %addrint1_uxtw to i64*
+ %val64_uxtw = load volatile i64* %addr_uxtw
+ %newval8 = add i64 %val64_uxtw, 1
+ store volatile i64 %newval8, i64* @var_64bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
+
+ %base_sxtw = ptrtoint i64* %base to i64
+ %offset_sxtw.tmp = shl i64 %offset, 32
+ %offset_sxtw = ashr i64 %offset_sxtw.tmp, 32
+ %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
+ %addr_sxtw = inttoptr i64 %addrint_sxtw to i64*
+ %val64_sxtw = load volatile i64* %addr_sxtw
+ store volatile i64 %val64_sxtw, i64* @var_64bit
+; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
+
+
+ %base_uxtwN = ptrtoint i64* %base to i64
+ %offset_uxtwN = and i64 %offset, 4294967295
+ %offset2_uxtwN = shl i64 %offset_uxtwN, 3
+ %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
+ %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i64*
+ %val64 = load volatile i64* @var_64bit
+ store volatile i64 %val64, i64* %addr_uxtwN
+; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #3]
+ ret void
+}
+
+ at var_8bit = global i8 0
+ at var_16bit = global i16 0
+ at var_32bit = global i32 0
+ at var_64bit = global i64 0
More information about the llvm-commits
mailing list