[llvm] 8d433a0 - Teach the AArch64 backend to materialize immediates using a pair of ORR-immediate

Owen Anderson via llvm-commits llvm-commits at lists.llvm.org
Sat Feb 4 18:30:03 PST 2023


Author: Owen Anderson
Date: 2023-02-04T20:29:57-06:00
New Revision: 8d433a0ae55ac25ba0a77d733e1ee5e23d1eb9f7

URL: https://github.com/llvm/llvm-project/commit/8d433a0ae55ac25ba0a77d733e1ee5e23d1eb9f7
DIFF: https://github.com/llvm/llvm-project/commit/8d433a0ae55ac25ba0a77d733e1ee5e23d1eb9f7.diff

LOG: Teach the AArch64 backend to materialize immediates using a pair of ORR-immediate
instructions.

Credit to czwarich for figuring out the algorithm to test for this.

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D140952

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
    llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
    llvm/test/CodeGen/AArch64/arm64-movi.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
index 6b6fd6562642d..f120da7b863be 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
@@ -239,6 +239,141 @@ static bool trySequenceOfOnes(uint64_t UImm,
   return true;
 }
 
+static uint64_t GetRunOfOnesStartingAt(uint64_t V, uint64_t StartPosition) {
+  uint64_t NumOnes = llvm::countTrailingOnes(V >> StartPosition);
+
+  uint64_t UnshiftedOnes;
+  if (NumOnes == 64) {
+    UnshiftedOnes = ~0ULL;
+  } else {
+    UnshiftedOnes = (1ULL << NumOnes) - 1;
+  }
+  return UnshiftedOnes << StartPosition;
+}
+
+static uint64_t rotl(uint64_t n, uint64_t d) {
+  if (d == 0)
+    return n;
+  return (n << d) | (n >> (64 - d));
+}
+
+static uint64_t rotr(uint64_t n, uint64_t d) {
+  if (d == 0)
+    return n;
+  return (n >> d) | (n << (64 - d));
+}
+
+static uint64_t MaximallyReplicateSubImmediate(uint64_t V, uint64_t Subset) {
+  uint64_t Result = Subset;
+
+  // 64, 32, 16, 8, 4, 2
+  for (uint64_t i = 0; i < 6; ++i) {
+    uint64_t Rotation = 1 << (6 - i);
+    uint64_t Closure = Result | rotl(Result, Rotation);
+    if (Closure != (Closure & V)) {
+      break;
+    }
+    Result = Closure;
+  }
+
+  return Result;
+}
+
+// Find the logical immediate that covers the most bits in RemainingBits,
+// allowing for additional bits to be set that were set in OriginalBits.
+static uint64_t maximalLogicalImmWithin(uint64_t RemainingBits,
+                                        uint64_t OriginalBits) {
+  // Find the first set bit.
+  uint32_t Position = llvm::countTrailingZeros(RemainingBits);
+
+  // Get the first run of set bits.
+  uint64_t FirstRun = GetRunOfOnesStartingAt(OriginalBits, Position);
+
+  // Replicate the run as many times as possible, as long as the bits are set in
+  // RemainingBits.
+  uint64_t MaximalImm = MaximallyReplicateSubImmediate(OriginalBits, FirstRun);
+
+  return MaximalImm;
+}
+
+static std::optional<std::pair<uint64_t, uint64_t>>
+decomposeIntoOrrOfLogicalImmediates(uint64_t UImm) {
+  if (UImm == 0 || ~UImm == 0)
+    return std::nullopt;
+
+  // Make sure we don't have a run of ones split around the rotation boundary.
+  uint32_t InitialTrailingOnes = llvm::countTrailingOnes(UImm);
+  uint64_t RotatedBits = rotr(UImm, InitialTrailingOnes);
+
+  // Find the largest logical immediate that fits within the full immediate.
+  uint64_t MaximalImm1 = maximalLogicalImmWithin(RotatedBits, RotatedBits);
+
+  // Remove all bits that are set by this mask.
+  uint64_t RemainingBits = RotatedBits & ~MaximalImm1;
+
+  // Find the largest logical immediate covering the remaining bits, allowing
+  // for additional bits to be set that were also set in the original immediate.
+  uint64_t MaximalImm2 = maximalLogicalImmWithin(RemainingBits, RotatedBits);
+
+  // If any bits still haven't been covered, then give up.
+  if (RemainingBits & ~MaximalImm2)
+    return std::nullopt;
+
+  // Make sure to un-rotate the immediates.
+  return std::make_pair(rotl(MaximalImm1, InitialTrailingOnes),
+                        rotl(MaximalImm2, InitialTrailingOnes));
+}
+
+// Attempt to expand an immediate as the ORR of a pair of logical immediates.
+static bool tryOrrOfLogicalImmediates(uint64_t UImm,
+                                      SmallVectorImpl<ImmInsnModel> &Insn) {
+  auto MaybeDecomposition = decomposeIntoOrrOfLogicalImmediates(UImm);
+  if (MaybeDecomposition == std::nullopt)
+    return false;
+  uint64_t Imm1 = MaybeDecomposition->first;
+  uint64_t Imm2 = MaybeDecomposition->second;
+
+  uint64_t Encoding1, Encoding2;
+  bool Imm1Success = AArch64_AM::processLogicalImmediate(Imm1, 64, Encoding1);
+  bool Imm2Success = AArch64_AM::processLogicalImmediate(Imm2, 64, Encoding2);
+
+  if (Imm1Success && Imm2Success) {
+    // Create the ORR-immediate instructions.
+    Insn.push_back({AArch64::ORRXri, 0, Encoding1});
+    Insn.push_back({AArch64::ORRXri, 1, Encoding2});
+    return true;
+  }
+
+  return false;
+}
+
+// Attempt to expand an immediate as the AND of a pair of logical immediates.
+// This is done by applying DeMorgan's law, under which logical immediates
+// are closed.
+static bool tryAndOfLogicalImmediates(uint64_t UImm,
+                                      SmallVectorImpl<ImmInsnModel> &Insn) {
+  // Apply DeMorgan's law to turn this into an ORR problem.
+  auto MaybeDecomposition = decomposeIntoOrrOfLogicalImmediates(~UImm);
+  if (MaybeDecomposition == std::nullopt)
+    return false;
+  uint64_t Imm1 = MaybeDecomposition->first;
+  uint64_t Imm2 = MaybeDecomposition->second;
+
+  uint64_t Encoding1, Encoding2;
+  bool Imm1Success = AArch64_AM::processLogicalImmediate(~Imm1, 64, Encoding1);
+  bool Imm2Success = AArch64_AM::processLogicalImmediate(~Imm2, 64, Encoding2);
+
+  if (Imm1Success && Imm2Success) {
+    // Materialize Imm1, the LHS of the AND
+    Insn.push_back({AArch64::ORRXri, 0, Encoding1});
+    // AND Imm1 with Imm2
+    Insn.push_back({AArch64::ANDXri, 1, Encoding2});
+    return true;
+  }
+
+  return false;
+}
+
 /// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to a
 /// MOVZ or MOVN of width BitSize followed by up to 3 MOVK instructions.
 static inline void expandMOVImmSimple(uint64_t Imm, unsigned BitSize,
@@ -372,6 +507,14 @@ void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize,
     }
   }
 
+  // Attempt to use a sequence of two ORR-immediate instructions.
+  if (tryOrrOfLogicalImmediates(Imm, Insn))
+    return;
+
+  // Attempt to use a sequence of ORR-immediate followed by AND-immediate.
+  if (tryAndOfLogicalImmediates(Imm, Insn))
+    return;
+
   // FIXME: Add more two-instruction sequences.
 
   // Three instruction sequences.

diff  --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 3370da479e3cc..0da93d0bb4d1c 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -148,10 +148,40 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
 
     case AArch64::ORRWri:
     case AArch64::ORRXri:
-      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
-        .add(MI.getOperand(0))
-        .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
-        .addImm(I->Op2));
+      if (I->Op1 == 0) {
+        MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
+                           .add(MI.getOperand(0))
+                           .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
+                           .addImm(I->Op2));
+      } else {
+        Register DstReg = MI.getOperand(0).getReg();
+        bool DstIsDead = MI.getOperand(0).isDead();
+        MIBS.push_back(
+            BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
+                .addReg(DstReg, RegState::Define |
+                                    getDeadRegState(DstIsDead && LastItem) |
+                                    RenamableState)
+                .addReg(DstReg)
+                .addImm(I->Op2));
+      }
+      break;
+    case AArch64::ANDXri:
+      if (I->Op1 == 0) {
+        MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
+                           .add(MI.getOperand(0))
+                           .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
+                           .addImm(I->Op2));
+      } else {
+        Register DstReg = MI.getOperand(0).getReg();
+        bool DstIsDead = MI.getOperand(0).isDead();
+        MIBS.push_back(
+            BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
+                .addReg(DstReg, RegState::Define |
+                                    getDeadRegState(DstIsDead && LastItem) |
+                                    RenamableState)
+                .addReg(DstReg)
+                .addImm(I->Op2));
+      }
       break;
     case AArch64::MOVNWi:
     case AArch64::MOVNXi:

diff  --git a/llvm/test/CodeGen/AArch64/arm64-movi.ll b/llvm/test/CodeGen/AArch64/arm64-movi.ll
index 9a1efba913af3..2ec58dbee0232 100644
--- a/llvm/test/CodeGen/AArch64/arm64-movi.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-movi.ll
@@ -109,14 +109,11 @@ define i64 @movz_skip1_movk() nounwind {
   ret i64 147695335379508
 }
 
-; FIXME: Prefer "mov w0, #2863311530; lsl x0, x0, #4"
-; or "mov x0, #-6148914691236517206; and x0, x0, #45812984480"
 define i64 @orr_lsl_pattern() nounwind {
 ; CHECK-LABEL: orr_lsl_pattern:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x0, #43680
-; CHECK-NEXT:    movk x0, #43690, lsl #16
-; CHECK-NEXT:    movk x0, #10, lsl #32
+; CHECK-NEXT:    mov x0, #-6148914691236517206
+; CHECK-NEXT:    and x0, x0, #0x1fffffffe0
 ; CHECK-NEXT:    ret
   ret i64 45812984480
 }
@@ -322,13 +319,11 @@ define i64 @orr_movk15() nounwind {
   ret i64 549621596159
 }
 
-; FIXME: prefer "mov x0, #2147483646; orr x0, x0, #36028659580010496"
 define i64 @orr_movk16() nounwind {
 ; CHECK-LABEL: orr_movk16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x0, #36028659580010496
-; CHECK-NEXT:    movk x0, #65534
-; CHECK-NEXT:    movk x0, #32767, lsl #16
+; CHECK-NEXT:    mov x0, #2147483646
+; CHECK-NEXT:    orr x0, x0, #0x7fffe0007fffe0
 ; CHECK-NEXT:    ret
   ret i64 36028661727494142
 }
@@ -351,13 +346,11 @@ define i64 @orr_movk18() nounwind {
   ret i64 137438953409
 }
 
-; FIXME: prefer "mov x0, #72340172838076673; and x0, x0, #2199023255296"
 define i64 @orr_and() nounwind {
 ; CHECK-LABEL: orr_and:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x0, #256
-; CHECK-NEXT:    movk x0, #257, lsl #16
-; CHECK-NEXT:    movk x0, #257, lsl #32
+; CHECK-NEXT:    mov x0, #72340172838076673
+; CHECK-NEXT:    and x0, x0, #0xffffffffff00
 ; CHECK-NEXT:    ret
   ret i64 1103823438080
 }
@@ -395,59 +388,47 @@ define i64 @movn_eor() nounwind {
   ret i64 3689348814437076172
 }
 
-; FIXME: prefer "mov x0, #536866816; orr x0, x0, #0x3fff800000000000"
 define i64 @orr_orr_64() nounwind {
 ; CHECK-LABEL: orr_orr_64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x0, #4611545280939032576
-; CHECK-NEXT:    movk x0, #61440
-; CHECK-NEXT:    movk x0, #8191, lsl #16
+; CHECK-NEXT:    mov x0, #536866816
+; CHECK-NEXT:    orr x0, x0, #0x3fff800000000000
 ; CHECK-NEXT:    ret
   ret i64 4611545281475899392
 }
 
-; FIXME: prefer "mov x0, #558551907040256; orr x0, x0, #0x1000100010001000"
 define i64 @orr_orr_32() nounwind {
 ; CHECK-LABEL: orr_orr_32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x0, #-287953294993589248
-; CHECK-NEXT:    movk x0, #7169, lsl #16
-; CHECK-NEXT:    movk x0, #7169, lsl #48
+; CHECK-NEXT:    mov x0, #558551907040256
+; CHECK-NEXT:    orr x0, x0, #0x1c001c001c001c00
 ; CHECK-NEXT:    ret
   ret i64 2018171185438784512
 }
 
-; FIXME: prefer "mov x0, #281479271743489; orr x0, x0, #0x1000100010001000"
 define i64 @orr_orr_16() nounwind {
 ; CHECK-LABEL: orr_orr_16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x0, #4097
-; CHECK-NEXT:    movk x0, #4097, lsl #16
-; CHECK-NEXT:    movk x0, #4097, lsl #32
-; CHECK-NEXT:    movk x0, #4097, lsl #48
+; CHECK-NEXT:    mov x0, #1152939097061330944
+; CHECK-NEXT:    orr x0, x0, #0x1000100010001
 ; CHECK-NEXT:    ret
   ret i64 1153220576333074433
 }
 
-; FIXME: prefer "mov x0, #144680345676153346; orr x0, x0, #0x1818181818181818"
 define i64 @orr_orr_8() nounwind {
 ; CHECK-LABEL: orr_orr_8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x0, #6682
-; CHECK-NEXT:    movk x0, #6682, lsl #16
-; CHECK-NEXT:    movk x0, #6682, lsl #32
-; CHECK-NEXT:    movk x0, #6682, lsl #48
+; CHECK-NEXT:    mov x0, #144680345676153346
+; CHECK-NEXT:    orr x0, x0, #0x1818181818181818
 ; CHECK-NEXT:    ret
   ret i64 1880844493789993498
 }
 
-; FIXME: prefer "mov x0, #-6148914691236517206; orr x0, x0, #0x0FFFFF0000000000"
 define i64 @orr_64_orr_8() nounwind {
 ; CHECK-LABEL: orr_64_orr_8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov x0, #-6148914691236517206
-; CHECK-NEXT:    movk x0, #65450, lsl #32
-; CHECK-NEXT:    movk x0, #45055, lsl #48
+; CHECK-NEXT:    orr x0, x0, #0xfffff0000000000
 ; CHECK-NEXT:    ret
   ret i64 -5764607889538110806
 }


        


More information about the llvm-commits mailing list