[llvm] [SeparateConstOffsetFromGEP] Decompose constant xor operand if possible (PR #135788)
Sumanth Gundapaneni via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 21 14:29:26 PDT 2025
https://github.com/sgundapa updated https://github.com/llvm/llvm-project/pull/135788
>From e45f0aadcd864d4bd8d1bf2bfb1d7c2d486103a8 Mon Sep 17 00:00:00 2001
From: Sumanth Gundapaneni <sugundap at amd.com>
Date: Tue, 15 Apr 2025 08:31:38 -0500
Subject: [PATCH 1/3] Decompose Xors that are fed to GEPs
NOTE: This patch is not to be merged, just for evaluation.
---
.../Scalar/SeparateConstOffsetFromGEP.cpp | 166 ++++++++++++++++++
1 file changed, 166 insertions(+)
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index e048015298461..b0f7c7d862519 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -160,6 +160,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -198,6 +199,8 @@
using namespace llvm;
using namespace llvm::PatternMatch;
+#define DEBUG_TYPE "separate-offset-gep"
+
static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
"disable-separate-const-offset-from-gep", cl::init(false),
cl::desc("Do not separate the constant offset from a GEP instruction"),
@@ -484,6 +487,9 @@ class SeparateConstOffsetFromGEP {
DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingAdds;
DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingSubs;
+
+ bool decomposeXor(Function &F);
+ Value *tryFoldXorToOrDisjoint(Instruction &I);
};
} // end anonymous namespace
@@ -1162,6 +1168,162 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
return true;
}
+bool SeparateConstOffsetFromGEP::decomposeXor(Function &F) {
+ bool FunctionChanged = false;
+ SmallVector<std::pair<Instruction *, Value *>, 16> ReplacementsToMake;
+
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ if (I.getOpcode() == Instruction::Xor) {
+ if (Value *Replacement = tryFoldXorToOrDisjoint(I)) {
+ ReplacementsToMake.push_back({&I, Replacement});
+ FunctionChanged = true;
+ }
+ }
+ }
+ }
+
+ if (!ReplacementsToMake.empty()) {
+ LLVM_DEBUG(dbgs() << "Applying " << ReplacementsToMake.size()
+ << " XOR->OR Disjoint replacements in " << F.getName()
+ << "\n");
+ for (auto &Pair : ReplacementsToMake) {
+ Pair.first->replaceAllUsesWith(Pair.second);
+ }
+ for (auto &Pair : ReplacementsToMake) {
+ Pair.first->eraseFromParent();
+ }
+ }
+
+ return FunctionChanged;
+}
+
+static llvm::Instruction *findClosestSequentialXor(Value *A, Instruction &I) {
+ llvm::Instruction *ClosestUser = nullptr;
+ for (llvm::User *User : A->users()) {
+ if (auto *UserInst = llvm::dyn_cast<llvm::Instruction>(User)) {
+ if (UserInst->getOpcode() != Instruction::Xor || UserInst == &I)
+ continue;
+ if (!ClosestUser) {
+ ClosestUser = UserInst;
+ } else {
+ // Compare instruction positions.
+ if (UserInst->comesBefore(ClosestUser)) {
+ ClosestUser = UserInst;
+ }
+ }
+ }
+ }
+ return ClosestUser;
+}
+
+/// Try to transform I = xor(A, C1) into or disjoint(Y, C2)
+/// where Y = xor(A, C0) is another existing instruction dominating I,
+/// C2 = C0 ^ C1, and A is known to be disjoint with C2.
+///
+/// @param I The XOR instruction being visited.
+/// @return The replacement Value* if successful, nullptr otherwise.
+Value *SeparateConstOffsetFromGEP::tryFoldXorToOrDisjoint(Instruction &I) {
+ assert(I.getOpcode() == Instruction::Xor && "Instruction must be XOR");
+
+ // Check if I has at least one GEP user.
+ bool HasGepUser = false;
+ for (User *U : I.users()) {
+ if (isa<GetElementPtrInst>(U)) {
+ HasGepUser = true;
+ break;
+ }
+ }
+ // If no user is a GEP instruction, abort the transformation.
+ if (!HasGepUser) {
+ LLVM_DEBUG(
+ dbgs() << "SeparateConstOffsetFromGEP: Skipping XOR->OR DISJOINT for "
+ << I << " because it has no GEP users.\n");
+ return nullptr;
+ }
+
+ Value *Op0 = I.getOperand(0);
+ Value *Op1 = I.getOperand(1);
+ ConstantInt *C1 = dyn_cast<ConstantInt>(Op1);
+ Value *A = Op0;
+
+ // Bail out of there is not constant operand.
+ if (!C1) {
+ C1 = dyn_cast<ConstantInt>(Op0);
+ if (!C1)
+ return nullptr;
+ A = Op1;
+ }
+
+ if (isa<UndefValue>(A))
+ return nullptr;
+
+ APInt C1_APInt = C1->getValue();
+ unsigned BitWidth = C1_APInt.getBitWidth();
+ Type *Ty = I.getType();
+
+ // --- Step 2: Find Dominating Y = xor A, C0 ---
+ Instruction *FoundUserInst = nullptr; // Instruction Y
+ APInt C0_APInt;
+
+ auto UserInst = findClosestSequentialXor(A, I);
+
+ BinaryOperator *UserBO = cast<BinaryOperator>(UserInst);
+ Value *UserOp0 = UserBO->getOperand(0);
+ Value *UserOp1 = UserBO->getOperand(1);
+ ConstantInt *UserC = nullptr;
+ if (UserOp0 == A)
+ UserC = dyn_cast<ConstantInt>(UserOp1);
+ else if (UserOp1 == A)
+ UserC = dyn_cast<ConstantInt>(UserOp0);
+ if (UserC) {
+ if (DT->dominates(UserInst, &I)) {
+ FoundUserInst = UserInst;
+ C0_APInt = UserC->getValue();
+ }
+ }
+ if (!FoundUserInst)
+ return nullptr;
+
+ // Calculate C2.
+ APInt C2_APInt = C0_APInt ^ C1_APInt;
+
+ // Check Disjointness A & C2 == 0.
+ KnownBits KnownA(BitWidth);
+ AssumptionCache *AC = nullptr;
+ computeKnownBits(A, KnownA, *DL, 0, AC, &I, DT);
+
+ if ((KnownA.Zero & C2_APInt) != C2_APInt)
+ return nullptr;
+
+ IRBuilder<> Builder(&I);
+ Builder.SetInsertPoint(&I); // Access Builder directly
+ Constant *C2_Const = ConstantInt::get(Ty, C2_APInt);
+ Twine Name = I.getName(); // Create Twine explicitly
+ Value *NewOr = BinaryOperator::CreateDisjointOr(FoundUserInst, C2_Const, Name,
+ I.getIterator());
+ // Transformation Conditions Met.
+ LLVM_DEBUG(dbgs() << "SeparateConstOffsetFromGEP: Replacing " << I
+ << " (used by GEP) with " << *NewOr << " based on "
+ << *FoundUserInst << "\n");
+
+#if 0
+ // Preserve metadata
+ if (Instruction *NewOrInst = dyn_cast<Instruction>(NewOr)) {
+ NewOrInst->copyMetadata(I);
+ } else {
+ assert(false && "CreateNUWOr did not return an Instruction");
+ if (NewOr)
+ NewOr->deleteValue();
+ return nullptr;
+ }
+#endif
+
+ // Return the replacement value. runOnFunction will handle replacement &
+ // deletion.
+ return NewOr;
+}
+
bool SeparateConstOffsetFromGEPLegacyPass::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
@@ -1181,6 +1343,10 @@ bool SeparateConstOffsetFromGEP::run(Function &F) {
DL = &F.getDataLayout();
bool Changed = false;
+
+ // Decompose xor in to "or disjoint" if possible.
+ Changed |= decomposeXor(F);
+
for (BasicBlock &B : F) {
if (!DT->isReachableFromEntry(&B))
continue;
>From def3b3e722b695aeb79eda829dbb45eeca7d73c2 Mon Sep 17 00:00:00 2001
From: Sumanth Gundapaneni <sugundap at amd.com>
Date: Mon, 21 Apr 2025 16:05:53 -0500
Subject: [PATCH 2/3] Push correctness fixes
---
.../Scalar/SeparateConstOffsetFromGEP.cpp | 100 ++++++-----
.../AMDGPU/xor-to-or-disjoint.ll | 163 ++++++++++++++++++
2 files changed, 221 insertions(+), 42 deletions(-)
create mode 100644 llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/xor-to-or-disjoint.ll
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index b0f7c7d862519..19f166e5f6595 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -160,7 +160,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -1187,12 +1186,11 @@ bool SeparateConstOffsetFromGEP::decomposeXor(Function &F) {
LLVM_DEBUG(dbgs() << "Applying " << ReplacementsToMake.size()
<< " XOR->OR Disjoint replacements in " << F.getName()
<< "\n");
- for (auto &Pair : ReplacementsToMake) {
+ for (auto &Pair : ReplacementsToMake)
Pair.first->replaceAllUsesWith(Pair.second);
- }
- for (auto &Pair : ReplacementsToMake) {
+
+ for (auto &Pair : ReplacementsToMake)
Pair.first->eraseFromParent();
- }
}
return FunctionChanged;
@@ -1204,9 +1202,9 @@ static llvm::Instruction *findClosestSequentialXor(Value *A, Instruction &I) {
if (auto *UserInst = llvm::dyn_cast<llvm::Instruction>(User)) {
if (UserInst->getOpcode() != Instruction::Xor || UserInst == &I)
continue;
- if (!ClosestUser) {
+ if (!ClosestUser)
ClosestUser = UserInst;
- } else {
+ else {
// Compare instruction positions.
if (UserInst->comesBefore(ClosestUser)) {
ClosestUser = UserInst;
@@ -1217,9 +1215,14 @@ static llvm::Instruction *findClosestSequentialXor(Value *A, Instruction &I) {
return ClosestUser;
}
-/// Try to transform I = xor(A, C1) into or disjoint(Y, C2)
+/// Try to transform I = xor(A, C1) into or_disjoint(Y, C2)
/// where Y = xor(A, C0) is another existing instruction dominating I,
-/// C2 = C0 ^ C1, and A is known to be disjoint with C2.
+/// C2 = C1 - C0, and A is known to be disjoint with C2.
+///
+/// This transformation is beneficial particularly for GEPs because:
+/// 1. OR operations often map better to addressing modes than XOR
+/// 2. Disjoint OR operations preserve the semantics of the original XOR
+/// 3. This can enable further optimizations in the GEP offset folding pipeline
///
/// @param I The XOR instruction being visited.
/// @return The replacement Value* if successful, nullptr otherwise.
@@ -1237,7 +1240,7 @@ Value *SeparateConstOffsetFromGEP::tryFoldXorToOrDisjoint(Instruction &I) {
// If no user is a GEP instruction, abort the transformation.
if (!HasGepUser) {
LLVM_DEBUG(
- dbgs() << "SeparateConstOffsetFromGEP: Skipping XOR->OR DISJOINT for "
+ dbgs() << "SeparateConstOffsetFromGEP: Skipping XOR->OR DISJOINT for"
<< I << " because it has no GEP users.\n");
return nullptr;
}
@@ -1262,11 +1265,18 @@ Value *SeparateConstOffsetFromGEP::tryFoldXorToOrDisjoint(Instruction &I) {
unsigned BitWidth = C1_APInt.getBitWidth();
Type *Ty = I.getType();
- // --- Step 2: Find Dominating Y = xor A, C0 ---
- Instruction *FoundUserInst = nullptr; // Instruction Y
+ // Find Dominating Y = xor A, C0
+ Instruction *FoundUserInst = nullptr;
APInt C0_APInt;
- auto UserInst = findClosestSequentialXor(A, I);
+ // Find the closest XOR instruction using the same value.
+ Instruction *UserInst = findClosestSequentialXor(A, I);
+ if (!UserInst) {
+ LLVM_DEBUG(
+ dbgs() << "SeparateConstOffsetFromGEP: No dominating XOR found for" << I
+ << "\n");
+ return nullptr;
+ }
BinaryOperator *UserBO = cast<BinaryOperator>(UserInst);
Value *UserOp0 = UserBO->getOperand(0);
@@ -1276,51 +1286,57 @@ Value *SeparateConstOffsetFromGEP::tryFoldXorToOrDisjoint(Instruction &I) {
UserC = dyn_cast<ConstantInt>(UserOp1);
else if (UserOp1 == A)
UserC = dyn_cast<ConstantInt>(UserOp0);
- if (UserC) {
- if (DT->dominates(UserInst, &I)) {
- FoundUserInst = UserInst;
- C0_APInt = UserC->getValue();
- }
+ else {
+ LLVM_DEBUG(dbgs() << "SeparateConstOffsetFromGEP: Found XOR" << *UserInst
+ << " doesn't use value " << *A << "\n");
+ return nullptr;
}
- if (!FoundUserInst)
+
+ if (!UserC) {
+ LLVM_DEBUG(
+ dbgs()
+ << "SeparateConstOffsetFromGEP: Found XOR doesn't have constant operand"
+ << *UserInst << "\n");
return nullptr;
+ }
- // Calculate C2.
- APInt C2_APInt = C0_APInt ^ C1_APInt;
+ if (!DT->dominates(UserInst, &I)) {
+ LLVM_DEBUG(dbgs() << "SeparateConstOffsetFromGEP: Found XOR" << *UserInst
+ << " doesn't dominate " << I << "\n");
+ return nullptr;
+ }
+
+ FoundUserInst = UserInst;
+ C0_APInt = UserC->getValue();
+
+ // Calculate C2 = C1 - C0.
+ APInt C2_APInt = C1_APInt - C0_APInt;
// Check Disjointness A & C2 == 0.
KnownBits KnownA(BitWidth);
- AssumptionCache *AC = nullptr;
- computeKnownBits(A, KnownA, *DL, 0, AC, &I, DT);
+ computeKnownBits(A, KnownA, *DL, 0, nullptr, &I, DT);
- if ((KnownA.Zero & C2_APInt) != C2_APInt)
+ if ((KnownA.One & C2_APInt) != 0) {
+ LLVM_DEBUG(
+ dbgs() << "SeparateConstOffsetFromGEP: Disjointness check failed for"
+ << I << "\n");
return nullptr;
+ }
IRBuilder<> Builder(&I);
- Builder.SetInsertPoint(&I); // Access Builder directly
+ Builder.SetInsertPoint(&I);
Constant *C2_Const = ConstantInt::get(Ty, C2_APInt);
- Twine Name = I.getName(); // Create Twine explicitly
+ Twine Name = I.getName();
Value *NewOr = BinaryOperator::CreateDisjointOr(FoundUserInst, C2_Const, Name,
I.getIterator());
- // Transformation Conditions Met.
- LLVM_DEBUG(dbgs() << "SeparateConstOffsetFromGEP: Replacing " << I
- << " (used by GEP) with " << *NewOr << " based on "
- << *FoundUserInst << "\n");
-
-#if 0
// Preserve metadata
- if (Instruction *NewOrInst = dyn_cast<Instruction>(NewOr)) {
+ if (Instruction *NewOrInst = dyn_cast<Instruction>(NewOr))
NewOrInst->copyMetadata(I);
- } else {
- assert(false && "CreateNUWOr did not return an Instruction");
- if (NewOr)
- NewOr->deleteValue();
- return nullptr;
- }
-#endif
- // Return the replacement value. runOnFunction will handle replacement &
- // deletion.
+ LLVM_DEBUG(dbgs() << "SeparateConstOffsetFromGEP: Replacing" << I
+ << " (used by GEP) with" << *NewOr << " based on"
+ << *FoundUserInst << "\n");
+
return NewOr;
}
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/xor-to-or-disjoint.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/xor-to-or-disjoint.ll
new file mode 100644
index 0000000000000..808df95116f12
--- /dev/null
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/xor-to-or-disjoint.ll
@@ -0,0 +1,163 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=separate-const-offset-from-gep \
+; RUN: -S < %s | FileCheck %s
+
+
+; Test with GEP user and known bits: Ensure the transformation occurs when the xor has a GEP user
+define ptr @test_with_gep_user(ptr %ptr) {
+; CHECK-LABEL: define ptr @test_with_gep_user(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[BASE:%.*]] = add i64 0, 0
+; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[BASE]], 8
+; CHECK-NEXT: [[XOR21:%.*]] = or disjoint i64 [[XOR1]], 16
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[XOR21]]
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+entry:
+ %base = add i64 0,0
+ %xor1 = xor i64 %base, 8
+ %xor2 = xor i64 %base, 24 ; Should be replaced with OR of %xor1 and 16
+ %gep = getelementptr i8, ptr %ptr, i64 %xor2
+ ret ptr %gep
+}
+
+
+; Test with non-GEP user: Ensure the transformation does not occur
+define i32 @test_with_non_gep_user(ptr %ptr) {
+; CHECK-LABEL: define i32 @test_with_non_gep_user(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[BASE:%.*]] = add i32 0, 0
+; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[BASE]], 8
+; CHECK-NEXT: [[XOR2:%.*]] = xor i32 [[BASE]], 24
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[XOR2]], 5
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+entry:
+ %base = add i32 0,0
+ %xor1 = xor i32 %base, 8
+ %xor2 = xor i32 %base, 24
+ %add = add i32 %xor2, 5
+ ret i32 %add
+}
+
+; Test with non-constant operand: Ensure the transformation does not occur
+define ptr @test_with_non_constant_operand(i64 %val, i64 %val2, ptr %ptr) {
+; CHECK-LABEL: define ptr @test_with_non_constant_operand(
+; CHECK-SAME: i64 [[VAL:%.*]], i64 [[VAL2:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[VAL]], [[VAL2]]
+; CHECK-NEXT: [[XOR2:%.*]] = xor i64 [[VAL]], 24
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[XOR2]]
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+entry:
+ %xor1 = xor i64 %val, %val2 ; Non-constant operand
+ %xor2 = xor i64 %val, 24
+ %gep = getelementptr i8, ptr %ptr, i64 %xor2
+ ret ptr %gep
+}
+
+; Test with unknown disjoint bits: Ensure the transformation does not occur
+define ptr @test_with_unknown_disjoint_bits(i64 %base, ptr %ptr) {
+; CHECK-LABEL: define ptr @test_with_unknown_disjoint_bits(
+; CHECK-SAME: i64 [[BASE:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[BASE]], 8
+; CHECK-NEXT: [[XOR21:%.*]] = or disjoint i64 [[XOR1]], 16
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[XOR21]]
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+entry:
+ %xor1 = xor i64 %base, 8
+ %xor2 = xor i64 %base, 24
+ %gep = getelementptr i8, ptr %ptr, i64 %xor2
+ ret ptr %gep
+}
+
+; Test with non-disjoint bits: Ensure the transformation does not occur
+define ptr @test_with_non_disjoint_bits(i64 %val, ptr %ptr) {
+; CHECK-LABEL: define ptr @test_with_non_disjoint_bits(
+; CHECK-SAME: i64 [[VAL:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[AND:%.*]] = and i64 [[VAL]], 31
+; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[AND]], 4
+; CHECK-NEXT: [[XOR21:%.*]] = or disjoint i64 [[XOR1]], 16
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[XOR21]]
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+entry:
+ %and = and i64 %val, 31 ; val can have bits 0-4 set
+ %xor1 = xor i64 %and, 4 ; Flips bit 2
+ %xor2 = xor i64 %and, 20 ; Flips bits 2 and 4, should NOT replace since bit 4 overlaps with possible val bits
+ %gep = getelementptr i8, ptr %ptr, i64 %xor2
+ ret ptr %gep
+}
+
+; Test with multiple xor operations in sequence
+define ptr @test_multiple_xors(ptr %ptr) {
+; CHECK-LABEL: define ptr @test_multiple_xors(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[BASE:%.*]] = add i64 2, 0
+; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[BASE]], 8
+; CHECK-NEXT: [[XOR21:%.*]] = or disjoint i64 [[XOR1]], 16
+; CHECK-NEXT: [[XOR32:%.*]] = or disjoint i64 [[XOR1]], 24
+; CHECK-NEXT: [[XOR43:%.*]] = or disjoint i64 [[XOR1]], 64
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[XOR21]]
+; CHECK-NEXT: [[GEP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[XOR32]]
+; CHECK-NEXT: [[GEP4:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[XOR43]]
+; CHECK-NEXT: ret ptr [[GEP4]]
+;
+entry:
+ %base = add i64 2,0
+ %xor1 = xor i64 %base, 8
+ %xor2 = xor i64 %base, 24 ; Should be replaced with OR
+ %xor3 = xor i64 %base, 32
+ %xor4 = xor i64 %base, 72 ; Should be replaced with OR
+ %gep2 = getelementptr i8, ptr %ptr, i64 %xor2
+ %gep3 = getelementptr i8, ptr %ptr, i64 %xor3
+ %gep4 = getelementptr i8, ptr %ptr, i64 %xor4
+ ret ptr %gep4
+}
+
+
+; Test with operand order variations
+define ptr @test_operand_order(ptr %ptr) {
+; CHECK-LABEL: define ptr @test_operand_order(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[BASE:%.*]] = add i64 2, 0
+; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[BASE]], 12
+; CHECK-NEXT: [[XOR21:%.*]] = or disjoint i64 [[XOR1]], 12
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[XOR21]]
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+entry:
+ %base = add i64 2,0
+ %xor1 = xor i64 %base, 12
+ %xor2 = xor i64 24, %base ; Operands reversed, should still be replaced
+ %gep = getelementptr i8, ptr %ptr, i64 %xor2
+ ret ptr %gep
+}
+
+
+; Test with multiple xor operations in sequence
+define ptr @aatest_multiple_xors(ptr %ptr) {
+; CHECK-LABEL: define ptr @aatest_multiple_xors(
+; CHECK-SAME: ptr [[PTR:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[BASE:%.*]] = add i64 2, 0
+; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[BASE]], 72
+; CHECK-NEXT: [[XOR21:%.*]] = or disjoint i64 [[XOR1]], -48
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[XOR21]]
+; CHECK-NEXT: ret ptr [[GEP]]
+;
+entry:
+ %base = add i64 2,0
+ %xor1 = xor i64 %base, 72
+ %xor2 = xor i64 %base, 24 ; Should be replaced with OR
+ %gep = getelementptr i8, ptr %ptr, i64 %xor2
+ ret ptr %gep
+}
>From 3492929677473ae2bc59df6b70da6e3632494103 Mon Sep 17 00:00:00 2001
From: Sumanth Gundapaneni <sugundap at amd.com>
Date: Mon, 21 Apr 2025 16:29:00 -0500
Subject: [PATCH 3/3] Update lit test
---
.../AMDGPU/xor-to-or-disjoint.ll | 23 ++-----------------
1 file changed, 2 insertions(+), 21 deletions(-)
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/xor-to-or-disjoint.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/xor-to-or-disjoint.ll
index 808df95116f12..d1a04a70d4994 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/xor-to-or-disjoint.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/xor-to-or-disjoint.ll
@@ -76,25 +76,6 @@ entry:
ret ptr %gep
}
-; Test with non-disjoint bits: Ensure the transformation does not occur
-define ptr @test_with_non_disjoint_bits(i64 %val, ptr %ptr) {
-; CHECK-LABEL: define ptr @test_with_non_disjoint_bits(
-; CHECK-SAME: i64 [[VAL:%.*]], ptr [[PTR:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[AND:%.*]] = and i64 [[VAL]], 31
-; CHECK-NEXT: [[XOR1:%.*]] = xor i64 [[AND]], 4
-; CHECK-NEXT: [[XOR21:%.*]] = or disjoint i64 [[XOR1]], 16
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[XOR21]]
-; CHECK-NEXT: ret ptr [[GEP]]
-;
-entry:
- %and = and i64 %val, 31 ; val can have bits 0-4 set
- %xor1 = xor i64 %and, 4 ; Flips bit 2
- %xor2 = xor i64 %and, 20 ; Flips bits 2 and 4, should NOT replace since bit 4 overlaps with possible val bits
- %gep = getelementptr i8, ptr %ptr, i64 %xor2
- ret ptr %gep
-}
-
; Test with multiple xor operations in sequence
define ptr @test_multiple_xors(ptr %ptr) {
; CHECK-LABEL: define ptr @test_multiple_xors(
@@ -144,8 +125,8 @@ entry:
; Test with multiple xor operations in sequence
-define ptr @aatest_multiple_xors(ptr %ptr) {
-; CHECK-LABEL: define ptr @aatest_multiple_xors(
+define ptr @test_negative_offset(ptr %ptr) {
+; CHECK-LABEL: define ptr @test_negative_offset(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[BASE:%.*]] = add i64 2, 0
More information about the llvm-commits
mailing list